[med-svn] [subread] 01/08: Imported Upstream version 1.5.0+dfsg
Alex Mestiashvili
malex-guest at moszumanska.debian.org
Wed Nov 11 18:06:58 UTC 2015
This is an automated email from the git hooks/post-receive script.
malex-guest pushed a commit to branch master
in repository subread.
commit 32f5127f65e03f7cf69dda11f0ae92dcd9b64eb8
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date: Fri Nov 6 14:59:25 2015 +0100
Imported Upstream version 1.5.0+dfsg
---
doc/MDSplot.png | Bin 22947 -> 0 bytes
doc/SubreadUsersGuide.bib | 73 -
doc/SubreadUsersGuide.tex | 220 +-
doc/indel.png | Bin 108132 -> 0 bytes
doc/junction.png | Bin 172344 -> 0 bytes
doc/seed-and-vote.png | Bin 112767 -> 0 bytes
doc/voom_mean_variance.png | Bin 99213 -> 0 bytes
src/BACK-sambam-file.h | 56 -
src/HelperFunctions.c | 305 +-
src/HelperFunctions.h | 20 +-
src/Makefile.FreeBSD | 3 +-
src/Makefile.Linux | 14 +-
src/Makefile.MacOS | 17 +-
src/SNPCalling.c | 11 +-
src/SUBindel.c | 11 +-
src/SeekGZ.h | 10 +
src/core-bigtable.c | 488 +++
src/core-bigtable.h | 49 +
src/core-indel.c | 561 ++-
src/core-indel.h | 71 +-
src/core-interface-aligner.c | 275 +-
src/core-interface-subjunc.c | 326 +-
src/core-junction-V2.c | 2922 --------------
src/core-junction.c | 4140 +++++++++++++++-----
src/core-junction.h | 53 +-
src/core.c | 3216 ++++++++-------
src/core.h | 358 +-
src/coverage_calc.c | 25 +-
src/del4-mmap-test.c | 44 +
src/filterJunctionTable.c | 7 +-
src/fullscan.c | 35 +-
src/gene-algorithms.c | 36 +-
src/gene-algorithms.h | 6 +-
src/global-reassembly.c | 30 +-
src/hashtable.c | 15 +-
src/hashtable.h | 4 +-
src/index-builder.c | 83 +-
src/input-files.c | 3017 ++++++++++++--
src/input-files.h | 121 +-
src/makefile.version | 2 +-
src/propmapped.c | 18 +-
src/qualityScores.c | 33 +-
src/read-repair.c | 130 +
src/readSummary.c | 2770 +++++++------
src/removeDupReads.c | 23 +-
src/samMappedBases.c | 124 +
src/sambam-file.c | 55 +-
src/sambam-file.h | 3 +
src/seek-zlib.c | 338 ++
src/seek-zlib.h | 23 +
src/sorted-hashtable.c | 20 +-
src/subfilter.c | 48 +
src/subread.h | 87 +-
src/subtools.c | 9 +-
src/t.c | 4 +
src/test-seek-zlib.c | 101 +
src/test_qs.c | 106 +
src/zlib_test.c | 88 +
src/zpipe.c | 192 -
test/featureCounts/data/DEL4-1385690721.FC | 9 -
test/featureCounts/data/DEL4-1385690721.FC.summary | 9 -
test/featureCounts/data/corner-BINS.SAF | 7 +
test/featureCounts/data/corner-BINS.sam | 9 +
test/featureCounts/data/corner-JUNC.sam | 8 +-
test/featureCounts/data/test-chrname.sam | 604 +--
test/featureCounts/data/test-minimum-dup.ora | 9 +
...ce.ora.summary => test-minimum-dup.ora.summary} | 9 +-
test/featureCounts/featureCounts-test.sh | 2 +-
test/featureCounts/result/test-minimum.FC | 9 -
test/featureCounts/result/test-minimum.FC.summary | 12 -
test/featureCounts/test_all.sh | 6 -
test/featureCounts/test_corner_cases.sh | 2 +
test/subjunc/subjunc-test.sh | 4 +-
test/subread-align/subread-align-test.sh | 14 +-
test/subread-align/test-tmp.log | 36 +-
75 files changed, 12848 insertions(+), 8697 deletions(-)
diff --git a/doc/MDSplot.png b/doc/MDSplot.png
deleted file mode 100644
index a07a5c4..0000000
Binary files a/doc/MDSplot.png and /dev/null differ
diff --git a/doc/SubreadUsersGuide.bib b/doc/SubreadUsersGuide.bib
deleted file mode 100644
index 7379ac2..0000000
--- a/doc/SubreadUsersGuide.bib
+++ /dev/null
@@ -1,73 +0,0 @@
-@article{liao,
- author={Liao, Y. and Smyth, G. K. and Shi, W.},
- year={2013},
- title={The Subread aligner: fast, accurate and scalable read mapping by seed-and-vote},
- journal={Nucleic Acids Research},
- volume={41},
- issue={10},
- pages={e108}
-}
-@article{TangNC2013,
- author={Tang, K. W. and Alaei-Mahabadi, B. and Samuelsson, T. and Lindh, M. and Larsson, E.},
- year={2013},
- title={{The landscape of viral expression and host gene fusion and adaptation in human cancer}},
- journal={Nature Communications.},
- volume={2013 Oct 1;4:2513. doi: 10.1038/ncomms3513},
- pages={}
-}
-@article{ManNI2013,
- author={Man, K. and Miasari, M. and Shi, W. and Xin, A. and Henstridge, D. C. and Preston, S. and Pellegrini, M. and Belz, G. T. and Smyth, G. K. and Febbraio, M. A. and Nutt, S. L. and Kallies, A.},
- year={2013},
- title={{The transcription factor IRF4 is essential for TCR affinity-mediated metabolic programming and clonal expansion of T cells}},
- journal={Nature Immunology},
- volume={2013 Sep 22. doi: 10.1038/ni.2710},
- pages={}
-}
-@article{SpangenbergSCR2013,
- author={Spangenberg, L. and Shigunov, P. and Abud, A. P. and Cofré, A. R. and Stimamiglio, M. A. and Kuligovski, C. and Zych, J. and Schittini, A. V. and Costa, A. D. and Rebelatto, C. K. and Brofman, P. R. and Goldenberg, S. and Correa, A. and Naya, H. and Dallagiovanna, B.},
- year={2013},
- title={{Polysome profiling shows extensive posttranscriptional regulation during human adipocyte stem cell differentiation into adipocytes}},
- journal={Stem Cell Research},
- volume={11},
- pages={902-12}
-}
-@article{tang,
- author={Tang, J. Z. and Carmichael, C. L. and Shi, W. and Metcalf, D. and Ng, A. P. and Hyland, C. D. and Jenkins, N. A. and Copeland, N. G. and Howell, V. M. and Zhao, Z. J. and Smyth, G. K. and Kile, B. T. and Alexander, W. S.},
- year={2013},
- title={{Transposon mutagenesis reveals cooperation of ETS family transcription factors with signaling pathways in erythro-megakaryocytic leukemia}},
- journal={Proc Natl Acad Sci U S A},
- volume={110},
- pages={6091-6}
-}
-@article{ezh2,
- author={Pal, B. and Bouras, T. and Shi, W and Vaillant, F. and Sheridan, J. M. and Fu, N. and Breslin, K. and Jiang, K. and Ritchie, M. E. and Young, M. and Lindeman, G. J. and Smyth, G. K. and Visvader, J. E.},
- year={2013},
- title={{Global changes in the mammary epigenome are induced by hormonal cues and coordinated by Ezh2}},
- journal={Cell Reports},
- volume={3},
- pages={411-26}
-}
-@article{fcounts,
- author={Liao, Y. and Smyth, G. K. and Shi, W.},
- year={2014},
- title={{featureCounts: an efficient general-purpose program for assigning sequence reads to genomic features.}},
- journal={Bioinformatics},
- volume={30},
- issue={7},
- pages={923-30}
-}
-@article{seqc,
- author={SEQC/MAQC-III Consortium},
- year={2014},
- title={{A comprehensive assessment of RNA-seq accuracy, reproducibility and information content by the Sequencing Quality Control Consortium.}},
- journal={Nature Biotechnology},
- volume={32},
- issue={9},
- pages={903-14}
-}
-@article{exactsnp,
- author={Liao, Y. and Smyth, G. K. and Shi, W.},
- year={},
- title={{ExactSNP: an efficient and accurate SNP calling algorithm}},
- journal={In preparation},
-}
diff --git a/doc/SubreadUsersGuide.tex b/doc/SubreadUsersGuide.tex
index c93db6e..dc7edf3 100644
--- a/doc/SubreadUsersGuide.tex
+++ b/doc/SubreadUsersGuide.tex
@@ -21,6 +21,7 @@
\newcommand{\DGEList}{\textsf{DGEList}}
\newcommand{\voom}{\textsf{voom}}
\newcommand{\featureCounts}{\textsf{featureCounts}}
+\newcommand{\repair}{\textsf{repair}}
\newcommand{\R}{\textsf{R}}
\newcommand{\C}{\textsf{C}}
\newcommand{\Rpackage}[1]{\textsf{#1}}
@@ -34,9 +35,9 @@
\begin{center}
{\Huge\bf Subread/Rsubread Users Guide}\\
\vspace{1 cm}
-{\centering\large Subread v1.4.6-p5/Rsubread v1.19.2\\}
+{\centering\large Subread v1.5.0/Rsubread v1.20.1\\}
\vspace{1 cm}
-\centering 28 August 2015\\
+\centering 28 October 2015\\
\vspace{5 cm}
\Large Wei Shi and Yang Liao\\
\vspace{1 cm}
@@ -72,12 +73,10 @@ It scales up efficiently for longer reads.
\code{Subread} is a general-purpose read aligner.
It can be used to align reads generated from both genomic DNA sequencing and RNA sequencing technologies.
-It been successfully used in a number of high-profile studies \cite{TangNC2013,ManNI2013,SpangenbergSCR2013,tang,ezh2}.
+It has been successfully used in a number of high-profile studies \cite{TangNC2013,ManNI2013,SpangenbergSCR2013,tang,ezh2}.
\code{Subjunc} is specifically designed to detect exon-exon junctions and to perform full alignments for RNA-seq reads.
Note that \code{Subread} performs local alignments for RNA-seq reads, whereas \code{Subjunc} performs global alignments for RNA-seq reads.
-Both \code{Subread} and \code{Subjunc} detect insertions, deletions, fusions and these genomic events are reported in the read mapping results.
-\code{Subjunc} also reports all the splice sites discovered from the exon-spanning reads.
-It re-aligns all the reads using the compiled list of splice sites in its last step of read alignment, and it can detect splice sites in any location of the reads.
+\code{Subread} and \code{Subjunc} comprise a read re-alignment step in which reads are re-aligned using genomic variation data and junction data collected from the initial mapping.
The \code{Subindel} program carries out local read assembly to discover long insertions and deletions.
Read mapping should be performed before running this program.
@@ -112,6 +111,7 @@ Liao Y, Smyth GK and Shi W. featureCounts: an efficient general-purpose program
\section{Download and installation}
+
\subsection{SourceForge {\Subread} package}
\subsubsection{Installation from a binary distribution}
@@ -145,6 +145,7 @@ To install it on Oracle Solaris or OpenSolaris computer operating systems, issue
\code{make -f Makefile.SunOS}\\
+To install it on a Windows computer, you will first need to install a Unix-like environment such as Cygwin and then install the {\Subread} package.\\
A new directory called \code{bin} will be created under the home directory of the software package, and the executables generated from the compilation are saved to that directory.
To enable easy access to these executables, you may copy them to a system directory such as \code{/usr/bin} or add the path to them to your search path (your search path is usually specified in the environment variable \code{`PATH'}).
@@ -211,11 +212,6 @@ Therefore, indels in the reads can be readily detected by examining the differen
Moreover, the number of bases by which the mapping location of subreads are shifted gives the precise length of the indel.
Since no mismatches are allowed in the mapping of the subreads, the indels can be detected with a very high accuracy.
-\subsection{Detection of long indels}
-
-Detection of long indels is conducted by performing local read assembly.
-When the specified indel length (`-I' option in SourceForge \code{C} or `indels' paradigm in \code{Rsubread}) is greater than 16, the \code{Subread} and \code{Subjunc} will automatically start the read assembly procedure to identify indels of up to 200bp long.
-\code{Subindel} outputs the assembled contig sequences that contain the detected long insertions and/or deletions and also the CIGAR info for the indels.
\section{Detection of canonical exon-exon junctions}
\label{sec:junction}
@@ -234,35 +230,35 @@ Orientation of splicing sites is indicated by `XA' tag in section of optional fi
\includegraphics[scale=0.5]{junction.png}
\end{center}
-\section{Fusion detection}
-\code{Subjunc} can detect genomic fusion events such as chimera in both RNA sequencing and genomic DNA sequencing data.
-It performs fusion detection in a manner similar to what it does for exon-exon junction detection, but it allows the same read to be splitted across different chromosomes.
-It also allows a read to be splitted across different strands on the same chromosome.
-It does not require donor/receptor sites when calling fusions.
-Non-canonical exon-exon junctions, which have donor/receptor sites other than GT/AG, may also be reported when using \code{subjunc} to detect fusions.
+\section{Detection of structural variants (SVs)}
+
+\code{Subread} and \code{Subjunc} can be used to detect SV events, including long indels, duplications, inversions and translocations, in RNA-seq and genomic DNA-seq data.
+
+Detection of long indels is conducted by performing local read assembly.
+When the specified indel length (`-I' option in SourceForge \code{C} or `indels' paradigm in \code{Rsubread}) is greater than 16, \code{Subread} and \code{Subjunc} will automatically start the read assembly process to detect long indels (up to 200bp).
-If a read is found to (i) map to two or more chromosomes, or (ii) map to different strands of the same chromosome or (iii) to span a regions wider than $2^{27}$ bases, \code{Subjunc} will use optional fields in the SAM/BAM output file to report the secondary alignments of the read.
-The primary alignment of the read is saved in the main fields of the same record.
-The following tags are used for secondary alignments in the optional fields:
-CC(chromosome name), CP(mapping position), CG(CIGAR string) and CT(strand).
+Breakpoints detected from SV events will be saved to a text file (`.breakpoints.txt'), which includes chromosomal coordinates of breakpoints and also the number of reads supporting each pair of breakpoints found from the same SV event.
+For the reads that were found to contain SV breakpoints, extra tags will be added for them in mapping output.
+These tags include CC(chromosome name), CP(mapping position), CG(CIGAR string) and CT(strand), and they describe the secondary alignment of the read (the primary alignment is described in the main fields).
-\section{Read re-alignments}
-Both \code{Subread} and \code{Subjunc} aligners re-align the reads after identifying indels, fusions and exon-exon junctions (subjunc only) from the data.
-They make use of the flanking window approach to identify indels, fusions and exon junctions.
-This is a highly accurate approach since it requires the identified indels, fusions or exon junctions to be flanked by perfectly matched subreads (16mers) at both sides.
-These discovered indels, fusions and exon junctions are then used to re-align the reads.
-Indels, fusions and exon junctions that are located very close to read ends can also be found during the re-alignment.
-We will remove those indels, fusions and junctions if they were found not to be supported by any read after read re-alignment.
-Numbers of reads supporting these genomic events will be reported.
+\section{Two-scan read alignment}
-\section{Recommended alignment setting}
+\code{Subread} and \code{Subjunc} aligners employ a two-scan approach for read mapping.
+In the first scan, the aligners use the seed-and-vote method to identify candidate mapping locations for each read and also discover short indels, exon-exon junctions and structural variants.
+In the second scan, they carry out final alignment for each read using the variant and junction information.
+Variant and junction data (including chromosomal coordinates and number of supporting reads) will be output along with the read mapping results.
+To the best of our knowledge, \code{Subread} and \code{Subjunc} are the first to employ a two-scan mapping strategy to achieve a superior mapping accuracy.
+This strategy was later adopted by other aligners as well (called `two-pass').
-It is recommended to turn on \code{-u} option (reporting uniquely mapped reads only) and also \code{-H} option (breaking ties using Hamming distance), when running \code{Subread} and \code{Subjunc} aligners.
-This should give the most accurate mapping results with little or no cost to the mapping percentage.
-This is the default setting used in \code{align} and \code{subjunc} functions in {\Rsubread} package (\code{unique=TRUE} and \code{tieBreakHamming=TRUE}).
+
+\section{Recommended aligner setting}
+
+It is recommended to report uniquely mapped reads only when running \code{Subread} and \code{Subjunc} aligners since this will give the most accurate mapping result.
+By default, only uniquely mapped reads are reported when running aligners in Bioconductor {\Rsubread} package.
+This however needs to be explicitly specified when running aligners in SourceForge {\Subread} package (\code{-u}).
\chapter{Mapping reads generated by genomic DNA sequencing technologies}
@@ -281,8 +277,7 @@ An index must be built for the reference first and then the read mapping can be
{\noindent\bf Step 2: Align reads}\\
\noindent Map single-end reads from a gzipped file using 5 threads and save mapping results to a BAM file:\\
-\code{subread-align -T 5 -i my\_index --gzFASTQinput --BAMoutput}\\
-\code{-r reads.txt.gz -o subread\_results.bam}\\
+\code{subread-align -T 5 -i my\_index -r reads.txt.gz -o subread\_results.bam}\\
\noindent Detect indels of up to 16bp:\\
\code{subread-align -I 16 -i my\_index -r reads.txt -o subread\_results.sam}\\
@@ -327,11 +322,6 @@ align(index="my_index",readfile1="reads.txt.gz",output_file="rsubread.bam",indel
align(index="my_index",readfile1="reads.txt.gz",output_file="rsubread.bam",nBestLocations=3)
\end{Rcode}
-\noindent Report uniquely mapped reads only:
-\begin{Rcode}
-align(index="my_index",readfile1="reads.txt.gz",output_file="rsubread.bam",unique=TRUE)
-\end{Rcode}
-
\noindent Map paired-end reads:
\begin{Rcode}
align(index="my_index",readfile1="reads1.txt.gz",readfile2="reads2.txt.gz",
@@ -342,13 +332,14 @@ output_file="rsubread.bam",minFragLength=50,maxFragLength=600)
\section{Index building}
\label{sec:index}
-The \code{subread-buildindex} (\code{buildindex} function in \Rsubread) program builds an base-space or color-space index using the reference sequences.
+The \code{subread-buildindex} (\code{buildindex} function in \Rsubread) program builds an index for the reference genome by creating a hash table in which keys are 16bp mers (subreads) extracted from the genome and values are their chromosomal locations.
+By default, subreads are extracted from the genome at a 2bp interval.
The reference sequences should be in FASTA format (the header line for each chromosomal sequence starts with ``$>$'').\\
-This program extracts all the 16 mer sequences from the reference genome at a 2bp interval and then uses them to build a hash table.
-Keys in the hash table are unique 16 mers and values are their chromosomal locations.
Table 1 describes the arguments used by the \code{subread-buildindex} program.
+\newpage
+
\begin{table}[h]
\raggedright{Table 1: Arguments used by the \code{subread-buildindex} program (\code{buildindex} function in \Rsubread).
Arguments in parenthesis in the first column are used by \code{buildindex}.\newline\\}
@@ -362,7 +353,7 @@ chr1.fa, chr2.fa, ... \newline (\code{reference}) & Give names of chromosome fil
\hline
-c \newline (\code{colorspace}) & Build a color-space index.\\
\hline
--f $<int>$ \newline (\code{TH\_subread}) & Specify the threshold for removing uninformative subreads (highly repetitive 16mers). Subreads will be excluded from the index if they occur more than threshold number of times in the reference genome. Default value is 24.\\
+-f $<int>$ \newline (\code{TH\_subread}) & Specify the threshold for removing uninformative subreads (highly repetitive 16bp mers). Subreads will be excluded from the index if they occur more than threshold number of times in the reference genome. Default value is 100.\\
\hline
-F \newline (\code{gappedIndex=FALSE}) & Build a full index for the reference genome. 16bp mers (subreads) will be extracted from every position of the reference genome. Under default setting (`-F' is not specified), subreads are extracted in every three bases from the genome.\\
\hline
@@ -379,18 +370,18 @@ chr1.fa, chr2.fa, ... \newline (\code{reference}) & Give names of chromosome fil
\section{Read mapping}
-The \texttt{subread-align} program (\code{align} in \Rsubread) extracts a number of subreads from each read and then uses these subreads to vote for the mapping location of the read.
-It uses the the ``seed-and-vote'' paradigm for read mapping.
-\code{subread-align} program automatically determines if a read should be globally aligned or locally aligned, making it particularly poweful for mapping RNA-seq reads.
-Table 2 describes the arguments used by the \code{subread-align} program (and also the \code{subjunc} program).
-These arguments are used by the read mapping programs included in both SourceForge \code{Subread} package and Bioconductor \code{Rsubread} package, although argument names are different in these two packages (arguments names used by Bioconductor \code{Rsubread} are included in parenthesis).
+The {\Subread} aligner (\texttt{subread-align} program in SourceForge {\Subread} package or \code{align} function in Bioconductor {\Rsubread} package) extracts a number of subreads from each read and then uses these subreads to vote for the mapping location of the read.
+It uses the ``seed-and-vote'' paradigm for read mapping and reports the largest mappable region for each read.
+Table 2 describes the arguments used by {\Subread} aligner (and also \code{Subjunc} aligner).
+Arguments used in Bioconductor \code{Rsubread} package are included in parenthesis.\\
+
-\newpage
\begin{longtable}{|p{4cm}|p{12cm}|}
\multicolumn{2}{p{16cm}}{Table 2: Arguments used by the \code{subread-align}/\code{subjunc} programs included in the SourceForge {\Subread} package.
Arguments in parenthesis in the first column are the equivalent arguments used in Bioconductor {\Rsubread} package.
-Arguments used by \code{subread-align} only are marked with `$^*$' and arguments used by \code{subjunc} only are marked with `$^{**}$'.
+Arguments used only by \code{subread-align} are marked with $^*$.
+Arguments used only by \code{subjunc} are marked with $^{**}$.
\newline
}
\endfirsthead
@@ -405,11 +396,9 @@ Arguments & Description \\
\hline
-D $<int>$ \newline (\code{maxFragLength}) & Specify the maximum fragment/template length, 600 by default.\\
\hline
-$^*$ -E $<int>$ \newline (\code{DP\_GapExtPenalty}) & Specify the penalty for extending the gap when performing the Smith-Waterman dynamic programming. 0 by defaut.\\
-\hline
-$^*$ -G $<int>$ \newline (\code{DP\_GapOpenPenalty}) & Specify the penalty for opening a gap when applying the Smith-Waterman dynamic programming to detecting indels. -2 by defaut.\\
+-E $<int>$ \newline (\code{DP\_GapExtPenalty}) & Specify the penalty for extending the gap when performing the Smith-Waterman dynamic programming. 0 by default.\\
\hline
--H \newline (\code{tieBreakHamming =TRUE}) & Use Hamming distance to break ties when more than one best mapping location is found.\\
+-G $<int>$ \newline (\code{DP\_GapOpenPenalty}) & Specify the penalty for opening a gap when applying the Smith-Waterman dynamic programming to detect indels. -2 by default.\\
\hline
-i $<index> \newline (\code{index}) $ & Specify the base name of the index.\\
\hline
@@ -421,41 +410,37 @@ $^*$ -G $<int>$ \newline (\code{DP\_GapOpenPenalty}) & Specify the penalty for o
\hline
-n $<int>$ \newline (\code{nsubreads}) & Specify the number of subreads extracted from each read, 10 by default.\\
\hline
--o $<output>$ \newline (\code{output\_file}) & Give the name of output file. Default output format in SourceForge {\Subread} is SAM, and default output format in Bioconductor {\Rsubread} is BAM. All reads are included in mapping output, including both mapped and unmapped reads, and they are in the same order as in the input file.\\
+-o $<output>$ \newline (\code{output\_file}) & Give the name of output file. The default output format is BAM. All reads are included in mapping output, including both mapped and unmapped reads, and they are in the same order as in the input file.\\
\hline
-p $<int>$ \newline (\code{TH2}) & Specify the minimum number of consensus subreads both reads from the same pair must have. This argument is only applicable for paired-end read data. The value of this argument should not be greater than that of `-m' option, so as to rescue those read pairs in which one read has a high mapping quality but the other does not. 1 by default.\\
\hline
-P $<3:6>$ \newline (\code{phredOffset}) & Specify the format of Phred scores used in the input data, '3' for phred+33 and '6' for phred+64. '3' by default. For \code{align} function in \Rsubread, the possible values are `33' (for phred+33) and `64' (for phred+64). `33' by default.\\
\hline
--Q \newline (\code{tieBreakQS=TRUE}) & Use mapping quality scores to break ties when more than one best mapping location is found.\\
-\hline
--r $<input>$ \newline (\code{readfile1}) & Give the name of input file(s) (multiple files are allowed to be provided to \code{align} and \code{subjunc} functions in {\Rsubread}). For paired-end read data, this gives the first read file and the other read file should be provided via the -R option. Supported input formats include FASTQ/FASTA, gzipped FASTQ/FASTA, SAM and BAM. Default input format in SourceForge {\Subread} is FASTQ/FASTA and default input format in Bioconductor {\Rsubread} [...]
+-r $<input>$ \newline (\code{readfile1}) & Give the name of input file(s) (multiple files are allowed to be provided to \code{align} and \code{subjunc} functions in {\Rsubread}). For paired-end read data, this gives the first read file and the other read file should be provided via the -R option. Supported input formats include FASTQ/FASTA (uncompressed or gzip compressed)(default), SAM and BAM.\\
\hline
-R $<input>$ \newline (\code{readfile2}) & Provide name of the second read file from paired-end data. The program will switch to paired-end read mapping mode if this file is provided. (multiple files are allowed to be provided to \code{align} and \code{subjunc} functions in {\Rsubread}).\\
\hline
-S $<ff:fr:rf>$ \newline (\code{PE\_orientation}) & Specify the orientation of the two reads from the same pair. It has three possible values including `fr', `ff' and `'rf. Letter `f' denotes the forward strand and letter `r' the reverse strand. `fr' by default (ie. the first read in the pair is on the forward strand and the second read on the reverse strand).\\
\hline
+-t $<int>$ \newline (\code{type}) & Specify the type of input sequencing data. Possible values include \code{0} (RNA-seq data) and \code{1} (genomic DNA-seq data such as WGS, WES, ChIP-seq data etc.).\\
+\hline
-T $<int>$ \newline (\code{nthreads}) & Specify the number of threads/CPUs used for mapping. The value should be between 1 and 32. 1 by default.\\
\hline
-u \newline (\code{unique=TRUE}) & Output uniquely mapped reads only. Reads that were found to have more than one best mapping location will not be reported.\\
\hline
-$^*$ -X $<int>$ \newline (\code{DP\_MismatchPenalty}) & Specify the penalty for mismatches when performing the Smith-Waterman dynamic programming. 0 by defaut.\\
+-X $<int>$ \newline (\code{DP\_MismatchPenalty}) & Specify the penalty for mismatches when performing the Smith-Waterman dynamic programming. 0 by default.\\
\hline
-$^*$ -Y $<int>$ \newline (\code{DP\_MatchScore}) & Specify the score for the matched base when performing the Smith-Waterman dynamic programming. 2 by defaut.\\
+-Y $<int>$ \newline (\code{DP\_MatchScore}) & Specify the score for the matched base when performing the Smith-Waterman dynamic programming. 2 by default.\\
\hline
-$^{**}$$--$allJunctions \newline (\code{reportAllJunctions =TRUE}) & This option should only be used with \code{subjunc} for RNA-seq data. If specified, \code{subjunc} will report non-canonical exon-exon junctions and structural variants, in addition to the canonical exon-exon junctions (canonical donor/receptor sites detected). Both canonical and non-canonical junctions will be output to a file with a suffix name ".junction.bed". Locations of breakpoints for structural variants will be [...]
+$^{**}$$--$allJunctions \newline (\code{reportAllJunctions =TRUE}) & This option should be used with \code{subjunc} for detecting canonical exon-exon junctions (with `GT/AG' donor/receptor sites), non-canonical exon-exon junctions and structural variants (SVs) in RNA-seq data. Detected junctions will be saved to a file with suffix name ``.junction.bed". Detected SV breakpoints will be saved to a file with suffix name ``.breakpoints.txt", which includes chromosomal coordinates of detected [...]
\hline
$--$BAMinput \newline (\code{input\_format="BAM"}) & Specify that the input read data are in BAM format.\\
\hline
-$--$BAMoutput \newline (\code{output\_format="BAM"}) & Specify that mapping results are saved into a BAM format file. \\
-\hline
-$^{**}$$--$dnaseq \newline (\code{DNAseq=TRUE}) & This option should only be used with \code{subjunc} for genomic DNA sequencing data. If specified, \code{subjunc} will search for long deletions, inversions and translocations in the read data. Locations of breakpoints for these structural variants will be saved to a file with a suffix name ".breakpoints.txt". Breakpoint-containing reads may include the following fields for their minor mapped regions in mapping output: CC(Chr), CP(Positio [...]
-\hline
-$--$gzFASTQinput \newline (\code{input\_format=} \newline \code{"gzFASTQ"}) & Specify that the input read data are in gzipped FASTQ or gzipped FASTA format.\\
+$--$SAMoutput \newline (\code{output\_format="SAM"}) & Specify that mapping results are saved into a SAM format file. \\
\hline
-$--$minDistance BetweenVariants $<int>$ & Minimum allowed distance between two neighboring variants (or junctions in RNA-seq data) within the same read. 16 by default. The value should be greater than 0 and less than the length of the read.\\
+$--$complexIndels & Detect multiple short indels that occur concurrently in a small genomic region (these indels could be as close as 1bp apart).\\
\hline
-$^*$$--$reportFusions \newline (\code{reportFusions=TRUE}) & This option should only be used with \code{subread-align} for the mapping of genomic DNA-seq data. If specified, \code{subread-align} will report discovered fusion events such as chimeras. Fusions are reported in the same format as that used in `$--$dnaseq' option.\\
+$^*$$--$sv \newline (\code{detectSV=TRUE}) & This option should be used with \code{subread-align} for detecting structural variants (SVs) in genomic DNA sequencing data. Detected SV breakpoints will be saved to a file with suffix name ``.breakpoints.txt", which includes chromosomal coordinates of detected SV breakpoints and also number of supporting reads for each SV event. In the read mapping output, each breakpoint-containing read will contain the following extra fields for the descrip [...]
\hline
$--$rg $<string>$ \newline (\code{readGroup}) & Add a $<tag:value>$ to the read group (RG) header in the mapping output. \\
\hline
@@ -467,6 +452,8 @@ $--$trim5 $<int>$ \newline (\code{nTrim5}) & Trim off $<int>$ number of bases fr
\hline
$--$trim3 $<int>$ \newline (\code{nTrim3}) & Trim off $<int>$ number of bases from 3' end of each read. 0 by default.\\
\hline
+$--$type $<int>$ \newline (\code{type}) & Specify the type of input sequencing data. Possible values include \code{0}, denoting RNA-seq data, or \code{1}, denoting genomic DNA-seq data.\\
+\hline
-v & Output version of the program. \\
\hline
\end{longtable}
@@ -495,9 +482,9 @@ The MQS is a read-length normalized value and it is in the range [0, 60).
\section{Mapping output}
-Read mapping results for each library will be saved to a SAM or BAM format file.
-A text file, which includes discovered insertions and deletions, will also be generated for each library (`*.indel').
-If \code{subread-align} was run with the parameter `--reportFusions' or \code{subjunc} was run with the parameter `--dnaseq', then the breakpoints detected from structural variant events will be output to a text file for each library as well.
+Read mapping results for each library will be saved to a BAM or SAM format file.
+Short indels detected from the read data will be saved to a text file (`.indel').
+If `--sv' is specified when running \code{subread-align}, breakpoints detected from structural variant events will be output to a text file for each library as well (`.breakpoints.txt').
\newpage
@@ -523,7 +510,7 @@ You can provide a list of FASTA files or a single FASTA file including all the r
\noindent{{\Subread}}\\
-\noindent For the purpose of differential expression analysis (ie. discovering differentially expressed genes), we recommend you to use the {\Subread} aligner.
+\noindent If the purpose of an RNA-seq experiment is to quantify gene-level expression and discover differentially expressed genes, the {\Subread} aligner is recommended.
{\Subread} carries out local alignments for RNA-seq reads.
The commands used by {\Subread} to align RNA-seq reads are the same as those used to align gDNA-seq reads.
Below is an example of using {\Subread} to map single-end RNA-seq reads.\\
@@ -536,15 +523,15 @@ The main difference between {\Subread} and {\Subjunc} is that {\Subread} does no
For the alignments of the exon-spanning reads, {\Subread} just uses the largest mappable regions in the reads to find their mapping locations.
This makes {\Subread} more computationally efficient.
The largest mappable regions can then be used to reliably assign the reads to their target genes by using a read summarization program (eg. \featureCounts, see Section~\ref{sec:featureCounts}), and differential expression analysis can be readily performed based on the read counts yielded from read summarization.
-Therefore, {\Subread} is sufficient for read mapping if the purpose of the RNA-seq analysis is to perform a differential expression analysis.
+Therefore, {\Subread} is sufficient for read mapping if the purpose of RNA-seq analysis is to perform a differential expression analysis.
Also, {\Subread} could report more mapped reads than {\Subjunc}.
-For example, the exon-spanning reads that are not aligned by {\Subjunc} due to the lack of GT/AG splicing signals (this is the only donor/receptor site accepted by {\Subjunc}) could be aligned by {\Subread}, as long as they have a good match with the target region.\\
+For example, the exon-spanning reads that are not aligned by {\Subjunc} due to the lack of canonical GT/AG splicing signals can be aligned by {\Subread} as long as they have a good match with the reference sequence.\\
\noindent{{\Subjunc}}\\
-For other purposes of the RNA-seq data anlayses such as exon-exon junction detection and genomic mutation detection, in which reads need to be fully aligned (especially the exon-spanning reads), {\Subjunc} aligner should be used.
+For other purposes of RNA-seq data analysis, such as exon-exon junction detection, alternative splicing analysis and genomic mutation detection, the {\Subjunc} aligner should be used because exon-spanning reads need to be fully aligned.
Below is an example command of using {\Subjunc} to perform global alignments for paired-end RNA-seq reads.
-Note that there are two files included in the output: one containing the discovered exon-exon junctions (BED format) and the other containing the mapping results for reads (SAM or BAM format).\\
+Note that there are two files produced after mapping: one is a BAM-format file including mapping results and the other a BED-format file including discovered exon-exon junctions.\\
\code{subjunc -i my\_index -r rnaseq-reads1.txt -R rnaseq-reads2.txt -o subjunc\_result}
@@ -601,20 +588,10 @@ Table 2 describes the arguments used by the {\Subjunc} program.\\
\section{Mapping output}
-Read mapping results for each library will be saved to a SAM or BAM format file.
-The detected exon-exon junctions will be saved to a text file for each library (`*.junction.bed').
-Detected indels will be saved to a text file for each library as well (`*.indel').\\
-
-If the `--allJunctions' argument was specified when running \code{subjunc}, the breakpoints detected from structural variant events will also be saved to a text file for each library (`*.fusion.txt').
-A read gives rise to such a breakpoint if \\
-(1) one part of its sequence maps to one chromosome and the other part maps to a different chromosome (the two parts are separated by the breakpoint), or\\
-(2) both parts map to the same chromosome but to different strands, or\\
-(3) both parts map to the same strand on the same chromosome but their relative position swapped (ie. the right-side part map to the left of the left-side part).
+Read mapping results for each library will be saved to a BAM/SAM file.
+Detected exon-exon junctions will be saved to a BED file for each library (`.junction.bed').
+Detected short indels will be saved to a text file (`.indel').\\
-Reads falling into one of the above conditions will contain additional fields in the mapping output, including CC(Chr), CP(Position),CG(CIGAR) and CT(strand).
-Note that exon-spanning reads that span an intron of $>$500kb long will also contain such additional fields (splicing points detected from these reads are saved in the `*.junction.bed' file).
-
-If none of the above conditions was satisfied, the detected breakpoints will be put into the `*.junction.bed' file, indicating that the breakpoints arise from exon-exon splicing events.
\section{Mapping microRNA sequencing reads (miRNA-seq)}
@@ -623,7 +600,7 @@ To use {\Subread} aligner to map miRNA-seq reads, a full index must be built for
For example, the following command builds a full index for mouse reference genome \emph{mm10}:
\code{\\
-subread-buildindex -F -B -f 72 -o mm10\_full\_index mm10.fa \\
+subread-buildindex -F -B -o mm10\_full\_index mm10.fa \\
}
The full index includes 16bp mers extracted from every genomic location in the genome.
@@ -652,7 +629,7 @@ Below is an example of mapping 50bp long reads (adaptor sequences were included
\code{\\
subread-align -i mm10\_full\_index -n 35 -m 4 -M 3 -T 10 -I 0 -P 3 -B 10 \\
---BAMoutput -r miRNA\_reads.fastq -o result.sam\\
+-r miRNA\_reads.fastq -o result.sam\\
}
The `-B 10' parameter instructs {\Subread} aligner to report up to 10 best mapping locations (equally best) in the mapping results.
@@ -674,9 +651,8 @@ Care must be taken to ensure that such reads are not over-counted or under-count
Here we describe the {\featureCounts} program, an efficient and accurate read quantifier.
{\featureCounts} has the following features:
\begin{itemize}
-\item It carries out precise and accurate read assignments by taking care of indels, junctions and fusions in the reads.
-\item It takes less than 4 minutes to summarize 20 million pairs of reads to 26k RefSeq genes using one thread, and uses $<$20MB of memory (you can run it on a Mac laptop).
-\item It supports multi-threaded running, making it extremely fast for summarizing large datasets.
+\item It carries out precise and accurate read assignments by taking care of indels, junctions and structural variants in the reads.
+\item It takes only $\sim$1 minute to summarize 20 million read pairs to 26 thousand RefSeq genes.
\item It supports GTF/SAF format annotation and SAM/BAM read data.
\item It supports strand-specific read summarization.
\item It can perform read summarization at both feature level (eg. exon level) and meta-feature level (eg. gene level).
@@ -693,7 +669,14 @@ Here we describe the {\featureCounts} program, an efficient and accurate read qu
\subsection{Input data}
-The data input to {\featureCounts} consists of (i) one or more files of aligned reads in either SAM or BAM format and (ii) a list of genomic features in either Gene Transfer Format (GTF) or General Feature Format (GFF) or Simplified Annotation Format (SAF). The read input format (SAM or BAM) is automatically detected and so does not need to be specified by the user. For paired reads, {\featureCounts} also automatically sorts reads by name if paired reads are not in consecutive positions [...]
+The data input to {\featureCounts} consists of (i) one or more files of aligned reads in either SAM or BAM format and (ii) a list of genomic features in either Gene Transfer Format (GTF) or General Feature Format (GFF) or Simplified Annotation Format (SAF). The format of input reads is automatically detected (SAM or BAM).
+
+For paired-end reads, if they were location-sorted in the input, {\featureCounts} will automatically re-order them so that reads from the same pair are placed next to each other before they are counted.
+We also provide a utility program {\repair} to allow users to pair up the reads before feeding them to {\featureCounts}.
+Note that name-sorted paired-end reads generated by other programs may include incorrectly paired reads due to, for example, multi-mapping issues.
+If this is the case, {\featureCounts} will re-sort them.
+
+Both read alignment and read counting should use the same reference genome. For each read, the BAM/SAM file gives the name of the reference chromosome or contig the read mapped to, the start position of the read on the chromosome or contig/scaffold, and the so-called CIGAR string giving the detailed alignment information including insertions and deletions and so on relative to the start position.
The genomic features can be specified in either GTF/GFF or SAF format. The SAF format is the simpler and includes only five required columns for each feature (see next section). In either format, the feature identifiers are assumed to be unique, in accordance with commonly used Gene Transfer Format (GTF) refinement of GFF.
@@ -743,7 +726,7 @@ We recommend to use unique gene identifiers, such as NCBI Entrez gene identifier
\subsection{Overlap of reads with features}
-{\featureCounts} preforms precise read assignment by comparing mapping location of every base in the read or fragment with the genomic region spanned by each feature. It takes account of any gaps (insertions, deletions, exon-exon junctions or fusions) that are found in the read. It calls a hit if any overlap (1bp or more) is found between the read or fragment and a feature.
+{\featureCounts} performs precise read assignment by comparing the mapping location of every base in the read or fragment with the genomic region spanned by each feature. It takes account of any gaps (insertions, deletions, exon-exon junctions or structural variants) that are found in the read. It calls a hit if any overlap (1bp or more) is found between the read or fragment and a feature.
A hit is called for a meta-feature if the read or fragment overlaps any component feature of the meta-feature.
\subsection{Multiple overlaps}
@@ -841,7 +824,7 @@ input\_files \newline (\code{files}) & Give the names of input read files that i
\hline
-s $<int>$ \newline (\code{isStrandSpecific}) & Indicate if strand-specific read counting should be performed. It has three possible values: 0 (unstranded), 1 (stranded) and 2 (reversely stranded). 0 by default. For paired-end reads, strand of the first read is taken as the strand of the whole fragment and FLAG field of the current read is used to tell if it is the first read in the fragment.\\
\hline
--S $<ff:fr:rf>$ & Specify the orientation of the two reads from the same pair. It has three possible values including `fr', `ff' and `'rf. Letter `f' denotes the forward strand and letter `r' the reverse strand. `fr' by default (ie. the first read in the pair is on the forward strand and the second read on the reverse strand).\\
+-S $<ff:fr:rf>$ \newline (\code{PE\_orientation}) & Specify the orientation of the two reads from the same pair. It has three possible values including `fr', `ff' and `rf'. Letter `f' denotes the forward strand and letter `r' the reverse strand. `fr' by default (ie. the first read in the pair is on the forward strand and the second read on the reverse strand).\\
\hline
-t $<input>$ \newline (\code{GTF.featureType}) & Specify the feature type. Only rows which have the matched feature type in the provided GTF annotation file will be included for read counting. `exon' by default.\\
\hline
@@ -994,7 +977,7 @@ However, it is extremely computing-intensive to analyze the data generated from
To discover SNPs, reads need to be mapped to the reference genome first and then all the read data mapped to a particular site will be used for SNP calling for that site.
Discovery of SNPs is often confounded by many sources of errors.
Mapping errors and sequencing errors are often the major sources of errors causing incorrect SNP calling.
-Incorrect alignments of indels, exon-exon junctions and fusions in the reads can also result in wrong placement of blocks of continuous read bases, likely giving rise to consecutive incorrectly reported SNPs.
+Incorrect alignments of indels, exon-exon junctions and structural variants in the reads can also result in wrong placement of blocks of continuous read bases, likely giving rise to consecutive incorrectly reported SNPs.
We have developed a highly accurate and efficient SNP caller, called \emph{exactSNP} \cite{exactSNP}.
\emph{exactSNP} calls SNPs for individual samples, without requiring control samples to be provided.
@@ -1048,6 +1031,43 @@ Arguments & Description \\
+\chapter{Utility programs}
+
+Usage info for each utility program can be seen by just typing the program name on the command prompt.
+
+\section{repair}
+
+This program takes as input a paired-end BAM file and places reads from the same pair next to each other in its output.
+BAM files generated by {\repair} are compatible with the {\featureCounts} program, ie. they will not be re-sorted by {\featureCounts}.
+Note that you do not have to run {\repair} before running {\featureCounts}.
+{\featureCounts} calls {\repair} automatically if it finds that reads need to be re-sorted.
+
+The {\repair} program uses a novel approach to quickly find reads from the same pair, rather than performing a time-consuming sort of read names.
+It takes only about half a minute to re-order a location-sorted BAM file including 30 million read pairs.
+
+\section{coverageCount}
+
+Compute the read coverage for each chromosomal location in the genome.
+
+\section{propmapped}
+
+Get number of mapped reads from a BAM/SAM file.
+
+\section{qualityScores}
+
+Retrieve Phred scores for read bases from a Fastq/BAM/SAM file.
+
+\section{removeDup}
+
+Remove duplicated reads from a SAM file.
+
+
+\section{subread-fullscan}
+
+Get all chromosomal locations that contain a genomic sequence sharing high homology with a given input sequence.
+
+
+
\chapter{Case studies}
\section{A Bioconductor R pipeline for analyzing RNA-seq data}
@@ -1095,7 +1115,7 @@ buildindex(basename="chr1",reference="hg19_chr1.fa")
\begin{Rcode}
targets <- readTargets()
align(index="chr1",readfile1=targets$InputFile,input_format="gzFASTQ",output_format="BAM",
-output_file=targets$OutputFile,tieBreakHamming=TRUE,unique=TRUE,indels=5)
+output_file=targets$OutputFile,unique=TRUE,indels=5)
\end{Rcode}
{\noindent\bf Read summarization.} Summarize mapped reads to NCBI RefSeq genes.
diff --git a/doc/indel.png b/doc/indel.png
deleted file mode 100644
index 659d7c2..0000000
Binary files a/doc/indel.png and /dev/null differ
diff --git a/doc/junction.png b/doc/junction.png
deleted file mode 100644
index 0fe274d..0000000
Binary files a/doc/junction.png and /dev/null differ
diff --git a/doc/seed-and-vote.png b/doc/seed-and-vote.png
deleted file mode 100644
index 7a59415..0000000
Binary files a/doc/seed-and-vote.png and /dev/null differ
diff --git a/doc/voom_mean_variance.png b/doc/voom_mean_variance.png
deleted file mode 100644
index f3b4302..0000000
Binary files a/doc/voom_mean_variance.png and /dev/null differ
diff --git a/src/BACK-sambam-file.h b/src/BACK-sambam-file.h
deleted file mode 100644
index b2e4e4d..0000000
--- a/src/BACK-sambam-file.h
+++ /dev/null
@@ -1,56 +0,0 @@
-#ifndef _SAMBAM_FILE_H_
-#define _SAMBAM_FILE_H_
-
-typedef unsigned char BS_uint_8;
-typedef unsigned short BS_uint_16;
-typedef unsigned int BS_uint_32;
-
-typedef struct
-{
- FILE * os_file;
- unsigned long long next_block_header_offset;
- int file_mode;
-} BamSam_FILE;
-
-typedef struct
-{
- BS_uint_8 ID1;
- BS_uint_8 ID2;
- BS_uint_8 CM;
- BS_uint_8 FLG;
- BS_uint_32 MTIME;
- BS_uint_8 XFL;
- BS_uint_8 OS;
- BS_uint_16 XLEN;
-} BGZF_Header;
-
-typedef struct
-{
- BS_uint_8 SI1;
- BS_uint_8 SI2;
- BS_uint_16 SLEN;
- BS_uint_16 BSIZE;
-} RCF1952_Subfield;
-
-#define SAMBAM_FILE_SAM 10
-#define SAMBAM_FILE_BAM 20
-
-// this function returns 0 if OK, or a minus value if the file reaches EOF.
-// the file pointer is put to the first byte of the first subfield after the header.
-int get_next_BGZF_block_header(BamSam_FILE * fp, BGZF_Header * header);
-
-// The file pointer is put to the first byte of CDATA.
-// There should be a BamSam subfield found so the function can return 0; if not, the return value is minus.
-int get_RFC1952_subfield(BamSam_FILE * fp, RCF1952_Subfield * field);
-
-// This function moves 8 bytes of fp, putting the file pointer to the first byte of the next BamSam block.
-int finalise_BamSam_block(BamSam_FILE * fp);
-
-// This function opens a BamSam file in read-only mode.
-// FILE_MODE specifies if it is a SAM file or a BAM file.
-// It returns NULL if fopen(fn,"r") == NULL.
-BamSam_FILE * fopen_BamSam(const char * fn, int FILE_MODE);
-
-// just like feof(fp)
-int feof_BamSam(BamSam_FILE *fp);
-#endif
diff --git a/src/HelperFunctions.c b/src/HelperFunctions.c
index 0ceea7b..1c6d855 100644
--- a/src/HelperFunctions.c
+++ b/src/HelperFunctions.c
@@ -19,16 +19,312 @@
#include <ctype.h>
#include <string.h>
+#include <assert.h>
#include "subread.h"
+#include "gene-algorithms.h"
#include "HelperFunctions.h"
-int RSubread_parse_CIGAR_string(const char * CIGAR_Str, int * Section_Start_Chro_Pos,unsigned short * Section_Start_Read_Pos, unsigned short * Section_Chro_Length, int * is_junction_read)
+
+
+// This assumes the first part of Cigar has different strandness to the main part of the cigar.
+// Pos is the LAST WANTED BASE location before the first strand jump (split by 'b' or 'n').
+// The first base in the read actually has a larger coordinate than Pos.
+// new_cigar has to be at least 100 bytes.
+unsigned int reverse_cigar(unsigned int pos, char * cigar, char * new_cigar) {
+ int cigar_cursor = 0;
+ new_cigar[0]=0;
+ unsigned int tmpi=0;
+ int last_piece_end = 0;
+ int last_sec_start = 0;
+ unsigned int chro_pos = pos, this_section_start = pos, ret = pos;
+ int is_positive_dir = 0;
+ int read_cursor = 0;
+ int section_no = 0;
+
+ for(cigar_cursor = 0 ; ; cigar_cursor++)
+ {
+ if( cigar [cigar_cursor] == 'n' || cigar [cigar_cursor] == 'b' || cigar [cigar_cursor] == 0)
+ {
+ int xk1, jmlen=0, nclen=strlen(new_cigar);
+ char jump_mode [13];
+
+ if(cigar [cigar_cursor] !=0)
+ {
+ sprintf(jump_mode, "%u%c", tmpi, cigar [cigar_cursor] == 'b'?'n':'b');
+ jmlen = strlen(jump_mode);
+ }
+
+ for(xk1=nclen-1;xk1>=0; xk1--)
+ new_cigar[ xk1 + last_piece_end + jmlen - last_sec_start ] = new_cigar[ xk1 ];
+ new_cigar [nclen + jmlen + last_piece_end - last_sec_start ] = 0;
+
+ memcpy(new_cigar , jump_mode, jmlen);
+ memcpy(new_cigar + jmlen , cigar + last_sec_start, last_piece_end - last_sec_start);
+
+ last_sec_start = cigar_cursor+1;
+
+ if(is_positive_dir && cigar [cigar_cursor] !=0)
+ {
+ if(cigar [cigar_cursor] == 'b') chro_pos -= tmpi - read_cursor - 1;
+ else chro_pos += tmpi - read_cursor - 1;
+ }
+ if((!is_positive_dir) && cigar [cigar_cursor] !=0)
+ {
+ if(cigar [cigar_cursor] == 'b') chro_pos = this_section_start - tmpi - read_cursor - 1;
+ else chro_pos = this_section_start + tmpi - read_cursor - 1;
+ }
+
+ this_section_start = chro_pos;
+
+ if(section_no == 0)
+ ret = chro_pos;
+
+ is_positive_dir = ! is_positive_dir;
+ section_no++;
+ tmpi=0;
+ }
+ else if(isalpha(cigar [cigar_cursor]))
+ {
+ if(cigar [cigar_cursor]=='M' || cigar [cigar_cursor] == 'S')
+ read_cursor += tmpi;
+ tmpi=0;
+ last_piece_end = cigar_cursor+1;
+ }
+ else tmpi = tmpi*10 + (cigar [cigar_cursor] - '0');
+
+ if(cigar [cigar_cursor] == 0)break;
+ }
+
+ SUBREADprintf("REV CIGAR: %s => %s\n", cigar, new_cigar);
+ return ret;
+}
+
+unsigned int find_left_end_cigar(unsigned int right_pos, char * cigar){
+ int delta_from_right = 0;
+ int cigar_cursor = 0;
+ unsigned int tmpi = 0;
+ while(1){
+ int nch = cigar[cigar_cursor++];
+ if(nch == 0) break;
+ if(isdigit(nch)){
+ tmpi = tmpi * 10 + nch - '0';
+ }else{
+ if(nch == 'M'||nch == 'D' || nch == 'N'){
+ delta_from_right +=tmpi;
+ }
+ tmpi = 0;
+ }
+ }
+ return right_pos - delta_from_right;
+}
+
+
+char contig_fasta_int2base(int v){
+ if(v == 1) return 'A';
+ if(v == 2) return 'T';
+ if(v == 3) return 'G';
+ if(v == 4) return 'C';
+ return 'N';
+}
+
+int contig_fasta_base2int(char base){
+ base = tolower(base);
+ if((base) == 'a'){ return 1;}
+ else if((base) == 't' || (base) == 'u'){ return 2;}
+ else if((base) == 'g'){ return 3;}
+ else if((base) == 'c'){ return 4;}
+ else return 15 ;
+}
+
+int get_contig_fasta(fasta_contigs_t * tab, char * chro, unsigned int pos, int len, char * out_bases){
+ unsigned int this_size = HashTableGet( tab -> size_table, chro ) - NULL;
+ if(this_size > 0){
+ if(this_size >= len && pos <= this_size - len){
+ char * bin_block = HashTableGet(tab -> contig_table, chro );
+ unsigned int bin_byte = pos / 2;
+ int bin_bit = 4*(pos % 2), x1;
+
+ for(x1 = 0 ;x1 < len; x1++)
+ {
+ int bin_int = (bin_block[bin_byte] >> bin_bit) & 0xf;
+ if(bin_bit == 4) bin_byte++;
+ bin_bit = (bin_bit == 4)?0:4;
+ out_bases[x1] = contig_fasta_int2base(bin_int);
+ }
+
+ return 0;
+ }
+ }
+ return 1;
+}
+
+void destroy_contig_fasta(fasta_contigs_t * tab){
+ HashTableDestroy( tab -> size_table );
+ HashTableDestroy( tab -> contig_table );
+}
+int read_contig_fasta(fasta_contigs_t * tab, char * fname){
+ FILE * fp = f_subr_open(fname, "r");
+ if(fp != NULL){
+ tab -> contig_table = HashTableCreate(3943);
+ tab -> size_table = HashTableCreate(3943);
+
+ HashTableSetDeallocationFunctions(tab -> contig_table, free, free);
+ HashTableSetDeallocationFunctions(tab -> size_table, NULL, NULL);
+
+ HashTableSetKeyComparisonFunction(tab -> contig_table, fc_strcmp_chro);
+ HashTableSetKeyComparisonFunction(tab -> size_table, fc_strcmp_chro);
+
+ HashTableSetHashFunction(tab -> contig_table, fc_chro_hash);
+ HashTableSetHashFunction(tab -> size_table, fc_chro_hash);
+
+ char chro_name[MAX_CHROMOSOME_NAME_LEN];
+ unsigned int inner_cursor = 0, current_bin_space = 0;
+ int status = 0;
+ char * bin_block = NULL;
+ chro_name[0]=0;
+
+ while(1){
+ char nch = fgetc(fp);
+ if(status == 0){
+ assert(nch == '>');
+ status = 1;
+ }else if(status == 1){
+ if(inner_cursor == 0){
+ bin_block = calloc(sizeof(char),10000);
+ current_bin_space = 10000;
+ }
+ if(nch == '|' || nch == ' ') status = 2;
+ else if(nch == '\n'){
+ status = 3;
+ inner_cursor = 0;
+ }else{
+ chro_name[inner_cursor++] = nch;
+ chro_name[inner_cursor] = 0;
+ }
+ }else if(status == 2){
+ if(nch == '\n'){
+ status = 3;
+ inner_cursor = 0;
+ }
+ }else if(status == 3){
+ if(nch == '>' || nch <= 0){
+ char * mem_chro = malloc(strlen(chro_name)+1);
+ strcpy(mem_chro, chro_name);
+ HashTablePut(tab -> size_table , mem_chro, NULL + inner_cursor);
+ HashTablePut(tab -> contig_table , mem_chro, bin_block);
+ // SUBREADprintf("Read '%s' : %u bases\n", chro_name, inner_cursor);
+ inner_cursor = 0;
+ status = 1;
+ if(nch <= 0) break;
+ }else if(nch != '\n'){
+ int bin_bytes = inner_cursor / 2;
+ int bin_bits = 4*(inner_cursor % 2);
+ int base_int = contig_fasta_base2int(nch);
+ if(bin_bytes >= current_bin_space){
+ unsigned int new_bin_space = current_bin_space / 4 * 5;
+ if(current_bin_space > 0xffff0000 /5 * 4){
+ assert(0);
+ }
+ bin_block = realloc(bin_block, new_bin_space);
+ memset(bin_block + current_bin_space, 0, new_bin_space - current_bin_space);
+ current_bin_space = new_bin_space;
+ }
+ bin_block[bin_bytes] |= (base_int << bin_bits);
+ inner_cursor++;
+ }
+ }
+ }
+
+ fclose(fp);
+ }
+ return 1;
+}
+
+int RSubread_parse_CIGAR_Extra_string(int FLAG, char * MainChro, unsigned int MainPos, const char * CIGAR_Str, const char * Extra_Tags, char ** Chros, unsigned int * Staring_Chro_Points, unsigned short * Section_Start_Read_Pos, unsigned short * Section_Length, int * is_junction_read){
+ int ret = RSubread_parse_CIGAR_string(MainChro, MainPos, CIGAR_Str, Chros, Staring_Chro_Points, Section_Start_Read_Pos, Section_Length, is_junction_read);
+
+ char read_main_strand = (((FLAG & 0x40)==0x40) == ((FLAG & 0x10) == 0x10 ))?'-':'+';
+ int tag_cursor=0;
+ //SUBREADprintf("EXTRA=%s\n", Extra_Tags);
+ int status = PARSE_STATUS_TAGNAME;
+ char tag_name[2], typechar=0;
+ int tag_inner_cursor=0;
+
+ char current_fusion_char[MAX_CHROMOSOME_NAME_LEN];
+ unsigned int current_fusion_pos = 0;
+ char current_fusion_strand = 0;
+ char current_fusion_cigar[FC_CIGAR_PARSER_ITEMS * 15];
+ current_fusion_cigar [0] =0;
+ current_fusion_char [0]=0;
+
+ while(1){
+ int nch = Extra_Tags[tag_cursor];
+ if(status == PARSE_STATUS_TAGNAME){
+ tag_name[tag_inner_cursor++] = nch;
+ if(tag_inner_cursor == 2){
+ status = PARSE_STATUS_TAGTYPE;
+ tag_cursor += 1;
+ assert(Extra_Tags[tag_cursor] == ':');
+ }
+ }else if(status == PARSE_STATUS_TAGTYPE){
+ typechar = nch;
+ tag_cursor +=1;
+ assert(Extra_Tags[tag_cursor] == ':');
+ tag_inner_cursor = 0;
+ status = PARSE_STATUS_TAGVALUE;
+ }else if(status == PARSE_STATUS_TAGVALUE){
+ if(nch == '\t' || nch == 0 || nch == '\n'){
+ if(current_fusion_cigar[0] && current_fusion_char[0] && current_fusion_pos && current_fusion_strand){
+ //SUBREADprintf("ENTER CALC:%s\n", current_fusion_char );
+ unsigned int left_pos = current_fusion_pos;
+ if(current_fusion_strand!=read_main_strand)
+ left_pos = find_left_end_cigar(current_fusion_pos, current_fusion_cigar);
+ ret += RSubread_parse_CIGAR_string(current_fusion_char, left_pos, current_fusion_cigar, Chros + ret, Staring_Chro_Points+ ret, Section_Start_Read_Pos+ ret, Section_Length + ret, is_junction_read);
+
+ current_fusion_pos = 0;
+ current_fusion_strand = 0;
+ current_fusion_cigar [0] =0;
+ current_fusion_char [0]=0;
+ //SUBREADprintf("EXIT CALC:%s\n", current_fusion_char );
+ }
+
+ tag_inner_cursor = 0;
+ status = PARSE_STATUS_TAGNAME;
+ }else{
+ if(tag_name[0]=='C' && tag_name[1]=='C' && typechar == 'Z'){
+ current_fusion_char[tag_inner_cursor++]=nch;
+ current_fusion_char[tag_inner_cursor]=0;
+ }else if(tag_name[0]=='C' && tag_name[1]=='G' && typechar == 'Z'){
+ current_fusion_cigar[tag_inner_cursor++]=nch;
+ current_fusion_cigar[tag_inner_cursor]=0;
+ }else if(tag_name[0]=='C' && tag_name[1]=='P' && typechar == 'i'){
+ current_fusion_pos = current_fusion_pos * 10 + (nch - '0');
+ }else if(tag_name[0]=='C' && tag_name[1]=='T' && typechar == 'Z'){
+ //SUBREADprintf("pos=%d %c -> %c\n", tag_cursor, current_fusion_strand, nch);
+ current_fusion_strand = nch;
+ //SUBREADprintf("spo=%d %c -> %c\n", tag_cursor, current_fusion_strand, nch);
+ }
+ }
+ }
+
+ if(nch == 0 || nch == '\n'){
+ assert(status == PARSE_STATUS_TAGNAME);
+ break;
+ }
+
+ tag_cursor++;
+ //SUBREADprintf("CUR=%d [%s], c=%d\n", tag_cursor, Extra_Tags, Extra_Tags[tag_cursor]);
+ }
+ return ret;
+}
+
+int RSubread_parse_CIGAR_string(char * chro , unsigned int first_pos, const char * CIGAR_Str, char ** Section_Chromosomes, unsigned int * Section_Start_Chro_Pos,unsigned short * Section_Start_Read_Pos, unsigned short * Section_Chro_Length, int * is_junction_read)
{
unsigned int tmp_int=0;
int cigar_cursor=0;
unsigned short current_section_chro_len=0, current_section_start_read_pos = 0, read_cursor = 0;
- unsigned int chromosome_cursor=0;
+ unsigned int chromosome_cursor=first_pos;
int ret=0;
for(cigar_cursor=0; ; cigar_cursor++)
@@ -53,6 +349,7 @@ int RSubread_parse_CIGAR_string(const char * CIGAR_Str, int * Section_Start_Chro
{
if(current_section_chro_len>0)
{
+ Section_Chromosomes[ret] = chro;
Section_Start_Chro_Pos[ret] = chromosome_cursor - current_section_chro_len;
Section_Start_Read_Pos[ret] = current_section_start_read_pos;
Section_Chro_Length[ret] = current_section_chro_len;
@@ -77,12 +374,12 @@ int RSubread_parse_CIGAR_string(const char * CIGAR_Str, int * Section_Start_Chro
void display_sections(char * CIGAR_Str)
{
- int is_junc=0;
+ //int is_junc=0;
int Section_Start_Chro_Pos[FC_CIGAR_PARSER_ITEMS];
unsigned short Section_Start_Read_Pos[FC_CIGAR_PARSER_ITEMS];
unsigned short Section_Chro_Length[FC_CIGAR_PARSER_ITEMS];
- int retv = RSubread_parse_CIGAR_string(CIGAR_Str, Section_Start_Chro_Pos, Section_Start_Read_Pos, Section_Chro_Length, &is_junc);
+ int retv = 0;//RSubread_parse_CIGAR_string(CIGAR_Str, Section_Start_Chro_Pos, Section_Start_Read_Pos, Section_Chro_Length, &is_junc);
int x1;
SUBREADprintf("Cigar=%s ; Sections=%d\n", CIGAR_Str, retv);
diff --git a/src/HelperFunctions.h b/src/HelperFunctions.h
index 254627a..302e11a 100644
--- a/src/HelperFunctions.h
+++ b/src/HelperFunctions.h
@@ -20,6 +20,20 @@
#ifndef __HELPER_FUNCTIONS_H_
#define __HELPER_FUNCTIONS_H_
+#include "hashtable.h"
+
+#define PARSE_STATUS_TAGNAME 1
+#define PARSE_STATUS_TAGTYPE 2
+#define PARSE_STATUS_TAGVALUE 3
+
+typedef struct{
+ HashTable * contig_table;
+ HashTable * size_table;
+} fasta_contigs_t;
+
+int read_contig_fasta(fasta_contigs_t * tab, char * fname);
+int get_contig_fasta(fasta_contigs_t * tab, char * chro, unsigned int pos, int len, char * out_bases);
+void destroy_contig_fasta(fasta_contigs_t * tab);
// This function parses CIGAR_Str and extract the relative starting points and lengths of all sections (i.e., the sections of read that are separated by 'N').
// CIGAR_Str is a CIGAR string containing 'S', 'M', 'I', 'D' and 'N' operations. Other operations are all ignored. The length of CIGAR_Str should be always less than 100 bytes or "-1" is returned.
@@ -29,8 +43,10 @@
// This function returns the number of sections found in the CIGAR string. It returns -1 if the CIGAR string cannot be parsed.
-int RSubread_parse_CIGAR_string(const char * CIGAR_Str, int * Staring_Chro_Points, unsigned short * Section_Start_Read_Pos, unsigned short * Section_Length, int * is_junction_read);
+int RSubread_parse_CIGAR_string(char * chro , unsigned int first_pos, const char * CIGAR_Str, char ** Section_Chromosomes, unsigned int * Section_Start_Chro_Pos,unsigned short * Section_Start_Read_Pos, unsigned short * Section_Chro_Length, int * is_junction_read);
+
+int RSubread_parse_CIGAR_Extra_string(int FLAG, char * MainChro, unsigned int MainPos, const char * CIGAR_Str, const char * Extra_Tags, char ** Chros, unsigned int * Staring_Chro_Points, unsigned short * Section_Start_Read_Pos, unsigned short * Section_Length, int * is_junction_read);
// This function try to find the attribute value of a given attribute name from the extra column string in GTF/GFF.
// If the value is found, it returns the length of the value (must be > 0 by definition), or -1 if no attribute is found or the format is wrong.
@@ -50,4 +66,6 @@ char *str_replace(char *orig, char *rep, char *with) ;
// // Third, compare the remainder.
int strcmp_number(char * s1, char * s2);
+unsigned int reverse_cigar(unsigned int pos, char * cigar, char * new_cigar);
+unsigned int find_left_end_cigar(unsigned int right_pos, char * cigar);
#endif
diff --git a/src/Makefile.FreeBSD b/src/Makefile.FreeBSD
index fb64369..839d3f1 100644
--- a/src/Makefile.FreeBSD
+++ b/src/Makefile.FreeBSD
@@ -8,7 +8,8 @@ LDFLAGS = -pthread -lz -lm ${MACOS} -DMAKE_FOR_EXON -D MAKE_STANDALONE -l compat
CC = gcc ${CCFLAGS} -ggdb -fomit-frame-pointer -ffast-math -funroll-loops -mmmx -msse -msse2 -msse3 -fmessage-length=0
-ALL_LIBS= core core-junction core-indel sambam-file sublog gene-algorithms hashtable input-files sorted-hashtable gene-value-index exon-algorithms HelperFunctions interval_merge
+ALL_LIBS= core core-junction core-indel sambam-file sublog gene-algorithms hashtable input-files sorted-hashtable gene-value-index exon-algorithms HelperFunctions interval_merge core-bigtable
+
ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS})
ALL_H=$(addsuffix .h, ${ALL_LIBS})
ALL_C=$(addsuffix .c, ${ALL_LIBS})
diff --git a/src/Makefile.Linux b/src/Makefile.Linux
index c145ec2..565218f 100644
--- a/src/Makefile.Linux
+++ b/src/Makefile.Linux
@@ -1,4 +1,4 @@
-#MACOS = -D MACOS
+MACOS = -D MACOS
include makefile.version
@@ -8,15 +8,15 @@ LDFLAGS = ${STATIC_MAKE} -lpthread -lz -lm ${MACOS} -O9 -DMAKE_FOR_EXON -D MAKE_
CC = gcc ${CCFLAGS} -ggdb -fomit-frame-pointer -ffast-math -funroll-loops -mmmx -msse -msse2 -msse3 -fmessage-length=0
-ALL_LIBS= core core-junction core-indel sambam-file sublog gene-algorithms hashtable input-files sorted-hashtable gene-value-index exon-algorithms HelperFunctions interval_merge long-hashtable
+ALL_LIBS= core core-junction core-indel sambam-file sublog gene-algorithms hashtable input-files sorted-hashtable gene-value-index exon-algorithms HelperFunctions interval_merge long-hashtable core-bigtable seek-zlib
ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS})
ALL_H=$(addsuffix .h, ${ALL_LIBS})
ALL_C=$(addsuffix .c, ${ALL_LIBS})
-all: featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc subtools qualityScores subread-fullscan propmapped coverageCount globalReassembly # samMappedBases mergeVCF
+all: repair featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped coverageCount # samMappedBases mergeVCF testZlib
mkdir -p ../bin/utilities
mv subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
- mv globalReassembly coverageCount propmapped qualityScores removeDup subread-fullscan subtools ../bin/utilities
+ mv repair coverageCount propmapped qualityScores removeDup subread-fullscan ../bin/utilities
globalReassembly: global-reassembly.c ${ALL_OBJECTS}
${CC} -o globalReassembly global-reassembly.c ${ALL_OBJECTS} ${LDFLAGS}
@@ -57,6 +57,12 @@ subread-fullscan: fullscan.c ${ALL_OBJECTS}
coverageCount: coverage_calc.c ${ALL_OBJECTS}
${CC} -o coverageCount coverage_calc.c ${ALL_OBJECTS} ${LDFLAGS}
+#testZlib: test-seek-zlib.c ${ALL_OBJECTS}
+# ${CC} -o testZlib test-seek-zlib.c ${ALL_OBJECTS} ${LDFLAGS}
+
+repair: read-repair.c ${ALL_OBJECTS}
+ ${CC} -o repair read-repair.c ${ALL_OBJECTS} ${LDFLAGS}
+
#samMappedBases: samMappedBases.c ${ALL_OBJECTS}
# ${CC} -o samMappedBases samMappedBases.c ${ALL_OBJECTS} ${LDFLAGS}
#mergeVCF: mergeVCF.c ${ALL_OBJECTS}
diff --git a/src/Makefile.MacOS b/src/Makefile.MacOS
index ceb899d..33ea676 100644
--- a/src/Makefile.MacOS
+++ b/src/Makefile.MacOS
@@ -6,15 +6,18 @@ LDFLAGS = -pthread -lz -lm ${MACOS} -DMAKE_FOR_EXON -D MAKE_STANDALONE # -DREPO
CC = gcc ${CCFLAGS} ${STATIC_MAKE} -ggdb -fomit-frame-pointer -O3 -ffast-math -funroll-loops -mmmx -msse -msse2 -msse3 -fmessage-length=0
-ALL_LIBS= core core-junction core-indel sambam-file sublog gene-algorithms hashtable input-files sorted-hashtable gene-value-index exon-algorithms HelperFunctions interval_merge # long-hashtable
+ALL_LIBS= core core-junction core-indel sambam-file sublog gene-algorithms hashtable input-files sorted-hashtable gene-value-index exon-algorithms HelperFunctions interval_merge core-bigtable seek-zlib
ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS})
ALL_H=$(addsuffix .h, ${ALL_LIBS})
ALL_C=$(addsuffix .c, ${ALL_LIBS})
-all: featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc subtools qualityScores subread-fullscan propmapped coverageCount # globalReassembly
+all: repair featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped coverageCount # globalReassembly testZlib
mkdir -p ../bin/utilities
mv subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/
- mv coverageCount subread-fullscan qualityScores removeDup subtools propmapped ../bin/utilities
+ mv repair coverageCount subread-fullscan qualityScores removeDup propmapped ../bin/utilities
+
+repair: read-repair.c ${ALL_OBJECTS}
+ ${CC} -o repair read-repair.c ${ALL_OBJECTS} ${LDFLAGS}
propmapped: propmapped.c ${ALL_OBJECTS}
${CC} -o propmapped propmapped.c ${ALL_OBJECTS} ${LDFLAGS}
@@ -37,8 +40,8 @@ featureCounts: readSummary.c subread.h ${ALL_OBJECTS}
subread-align: core-interface-aligner.c ${ALL_OBJECTS}
${CC} -o subread-align core-interface-aligner.c ${ALL_OBJECTS} ${LDFLAGS}
-subtools: subtools.c ${ALL_OBJECTS}
- ${CC} -o subtools subtools.c ${ALL_OBJECTS} ${LDFLAGS}
+#subtools: subtools.c ${ALL_OBJECTS}
+# ${CC} -o subtools subtools.c ${ALL_OBJECTS} ${LDFLAGS}
subjunc: core-interface-subjunc.c ${ALL_OBJECTS}
${CC} -o subjunc core-interface-subjunc.c ${ALL_OBJECTS} ${LDFLAGS}
@@ -49,11 +52,11 @@ qualityScores: qualityScores.c ${ALL_OBJECTS}
subread-fullscan: fullscan.c ${ALL_OBJECTS}
${CC} -o subread-fullscan fullscan.c ${ALL_OBJECTS} ${LDFLAGS}
-
coverageCount: coverage_calc.c ${ALL_OBJECTS}
${CC} -o coverageCount coverage_calc.c ${ALL_OBJECTS} ${LDFLAGS}
-
+testZlib: test-seek-zlib.c ${ALL_OBJECTS}
+ ${CC} -o testZlib test-seek-zlib.c ${ALL_OBJECTS} ${LDFLAGS}
clean:
rm -f subtools core featureCounts exactSNP removeDup subread-buildindex ${ALL_OBJECTS}
diff --git a/src/SNPCalling.c b/src/SNPCalling.c
index 5d44bb9..55017c7 100644
--- a/src/SNPCalling.c
+++ b/src/SNPCalling.c
@@ -1134,12 +1134,13 @@ int parse_read_lists_maybe_threads(char * in_FASTA_file, char * out_BED_file, ch
SUBREADprintf("Cannot open the output file: '%s'\n", out_BED_file);
}
fputs("##fileformat=VCFv4.0\n",out_fp);
+ fputs("##comment=The QUAL values for the SNPs in this VCF file are calculated as min(40, - log_10 (p_value)), where p_value is from the Fisher's Exact Test. The QUAL values for the Indels in this VCF file are always 1.0.\n", out_fp);
fputs("##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Read Depth\">\n", out_fp);
- fputs("##INFO=<ID=BGMM,Number=1,Type=Integer,Description=\"Number of mismatched bases in the background\">\n", out_fp);
- fputs("##INFO=<ID=BGTOTAL,Number=1,Type=Integer,Description=\"Total number of bases in the background\">\n", out_fp);
- fputs("##INFO=<ID=MM,Number=1,Type=String,Description=\"Number of supporting reads for each alternative allele\">\n", out_fp);
+ fputs("##INFO=<ID=BGMM,Number=1,Type=Integer,Description=\"Number of mismatched bases in the background (for SNP only)\">\n", out_fp);
+ fputs("##INFO=<ID=BGTOTAL,Number=1,Type=Integer,Description=\"Total number of bases in the background (for SNP only)\">\n", out_fp);
+ fputs("##INFO=<ID=MM,Number=1,Type=String,Description=\"Number of supporting reads for each alternative allele (for SNP only)\">\n", out_fp);
fputs("##INFO=<ID=INDEL,Number=0,Type=Flag,Description=\"Indicates that the variant is an INDEL.\">\n", out_fp);
- fputs("##INFO=<ID=SR,Number=1,Type=String,Description=\"Number of supporting reads for variants\">\n", out_fp);
+ fputs("##INFO=<ID=SR,Number=1,Type=Integer,Description=\"Number of supporting reads (for INDEL only)\">\n", out_fp);
fputs("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n", out_fp);
if(all_threads < 2)
{
@@ -1524,7 +1525,7 @@ int main_snp_calling_test(int argc,char ** argv)
for(xk1=0;xk1<1;xk1++){
- char c;
+ int c;
char in_SAM_file[5000];
char out_BED_file[300];
char temp_path[300];
diff --git a/src/SUBindel.c b/src/SUBindel.c
index 015a3b4..0ebcebb 100644
--- a/src/SUBindel.c
+++ b/src/SUBindel.c
@@ -24,6 +24,8 @@
#include <getopt.h>
#include <sys/types.h>
#include <sys/stat.h>
+#include <time.h>
+#include <sys/time.h>
#include <unistd.h>
@@ -48,7 +50,7 @@ int load_global_context_forindel(global_context_t * context)
char tmp_fname [MAX_FILE_NAME_LENGTH];
warning_file_limit();
- context -> input_reads.avg_read_length = guess_reads_density_format(context->config.first_read_file , 0, NULL, NULL);
+ context -> input_reads.avg_read_length = 200;//guess_reads_density_format(context->config.first_read_file , 0, NULL, NULL, NULL);
if(context -> input_reads.avg_read_length<0 )context -> input_reads.avg_read_length = 250;
if(context -> input_reads.avg_read_length<0 ||geinput_open_sam(context->config.first_read_file, &context->input_reads.first_read_file, context -> input_reads.is_paired_end_reads))
{
@@ -321,6 +323,13 @@ int main(int argc, char ** argv)
int option_index = 0;
int ret=0;
global_context_t * global_context;
+
+ struct timeval xtime;
+ gettimeofday(&xtime,NULL);
+ srand(time(NULL)^xtime.tv_usec);
+
+
+
global_context = (global_context_t*)malloc(sizeof(global_context_t));
init_global_context(global_context);
global_context->config.entry_program_name = CORE_PROGRAM_SUBINDEL;
diff --git a/src/SeekGZ.h b/src/SeekGZ.h
new file mode 100644
index 0000000..aec8dde
--- /dev/null
+++ b/src/SeekGZ.h
@@ -0,0 +1,10 @@
+/* NOTE(review): as written this header is not valid C. Both typedefs below lack
+ * a `struct` (or `union`) keyword after `typedef`, and the lone `g` inside the
+ * first one is not a member declaration. It looks like an unfinished draft;
+ * TODO confirm that nothing includes it before relying on these names. */
+
+typedef {
+ g
+} SeekGZ_fp;
+// Intended as a checkpoint record: an offset plus a pointer to compressed data (field names only; usage is not visible here).
+typedef {
+ unsigned int current_block_offset;
+ char * current_block_compressed;
+} SeekGZ_checkpoint;
diff --git a/src/core-bigtable.c b/src/core-bigtable.c
new file mode 100644
index 0000000..4cfc1a2
--- /dev/null
+++ b/src/core-bigtable.c
@@ -0,0 +1,488 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "core.h"
+#include "core-bigtable.h"
+#include "gene-algorithms.h"
+
+#define TABLE_PRELOAD_SIZE 24
+
+/*
+ * Byte offset, inside the temporary result file, of the record block of one read.
+ * A record block is laid out as: 3 shorts per big-margin record, then
+ * multi_best_reads mapping_result_t entries, then (only when breakpoint detection
+ * is on) multi_best_reads subjunc_result_t entries.
+ * The macro reads `global_context` and `is_second_read` from the scope where it is
+ * expanded. The whole expansion is parenthesised so that it can be used safely as
+ * an operand of a larger expression.
+ */
+#define calc_offset(pair_NO) (((pair_NO) * (1 + global_context -> input_reads.is_paired_end_reads) + is_second_read) * (sizeof(short) * 3 * global_context -> config.big_margin_record_size + (sizeof(mapping_result_t) + global_context -> config.do_breakpoint_detection*sizeof(subjunc_result_t)) * global_context -> config.multi_best_reads))
+
+
+/* Acquires the lock stored in global_context -> bigtable_lock. */
+void bigtable_lock(global_context_t * global_context)
+{
+    subread_lock_occupy(&(global_context -> bigtable_lock));
+}
+
+/* Releases the lock stored in global_context -> bigtable_lock. */
+void bigtable_unlock(global_context_t * global_context)
+{
+    subread_lock_release(&(global_context -> bigtable_lock));
+}
+
+/*
+ * Reports the worker thread that handles a read pair.
+ * The lookup against per-thread start-read blocks that this function once
+ * contained is disabled, so both arguments are ignored and 0 is always returned.
+ */
+unsigned int get_handling_thread_number(global_context_t * global_context , subread_read_number_t pair_number)
+{
+    (void) global_context;
+    (void) pair_number;
+    return 0;
+}
+
+/*
+ * Translates a read-pair number into the number used to index the result table.
+ * The translation is currently the identity; global_context is not consulted.
+ */
+unsigned long long get_inner_pair(global_context_t * global_context , subread_read_number_t pair_number)
+{
+    unsigned long long inner_number = pair_number;
+
+    (void) global_context;
+    return inner_number;
+}
+
+bigtable_cached_result_t * bigtable_retrieve_cache(global_context_t * global_context , thread_context_t * thread_context , subread_read_number_t pair_number, int is_second_read, int load_more);
+
+/*
+ * Copies one stored result of a read into caller-provided buffers.
+ *
+ *   pair_number / is_second_read : which read of which pair.
+ *   result_number                : index of the result (0 .. multi_best_reads-1).
+ *   return_ptr                   : receives the mapping_result_t; may be NULL.
+ *   return_junction_ptr          : receives the subjunc_result_t; may be NULL.
+ *
+ * When a temporary file backs the table, any chunk currently loaded is first
+ * written back with bigtable_write_thread_cache() and marked as unloaded, then the
+ * record is read straight from the file. Otherwise it is copied from the in-memory
+ * cache.
+ *
+ * If the seek or the read fails (for instance because the record lies past the end
+ * of the file), the output buffer is zero-filled instead of being left
+ * uninitialised.
+ */
+void bigtable_readonly_result(global_context_t * global_context , thread_context_t * thread_context , subread_read_number_t pair_number, int result_number, int is_second_read, mapping_result_t * return_ptr, subjunc_result_t * return_junction_ptr){
+    if(global_context -> bigtable_cache_file_fp){
+        int loadjunc;
+        long long inner_pair_number = get_inner_pair(global_context, pair_number);
+        FILE * cache_fp = global_context -> bigtable_cache_file_fp;
+
+        /* The file must hold the latest data before it is read directly. */
+        if(global_context -> bigtable_cache_file_loaded_fragments_begin >=0){
+            bigtable_write_thread_cache(global_context);
+            global_context -> bigtable_cache_file_loaded_fragments_begin = -1;
+        }
+
+        /* Pass 0 loads the mapping result, pass 1 loads the junction result. */
+        for(loadjunc = 0; loadjunc < 2; loadjunc++){
+            void * write_ptr = loadjunc ? (void *)return_junction_ptr : (void *)return_ptr;
+            size_t record_size = loadjunc ? sizeof(subjunc_result_t) : sizeof(mapping_result_t);
+            if(!write_ptr) continue;
+
+            unsigned long long offset = calc_offset(inner_pair_number);
+            /* Skip the big-margin records stored at the start of the block. */
+            offset += sizeof(short) * 3 * global_context -> config.big_margin_record_size ;
+            if(loadjunc) offset += sizeof(mapping_result_t) * global_context -> config.multi_best_reads + sizeof(subjunc_result_t) * result_number;
+            else offset += sizeof(mapping_result_t) * result_number;
+
+            if(fseeko(cache_fp, offset, SEEK_SET) != 0 || fread(write_ptr, record_size, 1, cache_fp) != 1)
+                memset(write_ptr, 0, record_size);
+        }
+    }else{
+        bigtable_cached_result_t * rett = bigtable_retrieve_cache(global_context , thread_context , pair_number, is_second_read,0);
+
+        int best_offset = result_number;
+        if(return_ptr)memcpy(return_ptr, rett -> alignment_res + best_offset, sizeof(mapping_result_t));
+        if(return_junction_ptr)memcpy(return_junction_ptr, rett -> subjunc_res + best_offset, sizeof(subjunc_result_t));
+    }
+}
+
+
+/*
+ * Sets up the result cache and, unless config.use_memory_buffer is set, the
+ * temporary file "<temp_file_prefix>-00-align.bin" that backs it.
+ *
+ *   is_rewinding : non-zero when the buffers allocated by an earlier call are
+ *                  reused; nothing is allocated again in that case.
+ *
+ * Returns 0 on success and -1 if the temporary file cannot be created.
+ *
+ * NOTE(review): when is_rewinding is non-zero in file mode the file is opened
+ * again; whether the previous FILE* has been closed by then is not visible here.
+ */
+int init_bigtable_results(global_context_t * global_context, int is_rewinding)
+{
+    if(global_context -> config.use_memory_buffer) {
+        global_context -> bigtable_chunked_fragments = global_context -> config.reads_per_chunk+1;
+        global_context -> bigtable_cache_size = global_context -> bigtable_chunked_fragments * (1+global_context -> input_reads.is_paired_end_reads);
+    } else {
+        global_context -> bigtable_chunked_fragments = 300000 - 260000;
+        global_context -> bigtable_cache_size = global_context -> config.all_threads * global_context -> bigtable_chunked_fragments * (1+global_context -> input_reads.is_paired_end_reads);
+    }
+
+    if(!is_rewinding)
+        global_context -> bigtable_cache = malloc(sizeof(bigtable_cached_result_t) * global_context -> bigtable_cache_size);
+
+    int xk1;
+    for(xk1 = 0; xk1 < global_context -> bigtable_cache_size; xk1++){
+        if(!is_rewinding)
+            global_context -> bigtable_cache [xk1].alignment_res = malloc(sizeof(mapping_result_t) * global_context -> config.multi_best_reads);
+
+        /* wait_occupied() reads this field, so it must never hold malloc garbage. */
+        global_context -> bigtable_cache [xk1].status = CACHE_STATUS_RELEASED;
+
+        if(global_context -> config.use_memory_buffer)
+        {
+            memset(global_context -> bigtable_cache [xk1].big_margin_data, 0, sizeof(global_context -> bigtable_cache [xk1].big_margin_data));
+            memset(global_context -> bigtable_cache [xk1].alignment_res, 0, sizeof(mapping_result_t) * global_context -> config.multi_best_reads);
+        }
+
+        if(global_context -> config.do_breakpoint_detection){
+            if(!is_rewinding)
+                global_context -> bigtable_cache [xk1].subjunc_res = malloc(sizeof(subjunc_result_t) * global_context -> config.multi_best_reads);
+            if(global_context -> config.use_memory_buffer)
+                memset(global_context -> bigtable_cache [xk1].subjunc_res , 0, sizeof(subjunc_result_t) * global_context -> config.multi_best_reads);
+        }
+    }
+
+    subread_init_lock(&global_context -> bigtable_lock);
+
+    if(global_context -> config.use_memory_buffer)
+        global_context -> bigtable_cache_file_fp = NULL;
+    else {
+        char tmpfname[MAX_FILE_NAME_LENGTH];
+        /* Bounded formatting: the prefix plus the suffix may not fit the buffer. */
+        snprintf(tmpfname, sizeof(tmpfname), "%s-%02d-align.bin", global_context -> config.temp_file_prefix, 0);
+
+        //if(is_rewinding) unlink(tmpfname);
+        FILE * fp = fopen(tmpfname, "w+");
+        global_context -> bigtable_cache_file_fp = fp;
+        global_context -> bigtable_cache_file_fragments = -1;
+        global_context -> bigtable_cache_file_loaded_fragments_begin = -1;
+        global_context -> bigtable_dirty_data = 0;
+
+        /* A NULL file pointer is what the rest of this file uses to recognise the
+         * in-memory mode, so a failed fopen() must not go unnoticed. */
+        if(fp == NULL){
+            SUBREADprintf("ERROR: unable to create the temporary file '%s'.\n", tmpfname);
+            return -1;
+        }
+    }
+
+    return 0;
+}
+/*
+ * Convenience wrapper around bigtable_retrieve_result(): returns a pointer to the
+ * best_read_id-th mapping result of the given read, passing no thread context.
+ */
+mapping_result_t * _global_retrieve_alignment_ptr(global_context_t * global_context, subread_read_number_t pair_number, int is_second_read, int best_read_id)
+{
+    mapping_result_t * alignment;
+
+    bigtable_retrieve_result(global_context, NULL, pair_number, best_read_id, is_second_read, &alignment, NULL);
+    return alignment;
+}
+
+/*
+ * Convenience wrapper around bigtable_retrieve_result(): returns a pointer to the
+ * best_read_id-th junction result of the given read, passing no thread context.
+ */
+subjunc_result_t * _global_retrieve_subjunc_ptr(global_context_t * global_context, subread_read_number_t pair_number, int is_second_read, int best_read_id)
+{
+    subjunc_result_t * junction;
+
+    bigtable_retrieve_result(global_context, NULL, pair_number, best_read_id, is_second_read, NULL, &junction);
+    return junction;
+}
+
+
+/*
+ * Byte offset, inside the temporary result file, at which the data of fragment
+ * `pair_no` starts (one record block per read; two reads per fragment when the
+ * input is paired-end). The macro reads `global_context` from the scope where it
+ * is expanded. The whole expansion is parenthesised so that it can be used safely
+ * as an operand of a larger expression.
+ */
+#define calc_file_location(pair_no) ((pair_no)* (1 + global_context -> input_reads.is_paired_end_reads) * (sizeof(short) * 3 * global_context -> config.big_margin_record_size + (sizeof(mapping_result_t) + global_context -> config.do_breakpoint_detection*sizeof(subjunc_result_t)) * global_context -> config.multi_best_reads))
+
+
+/* Writes `bytes` bytes to `fp`; returns 0 on success (or when there is nothing to write), 1 on failure. */
+static int bigtable_write_block(FILE * fp, const void * data, size_t bytes){
+    if(bytes == 0) return 0;
+    return fwrite(data, bytes, 1, fp) == 1 ? 0 : 1;
+}
+
+/*
+ * Writes the chunk currently held in the cache back to the temporary file.
+ * Nothing happens in memory-only mode (no file), when no chunk is loaded, or
+ * when the loaded chunk has not been marked dirty. The dirty flag itself is not
+ * cleared here.
+ *
+ * A failed seek or write stops the spill and is reported, because the data read
+ * back from the file later would otherwise be silently wrong.
+ */
+void bigtable_write_thread_cache(global_context_t * global_context){
+    FILE * cache_fp = global_context -> bigtable_cache_file_fp;
+    if(cache_fp == NULL) return;
+    if(!global_context -> bigtable_dirty_data || global_context -> bigtable_cache_file_loaded_fragments_begin < 0) return;
+
+    long long start_file_location = calc_file_location( global_context -> bigtable_cache_file_loaded_fragments_begin);
+    size_t margin_bytes = sizeof(short) * 3 * global_context -> config.big_margin_record_size;
+    size_t alignment_bytes = sizeof(mapping_result_t) * global_context -> config.multi_best_reads;
+    size_t junction_bytes = sizeof(subjunc_result_t) * global_context -> config.multi_best_reads;
+    int xk1, xk2;
+    int write_failed = fseeko(cache_fp, start_file_location, SEEK_SET) != 0;
+
+    for(xk1 = 0; !write_failed && xk1 < global_context -> bigtable_chunked_fragments; xk1++) {
+        for(xk2 = 0; !write_failed && xk2 < 1 + global_context -> input_reads.is_paired_end_reads; xk2++){
+            bigtable_cached_result_t * current_cache = global_context -> bigtable_cache + xk1 * (1+global_context -> input_reads.is_paired_end_reads) + xk2;
+
+            /* Same on-disk order as the one assumed by calc_offset(). */
+            write_failed = bigtable_write_block(cache_fp, current_cache -> big_margin_data, margin_bytes)
+                        || bigtable_write_block(cache_fp, current_cache -> alignment_res, alignment_bytes);
+            if(!write_failed && global_context -> config.do_breakpoint_detection)
+                write_failed = bigtable_write_block(cache_fp, current_cache -> subjunc_res, junction_bytes);
+        }
+    }
+
+    if(write_failed)
+        SUBREADprintf("ERROR: unable to write to the temporary alignment file; the disk may be full.\n");
+}
+
+
+/*
+ * Spins until either the loaded chunk is no longer the one that started at
+ * old_begin, or none of the first bigtable_chunked_fragments cache entries is
+ * marked CACHE_STATUS_OCCUPIED.
+ */
+void wait_occupied(global_context_t * global_context , unsigned long long old_begin)
+{
+    for(;;){
+        int slot, any_occupied = 0;
+
+        if(old_begin != global_context -> bigtable_cache_file_loaded_fragments_begin)
+            return;
+
+        for(slot = 0; slot < global_context -> bigtable_chunked_fragments; slot++){
+            if(global_context -> bigtable_cache[slot].status == CACHE_STATUS_OCCUPIED)
+                any_occupied = 1;
+        }
+
+        if(!any_occupied)
+            return;
+    }
+}
+/*
+ * Returns the cache entry that holds the results of one read
+ * (pair_number, is_second_read).
+ *
+ * In memory-only mode (bigtable_cache_file_fp is NULL) the entry is simply
+ * addressed inside bigtable_cache. In file mode the chunk of
+ * bigtable_chunked_fragments fragments containing the pair is made resident
+ * first: the chunk currently loaded is written back through
+ * bigtable_write_thread_cache(), then the wanted chunk is either read from the
+ * temporary file or, if the file does not reach that far yet, the file is
+ * extended and the entries are zero-filled. The entry is then flagged
+ * CACHE_STATUS_OCCUPIED.
+ *
+ * thread_context and load_more are not used in this body.
+ * The results of fread() and ftruncate() are not checked.
+ */
+bigtable_cached_result_t * bigtable_retrieve_cache(global_context_t * global_context , thread_context_t * thread_context , subread_read_number_t pair_number, int is_second_read, int load_more)
+{
+ long long inner_pair_number = get_inner_pair(global_context, pair_number);
+ long long load_start_pair_no = inner_pair_number - inner_pair_number % global_context -> bigtable_chunked_fragments; // first fragment of the chunk that contains this pair
+
+ if(global_context -> bigtable_cache_file_fp){
+ if(global_context -> bigtable_cache_file_loaded_fragments_begin == -1 || inner_pair_number >= global_context -> bigtable_cache_file_loaded_fragments_begin + global_context -> bigtable_chunked_fragments || inner_pair_number < global_context -> bigtable_cache_file_loaded_fragments_begin)
+ {
+ wait_occupied(global_context, global_context -> bigtable_cache_file_loaded_fragments_begin);
+ }
+// The same "wanted pair is outside the loaded chunk" test is repeated below once the lock is held.
+ bigtable_lock(global_context);
+ //SUBREADprintf("inner_pair_number=%lld, fragments_begin=%lld\n", inner_pair_number, global_context -> bigtable_cache_file_loaded_fragments_begn[thread_no]);
+ if(global_context -> bigtable_cache_file_loaded_fragments_begin == -1 || inner_pair_number >= global_context -> bigtable_cache_file_loaded_fragments_begin + global_context -> bigtable_chunked_fragments || inner_pair_number < global_context -> bigtable_cache_file_loaded_fragments_begin)
+ {
+ long long load_end_pair_no = load_start_pair_no + global_context -> bigtable_chunked_fragments;
+
+ // this function will see if there is data to write or not.
+ bigtable_write_thread_cache(global_context);
+
+ // load or extend the real file
+ if(load_start_pair_no < global_context -> bigtable_cache_file_fragments){
+ long long start_file_location = calc_file_location(load_start_pair_no);
+ int xk1, xk2;
+// The chunk already exists in the file: read every entry back in the order it was written.
+ //SUBREADprintf("READ_IN %lld\n", load_start_pair_no);
+ fseeko(global_context -> bigtable_cache_file_fp, start_file_location, SEEK_SET);
+ for(xk1 = 0; xk1 < global_context -> bigtable_chunked_fragments; xk1++)
+ {
+ for(xk2 = 0; xk2 < 1 + global_context -> input_reads.is_paired_end_reads; xk2++){
+ bigtable_cached_result_t * current_cache = global_context -> bigtable_cache + xk1* (1+global_context -> input_reads.is_paired_end_reads) + xk2;
+ fread( current_cache -> big_margin_data , sizeof(short) * 3 * global_context -> config.big_margin_record_size , 1, global_context -> bigtable_cache_file_fp );
+ fread( current_cache -> alignment_res , sizeof(mapping_result_t) * global_context -> config.multi_best_reads , 1, global_context -> bigtable_cache_file_fp );
+
+ if(global_context -> config.do_breakpoint_detection)
+ fread( current_cache -> subjunc_res , sizeof(subjunc_result_t) * global_context -> config.multi_best_reads , 1, global_context -> bigtable_cache_file_fp);
+ }
+ }
+ }else{
+ long long new_file_size = calc_file_location(load_end_pair_no); // the file is grown to cover the whole new chunk
+ //SUBREADprintf("FILE_TRUNCATE %lld\n", load_start_pair_no);
+ ftruncate(fileno(global_context -> bigtable_cache_file_fp), new_file_size);
+ global_context -> bigtable_cache_file_fragments = load_end_pair_no;
+ int xk1, xk2;
+ for(xk1 = 0; xk1 < global_context -> bigtable_chunked_fragments; xk1++)
+ {
+ for(xk2 = 0; xk2 < 1 + global_context -> input_reads.is_paired_end_reads; xk2++)
+ {
+ bigtable_cached_result_t * current_cache = global_context -> bigtable_cache + xk1 * (1+global_context -> input_reads.is_paired_end_reads) + xk2;
+ memset( current_cache -> big_margin_data , 0 , sizeof(short) * 3 * global_context -> config.big_margin_record_size);
+ if(0 && xk1 < 10) // disabled debugging output
+ {
+ //SUBREADprintf("CACHEP_211: %p (%d from %llu)\n", current_cache, xk1, pair_number);
+ SUBREADprintf("NENSET_211: %p\n", current_cache -> alignment_res);
+ }
+ memset( current_cache -> alignment_res , 0, sizeof(mapping_result_t) * global_context -> config.multi_best_reads);
+
+ if(0 && xk1 < 10) // disabled debugging output
+ {
+ SUBREADprintf("FINE_211: %p\n", current_cache -> alignment_res);
+ }
+ if(global_context -> config.do_breakpoint_detection)
+ memset( current_cache -> subjunc_res , 0, sizeof(subjunc_result_t) * global_context -> config.multi_best_reads);
+ }
+ }
+ }
+// Record which chunk is resident now; it holds no unsaved changes yet.
+ global_context -> bigtable_cache_file_loaded_fragments_begin = load_start_pair_no;
+ global_context -> bigtable_dirty_data = 0;
+ }
+
+ bigtable_unlock(global_context);
+ }
+// NOTE(review): the status flag below is indexed by fragment offset only, without the (1 + is_paired_end_reads) stride used for ret_cache; wait_occupied() and bigtable_release_result() index it the same way.
+ if(global_context -> bigtable_cache_file_fp)
+ global_context -> bigtable_cache[inner_pair_number - load_start_pair_no].status = CACHE_STATUS_OCCUPIED;
+ bigtable_cached_result_t * ret_cache = global_context -> bigtable_cache + (inner_pair_number - load_start_pair_no)* (1+global_context -> input_reads.is_paired_end_reads) + is_second_read;
+
+ return ret_cache;
+}
+
+/*
+ * Hands out pointers into the cache for the result_number-th result of a read.
+ * Either output pointer may be NULL when that kind of result is not wanted.
+ * The pointers refer to cache memory, not to copies. Always returns 0.
+ */
+int bigtable_retrieve_result(global_context_t * global_context , thread_context_t * thread_context , subread_read_number_t pair_number, int result_number, int is_second_read, mapping_result_t ** return_ptr, subjunc_result_t ** return_junction_ptr)
+{
+    bigtable_cached_result_t * slot = bigtable_retrieve_cache(global_context, thread_context, pair_number, is_second_read, 0);
+
+    if(return_ptr != NULL)
+        *return_ptr = slot -> alignment_res + result_number;
+
+    if(return_junction_ptr != NULL)
+        *return_junction_ptr = slot -> subjunc_res + result_number;
+
+    return 0;
+}
+
+
+/*
+ * Returns the big-margin data of a read, as stored in its cache entry.
+ * The entry is obtained through bigtable_retrieve_cache() with no thread context.
+ */
+unsigned short * _global_retrieve_big_margin_ptr(global_context_t * global_context, subread_read_number_t pair_number, subread_read_number_t is_second_read)
+{
+    bigtable_cached_result_t * slot;
+
+    slot = bigtable_retrieve_cache(global_context, NULL, pair_number, is_second_read, 0);
+    return slot -> big_margin_data;
+}
+
+
+/*
+ * Signals that the caller has finished with the results of a read pair.
+ * In file mode the cache entry at the pair's offset within its chunk is flagged
+ * CACHE_STATUS_RELEASED. A non-zero commit_change marks the loaded chunk as
+ * dirty so that it is written back to the temporary file later.
+ * thread_context is not used.
+ */
+void bigtable_release_result(global_context_t * global_context , thread_context_t * thread_context , subread_read_number_t pair_number, int commit_change)
+{
+    long long inner_no = get_inner_pair(global_context, pair_number);
+    long long chunk_first_no = inner_no - inner_no % global_context -> bigtable_chunked_fragments;
+
+    if(global_context -> bigtable_cache_file_fp){
+        global_context -> bigtable_cache[inner_no - chunk_first_no].status = CACHE_STATUS_RELEASED;
+    }
+
+    if(!commit_change)
+        return;
+
+    global_context -> bigtable_dirty_data = 1;
+}
+
+
+/*
+ * Releases everything owned by the result table: closes and deletes the
+ * temporary file (file mode only), then frees the per-entry result arrays and
+ * the cache itself. Always returns 0.
+ */
+int finalise_bigtable_results(global_context_t * global_context){
+    if(global_context -> bigtable_cache_file_fp){
+        fclose(global_context -> bigtable_cache_file_fp);
+
+        char tmpfname[MAX_FILE_NAME_LENGTH];
+        /* Bounded formatting: the prefix plus the suffix may not fit the buffer. */
+        snprintf(tmpfname, sizeof(tmpfname), "%s-%02d-align.bin", global_context -> config.temp_file_prefix, 0);
+        unlink(tmpfname);
+    }
+
+    int x1;
+    for(x1 = 0; x1 < global_context -> bigtable_cache_size; x1++){
+        free(global_context -> bigtable_cache[x1].alignment_res);
+        /* subjunc_res is only allocated when breakpoint detection is enabled. */
+        if(global_context -> config.do_breakpoint_detection)
+            free(global_context -> bigtable_cache[x1].subjunc_res);
+    }
+
+    free(global_context -> bigtable_cache);
+    return 0;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+/* Key deallocator registered with the entry hash table: keys are heap-allocated strings. */
+void bktable_key_deallocator(void * key)
+{
+    free(key);
+    return;
+}
+
+/*
+ * Value deallocator registered with the entry hash table: frees a bucket and its
+ * two arrays. The objects referenced from `details` are not freed here.
+ */
+void bktable_bucket_deallocator(void * vbuk)
+{
+    bucketed_table_bucket_t * bucket = (bucketed_table_bucket_t *) vbuk;
+
+    free(bucket -> details);
+    free(bucket -> positions);
+    free(bucket);
+}
+
+/*
+ * Prepares an empty bucketed table.
+ *   maximum_interval_length : width of one bucket, and the longest interval a
+ *                             lookup is expected to span.
+ *   expected_items          : sizing hint; a third of it is passed to
+ *                             HashTableCreate().
+ * NOTE(review): expected_items below 3 passes 0 to HashTableCreate(); confirm
+ * that the hash table accepts that.
+ */
+void bktable_init(bucketed_table_t * tab, unsigned int maximum_interval_length, unsigned int expected_items)
+{
+    memset(tab, 0, sizeof(*tab));
+
+    tab -> maximum_interval_length = maximum_interval_length;
+    tab -> expected_items = expected_items;
+    tab -> entry_table = HashTableCreate(expected_items / 3);
+
+    HashTableSetDeallocationFunctions(tab -> entry_table, bktable_key_deallocator, bktable_bucket_deallocator);
+    HashTableSetKeyComparisonFunction(tab -> entry_table, fc_strcmp_chro);
+    HashTableSetHashFunction(tab -> entry_table, HashTableStringHashFunction);
+}
+
+/* Destroys the entry hash table of `tab`; the bucketed_table_t itself is not freed. */
+void bktable_destroy(bucketed_table_t * tab)
+{
+    HashTableDestroy(tab -> entry_table);
+    return;
+}
+
+/*
+ * Registers `detail` at position `pos` of chromosome `chro`.
+ *
+ * Buckets are keyed "<chro>:<bucket start>", where the bucket start is `pos`
+ * rounded down to a multiple of maximum_interval_length. The item is stored in
+ * its own bucket and in the bucket just before it, because bktable_lookup() only
+ * inspects the bucket of the start position and an interval starting in the
+ * previous bucket may reach this item.
+ *
+ * A NULL `detail` is ignored. The table stores the pointer, not a copy.
+ */
+void bktable_append(bucketed_table_t * tab, char * chro, unsigned int pos, void * detail){
+    unsigned int twokeys[2], keyi;
+    //assert(detail != NULL);
+    if(detail == NULL) return;
+
+    twokeys[0] = pos - pos % tab -> maximum_interval_length;
+    /* ">=" rather than ">": when the home bucket is the second one, the previous
+     * bucket starts at 0 and must receive the item too. Only the first bucket
+     * (key 0) has no predecessor. */
+    if(twokeys[0] >= tab -> maximum_interval_length)
+        twokeys[1] = twokeys[0] - tab -> maximum_interval_length;
+    else twokeys[1] = 0xffffffff;
+
+    for(keyi = 0; keyi < 2 ; keyi++)
+    {
+        char static_key [20 + MAX_CHROMOSOME_NAME_LEN];
+        unsigned int curr_key_pos = twokeys[keyi];
+        if(curr_key_pos == 0xffffffff) continue;
+
+        /* Bounded formatting protects the stack buffer from an over-long name. */
+        snprintf(static_key, sizeof(static_key), "%s:%u", chro, curr_key_pos);
+
+        bucketed_table_bucket_t * had_items = HashTableGet(tab -> entry_table, static_key);
+
+        //SUBREADprintf("INSERT STATIC_KEY [%d] = %s ; FOUND = %p ; ITEMS=%d\n", keyi, static_key, had_items, had_items?had_items->items:-1);
+
+        if(NULL == had_items)
+        {
+            /* First item of this bucket: create it and hand a heap copy of the key to the hash table. */
+            had_items = malloc(sizeof(bucketed_table_bucket_t));
+            memset(had_items, 0, sizeof(bucketed_table_bucket_t));
+
+            had_items -> capacity = BUCKETED_TABLE_INIT_ITEMS;
+            had_items -> positions = malloc(sizeof(int) * BUCKETED_TABLE_INIT_ITEMS);
+            had_items -> details = malloc(sizeof(void *) *BUCKETED_TABLE_INIT_ITEMS);
+            had_items -> keyed_bucket = curr_key_pos;
+            had_items -> maximum_interval_length = tab -> maximum_interval_length;
+
+            char * dynamic_key = malloc(strlen(static_key) + 1);
+            strcpy(dynamic_key, static_key);
+            HashTablePut(tab->entry_table, dynamic_key, had_items);
+        }
+        if(had_items -> capacity <= had_items -> items){
+            /* Grow by 30%, but by at least 5 slots. */
+            had_items -> capacity = max(had_items -> capacity + 5, had_items -> capacity * 1.3);
+            had_items -> positions = realloc(had_items -> positions, had_items -> capacity * sizeof(int));
+            had_items -> details = realloc(had_items -> details, had_items -> capacity * sizeof(void *));
+        }
+
+        had_items -> positions[ had_items -> items ] = pos;
+        had_items -> details[ had_items -> items ] = detail;
+        had_items -> items++;
+    }
+
+    tab -> fragments ++;
+}
+
+
+/*
+ * Frees the `details` objects owned by one bucket. A detail is freed only from
+ * the bucket whose key equals its own position rounded down to the bucket width,
+ * so entries that merely appear in a neighbouring bucket are skipped.
+ * `tab` is not used.
+ */
+void bktable_free_ptrs(void * buckv, HashTable * tab)
+{
+    bucketed_table_bucket_t * bucket = buckv;
+    int idx;
+
+    for(idx = 0; idx < bucket -> items; idx++)
+    {
+        if(bucket->positions[idx] - bucket->positions[idx] % bucket -> maximum_interval_length != bucket -> keyed_bucket)
+            continue;
+
+        free(bucket->details[idx]);
+    }
+}
+
+/*
+ * Collects the items of chromosome `chro` whose position lies in
+ * [start_pos, start_pos + interval_length).
+ *
+ * Only the bucket that contains start_pos is inspected.
+ *
+ *   hit_pos_list / hit_ptr_list : output arrays with room for max_hits entries.
+ *   max_hits                    : capacity of the output arrays; nothing is
+ *                                 written when it is zero or negative.
+ *
+ * Returns the number of hits stored (0 when the bucket does not exist).
+ */
+int bktable_lookup(bucketed_table_t * tab, char * chro, unsigned int start_pos, unsigned int interval_length, unsigned int * hit_pos_list, void ** hit_ptr_list, int max_hits){
+    unsigned int my_key_pos;
+    my_key_pos = start_pos - start_pos % tab -> maximum_interval_length;
+
+    char static_key [20 + MAX_CHROMOSOME_NAME_LEN];
+    /* Bounded formatting protects the stack buffer from an over-long name. */
+    snprintf(static_key, sizeof(static_key), "%s:%u", chro, my_key_pos);
+
+    bucketed_table_bucket_t * had_items = HashTableGet(tab -> entry_table, static_key);
+
+    if(!had_items) // no bucket at all.
+        return 0;
+
+    int item_i, ret = 0;
+    /* The capacity is tested before storing, so the output arrays are never overrun. */
+    for(item_i = 0 ; item_i < had_items->items && ret < max_hits; item_i++){
+        unsigned int potential_pos = had_items -> positions[item_i];
+        /* Written as a difference so that start_pos + interval_length cannot wrap around. */
+        if(potential_pos >= start_pos && potential_pos - start_pos < interval_length)
+        {
+            hit_pos_list[ret] = potential_pos;
+            hit_ptr_list[ret] = had_items -> details[item_i];
+            ret ++;
+        }
+    }
+
+    return ret;
+}
+
+
+/* Resets `list` and allocates room for FRAGMENT_LIST_INIT_ITEMS fragment numbers. */
+void fraglist_init(fragment_list_t * list)
+{
+    memset(list, 0, sizeof(*list));
+
+    list -> capacity = FRAGMENT_LIST_INIT_ITEMS;
+    list -> fragment_numbers = malloc(sizeof(subread_read_number_t) * list -> capacity);
+}
+
+/* Frees the array of fragment numbers; the fragment_list_t itself is not freed. */
+void fraglist_destroy(fragment_list_t * list)
+{
+    free(list -> fragment_numbers);
+    return;
+}
+
+/*
+ * Appends one fragment number, growing the array first when it is full
+ * (by 30%, but by at least 5 slots).
+ */
+void fraglist_append(fragment_list_t * list, subread_read_number_t fragment_number)
+{
+    if(list -> capacity <= list -> fragments){
+        list -> capacity = max(list -> capacity + 5, list -> capacity * 1.3);
+        list -> fragment_numbers = realloc(list -> fragment_numbers, sizeof(subread_read_number_t) * list -> capacity);
+    }
+
+    list -> fragment_numbers[ list -> fragments ] = fragment_number;
+    list -> fragments ++;
+}
+
diff --git a/src/core-bigtable.h b/src/core-bigtable.h
new file mode 100644
index 0000000..518a75d
--- /dev/null
+++ b/src/core-bigtable.h
@@ -0,0 +1,49 @@
+#ifndef __CORE_BIGTABLE_H_
+#define __CORE_BIGTABLE_H_
+
+#include "subread.h"
+#include "core.h"
+#include "hashtable.h"
+#include "gene-algorithms.h"
+// Values of the `status` field of a cache entry (file-backed mode only).
+#define CACHE_STATUS_RELEASED 0
+#define CACHE_STATUS_OCCUPIED 1
+
+// This function creates an empty data structure for all results.
+// The number of reads is unknown at this stage.
+int init_bigtable_results(global_context_t * global_context, int is_rewind);
+
+// This function tries to retrieve the required result data structure into memory and set the return_ptr to the address of the data structure.
+// Junction ptr can be NULL.
+// This function is documented to return ZERO if the record is available and -1 if it is unavailable; the implementation in core-bigtable.c currently always returns ZERO.
+int bigtable_retrieve_result(global_context_t * global_context , thread_context_t * thread_context , subread_read_number_t pair_number, int result_number, int is_second_read, mapping_result_t ** return_ptr, subjunc_result_t ** return_junction_ptr);
+
+// This function notifies the bigtable subsystem to save changes and deallocate the memory block if necessary.
+// Junction ptr can be NULL.
+// If the data has been changed, commit_change must be set to a non-ZERO value.
+void bigtable_release_result(global_context_t * global_context , thread_context_t * thread_context , subread_read_number_t pair_number, int commit_change);
+
+// This function destroys the buffers and deletes the temporary file.
+int finalise_bigtable_results(global_context_t * global_context);
+// Copies one result into caller-provided buffers instead of handing out pointers into the cache; either output pointer can be NULL.
+void bigtable_readonly_result(global_context_t * global_context , thread_context_t * thread_context , subread_read_number_t pair_number, int result_number, int is_second_read, mapping_result_t * return_ptr, subjunc_result_t * return_junction_ptr);
+// Stores the pointer `detail` at position `pos` of chromosome `chro`; a NULL detail is ignored.
+void bktable_append(bucketed_table_t * tab, char * chro, unsigned int pos, void * detail);
+// Fills the two hit lists with up to max_hits items located in [start_pos, start_pos + interval_length) and returns the number of hits.
+int bktable_lookup(bucketed_table_t * tab, char * chro, unsigned int start_pos, unsigned int interval_length, unsigned int * hit_pos_list, void ** hit_ptr_list, int max_hits);
+// Initialises an empty table whose buckets are maximum_interval_length wide.
+void bktable_init(bucketed_table_t * tab, unsigned int maximum_interval_length, unsigned int expected_items);
+// Destroys the hash table held by `tab`.
+void bktable_destroy(bucketed_table_t * tab);
+// Frees the detail objects whose home bucket is `buckv`; `tab` is not used by the implementation.
+void bktable_free_ptrs(void * buckv, HashTable * tab);
+// Initialises an empty list of fragment numbers.
+void fraglist_init(fragment_list_t * list);
+// Appends a fragment number, growing the list when it is full.
+void fraglist_append(fragment_list_t * list, subread_read_number_t fragment_number);
+// Frees the array held by the list.
+void fraglist_destroy(fragment_list_t * list);
+// Writes the loaded chunk back to the temporary file when it has been marked as changed; does nothing in memory-only mode.
+void bigtable_write_thread_cache(global_context_t * global_context);
+
+#endif
diff --git a/src/core-indel.c b/src/core-indel.c
index 4b3172c..07b48cb 100644
--- a/src/core-indel.c
+++ b/src/core-indel.c
@@ -31,6 +31,7 @@
#include "core.h"
#include "core-indel.h"
#include "core-junction.h"
+#include "core-bigtable.h"
#include "sublog.h"
@@ -200,10 +201,10 @@ int anti_supporting_read_scan(global_context_t * global_context)
int best_read_id;
for(best_read_id = 0; best_read_id < global_context -> config.multi_best_reads; best_read_id++)
{
- alignment_result_t *current_result = _global_retrieve_alignment_ptr(global_context, current_read_number, is_second_read, best_read_id);
+ mapping_result_t *current_result = _global_retrieve_alignment_ptr(global_context, current_read_number, is_second_read, best_read_id);
if(current_result -> selected_votes<1) break;
if(!global_context->config.report_multi_mapping_reads)if(current_result -> result_flags & CORE_IS_BREAKEVEN) continue;
- if(current_result -> result_flags & CORE_IS_GAPPED_READ) continue;
+ //if(current_result -> result_flags & CORE_IS_GAPPED_READ) continue;
if(current_result->selected_votes < global_context->config.minimum_subread_for_first_read)
continue;
@@ -251,6 +252,7 @@ int anti_supporting_read_scan(global_context_t * global_context)
}
}
}
+ bigtable_release_result(global_context, NULL, current_read_number, 0);
}
free(small_side_ordered_event_ids);
@@ -308,41 +310,6 @@ chromosome_event_t * reallocate_event_space( global_context_t* global_context,th
}
-void set_alignment_result(global_context_t * global_context, int pair_number, int is_second_read, int best_read_id, unsigned int position, int votes, gene_vote_number_t* indel_record, short best_cover_start, short best_cover_end, int is_negative_strand, unsigned int minor_position, unsigned int minor_votes, unsigned int minor_coverage_start, unsigned int minor_coverage_end, unsigned int split_point, int inserted_bases, int is_strand_jumped, int is_GT_AG_donors, int used_subreads_in_vote, [...]
-{
- if(best_read_id >= global_context->config.multi_best_reads) return;
- alignment_result_t * alignment_result = _global_retrieve_alignment_ptr(global_context, pair_number, is_second_read, best_read_id);
- alignment_result -> selected_position = position;
- alignment_result -> selected_votes = votes;
- alignment_result -> indels_in_confident_coverage = indel_recorder_copy(alignment_result -> selected_indel_record, indel_record);
- alignment_result -> confident_coverage_end = best_cover_end;
- alignment_result -> confident_coverage_start = best_cover_start;
- alignment_result -> result_flags = is_negative_strand? (alignment_result -> result_flags|CORE_IS_NEGATIVE_STRAND):(alignment_result -> result_flags &~CORE_IS_NEGATIVE_STRAND);
- alignment_result -> result_flags = is_strand_jumped? (alignment_result -> result_flags|CORE_IS_STRAND_JUMPED):(alignment_result -> result_flags &~CORE_IS_STRAND_JUMPED);
-
- alignment_result -> result_flags&=~0x3;
- if(is_GT_AG_donors<0 || is_GT_AG_donors>2) alignment_result -> result_flags |= 3;
- else
- alignment_result -> result_flags = is_GT_AG_donors? (alignment_result -> result_flags|CORE_IS_GT_AG_DONORS):(alignment_result -> result_flags &~CORE_IS_GT_AG_DONORS);
-
- alignment_result -> used_subreads_in_vote = used_subreads_in_vote;
- alignment_result -> noninformative_subreads_in_vote = noninformative_subreads_in_vote;
-
-
- if(global_context -> config.is_rna_seq_reads)
- {
- subjunc_result_t * subjunc_result = _global_retrieve_subjunc_ptr(global_context, pair_number, is_second_read, best_read_id);
- subjunc_result -> split_point = split_point;
- subjunc_result -> minor_position = minor_position;
- subjunc_result -> minor_votes = minor_votes;
- subjunc_result -> minor_coverage_start = minor_coverage_start;
- subjunc_result -> minor_coverage_end = minor_coverage_end;
- subjunc_result -> indel_at_junction = (char) inserted_bases;
- subjunc_result -> double_indel_offset = (minor_indel_offset & 0xf)|((major_indel_offset & 0xf)<<4);
- }
- ////if(is_negative_strand)printf("NEG:%d; %d\n", pair_number, alignment_result -> result_flags );
-}
-
int is_ambiguous_indel_score(chromosome_event_t * e)
{
return 0;
@@ -352,8 +319,6 @@ int is_ambiguous_indel_score(chromosome_event_t * e)
//return 0;
}
-
-
int event_neighbour_sort_compare(void * arr, int l, int r){
unsigned int ** sort_data = (unsigned int **) arr;
if(sort_data[1][l] > sort_data[1][r]) return 1;
@@ -433,8 +398,6 @@ int test_redundant_event( global_context_t * global_context, chromosome_event_t*
return is_redund;
}
-#define remove_sorted_neighbours remove_neighbour
-
void remove_sorted_neighbours(global_context_t * global_context)
{
indel_context_t * indel_context = (indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID];
@@ -489,20 +452,24 @@ void remove_sorted_neighbours(global_context_t * global_context)
}
- //SUBREADprintf("%d neighbours were removed\n", to_be_removed_number);
+ SUBREADprintf("%d neighbours were removed\n", to_be_removed_number);
for(xk1=0; xk1<to_be_removed_number; xk1++)
{
chromosome_event_t * deleted_event = &event_space[to_be_removed_ids[xk1]];
int * id_list = HashTableGet(event_table, NULL+deleted_event-> event_small_side);
int xk2;
- for(xk2=0; xk2<MAX_EVENT_ENTRIES_PER_SITE; xk2++)
+ for(xk2=1; xk2<MAX_EVENT_ENTRIES_PER_SITE; xk2++)
if(to_be_removed_ids[xk1] == id_list[xk2] - 1)break;
if(xk2<MAX_EVENT_ENTRIES_PER_SITE)
{
int xk3;
for(xk3 = xk2; xk3<MAX_EVENT_ENTRIES_PER_SITE -1; xk3++)
+ {
+ if(id_list[xk3+1]==0) break;
+
id_list[xk3] = id_list[xk3+1];
+ }
id_list[xk3] = 0;
}
@@ -518,11 +485,9 @@ void remove_sorted_neighbours(global_context_t * global_context)
}
-
-
-void remove_neighbour_random(global_context_t * global_context)
+void remove_neighbour(global_context_t * global_context)
{
- //#warning "====================== MUST COMMENT THIS LINE!! ====================="
+ //#warning "====================== MUST COMMENT THIS LINE!! NOT REMOVING NEIGHBOURS FOR DETECTING PAIRED INVERSION EVENTS ====================="
//return;
indel_context_t * indel_context = (indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID];
@@ -546,7 +511,7 @@ void remove_neighbour_random(global_context_t * global_context)
{
if(event_type) // for INDELs
{
- int neighbour_range = 10;
+ int neighbour_range = 3;
if(event_body->event_type != CHRO_EVENT_TYPE_INDEL) continue;
if(to_be_removed_number >= maxinum_removed_events) break;
@@ -611,7 +576,7 @@ void remove_neighbour_random(global_context_t * global_context)
chromosome_event_t * tested_neighbour = search_return[xk3];
if(tested_neighbour -> indel_at_junction > event_body -> indel_at_junction) continue;
- if(event_body -> indel_at_junction > tested_neighbour -> indel_at_junction && tested_neighbour -> event_large_side - tested_neighbour -> event_small_side + tested_neighbour -> indel_at_junction == event_body -> event_large_side - event_body -> event_small_side + event_body -> indel_at_junction)
+ if(event_body -> indel_at_junction > tested_neighbour -> indel_at_junction && abs(tested_neighbour -> event_large_side - tested_neighbour -> event_small_side + tested_neighbour -> indel_at_junction - event_body -> event_large_side + event_body -> event_small_side - event_body -> indel_at_junction) <= 16)
to_be_removed_ids[to_be_removed_number++] = event_body -> global_event_id;
else if(tested_neighbour -> event_large_side >= event_body -> event_large_side -indel_range + xk2 && tested_neighbour -> event_large_side <= event_body -> event_large_side + indel_range + xk2 && (event_body -> supporting_reads < tested_neighbour -> supporting_reads || (event_body -> supporting_reads == tested_neighbour -> supporting_reads && xk2<0)))
to_be_removed_ids[to_be_removed_number++] = event_body -> global_event_id;
@@ -631,9 +596,17 @@ void remove_neighbour_random(global_context_t * global_context)
unsigned int test_pos_small = event_body -> event_small_side + xk2;
chromosome_event_t * search_return [MAX_EVENT_ENTRIES_PER_SITE];
- int found_events = search_event(global_context,event_table, event_space, test_pos_small + delta_small , EVENT_SEARCH_BY_BOTH_SIDES, CHRO_EVENT_TYPE_JUNCTION|CHRO_EVENT_TYPE_FUSION, search_return);
- if(found_events)
- to_be_removed_ids[to_be_removed_number++] = event_body -> global_event_id;
+ int xk3, found_events = search_event(global_context,event_table, event_space, test_pos_small + delta_small , EVENT_SEARCH_BY_BOTH_SIDES, CHRO_EVENT_TYPE_JUNCTION|CHRO_EVENT_TYPE_FUSION, search_return);
+
+ for(xk3 = 0; xk3<found_events; xk3++)
+ {
+ chromosome_event_t * tested_neighbour = search_return[xk3];
+ if(found_events && event_body -> supporting_reads < tested_neighbour -> supporting_reads)
+ {
+ to_be_removed_ids[to_be_removed_number++] = event_body -> global_event_id;
+ break;
+ }
+ }
}
}
}
@@ -643,19 +616,28 @@ void remove_neighbour_random(global_context_t * global_context)
for(xk1=0; xk1<to_be_removed_number; xk1++)
{
chromosome_event_t * deleted_event = &event_space[to_be_removed_ids[xk1]];
+
int * id_list = HashTableGet(event_table, NULL+deleted_event-> event_small_side);
- int xk2;
- for(xk2=0; xk2<MAX_EVENT_ENTRIES_PER_SITE; xk2++)
- if(to_be_removed_ids[xk1] == id_list[xk2] - 1)break;
- if(xk2<MAX_EVENT_ENTRIES_PER_SITE)
- {
- int xk3;
- for(xk3 = xk2; xk3<MAX_EVENT_ENTRIES_PER_SITE -1; xk3++)
- id_list[xk3] = id_list[xk3+1];
- id_list[xk3] = 0;
- }
+ if(NULL == id_list){
+ SUBREADprintf("Missing entry : %u for %d\n", deleted_event-> event_small_side, to_be_removed_ids[xk1]);
+ }else{
+ int xk2, current_items = id_list[0]&0x0fffffff;
+ for(xk2=1; xk2< current_items && id_list[xk2] >0 ; xk2++)
+ if(to_be_removed_ids[xk1] == id_list[xk2] - 1)break;
+ if(xk2< current_items && id_list[xk2] > 0)
+ {
+ int xk3;
+ for(xk3 = xk2; xk3<current_items -1; xk3++)
+ {
+ if(0==id_list[xk3+1]) break;
- //printf("NBR_REMOVED=%u - %u\n", deleted_event -> event_small_side , deleted_event -> event_large_side);
+ id_list[xk3] = id_list[xk3+1];
+ }
+ id_list[xk3] = 0;
+ }
+
+ }
+ //printf("NBR_REMOVED=%u - %u\n", deleted_event -> event_small_side , deleted_event -> event_large_side);
if(deleted_event -> event_type == CHRO_EVENT_TYPE_INDEL && deleted_event -> inserted_bases)
free(deleted_event -> inserted_bases);
deleted_event -> event_type = CHRO_EVENT_TYPE_REMOVED;
@@ -742,35 +724,7 @@ int init_indel_thread_contexts(global_context_t * global_context, thread_context
indel_thread_context_t * indel_thread_context = (indel_thread_context_t*)malloc(sizeof(indel_thread_context_t));
indel_context_t * indel_context = (indel_context_t *) global_context -> module_contexts[MODULE_INDEL_ID];
- if(task == STEP_VOTING)
- {
-/* indel_thread_context -> event_entry_table = HashTableCreate(399997);
- indel_thread_context -> event_entry_table -> appendix1=indel_context -> event_entry_table-> appendix1;
- indel_thread_context -> event_entry_table -> appendix2=indel_context -> event_entry_table-> appendix2;
- HashTableSetKeyComparisonFunction(indel_thread_context->event_entry_table, localPointerCmp_forEventEntry);
- HashTableSetHashFunction(indel_thread_context->event_entry_table, localPointerHashFunction_forEventEntry);
-
- indel_thread_context -> total_events = 0;
- indel_thread_context -> current_max_event_number = global_context->config.init_max_event_number;
- indel_thread_context -> event_space_dynamic = malloc(sizeof(chromosome_event_t)*indel_thread_context -> current_max_event_number);
- if(!indel_thread_context -> event_space_dynamic)
- {
- sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_FATAL, "Cannot allocate memory for threads. Please try to reduce the thread number.");
- return 1;
- }
-
- indel_thread_context -> dynamic_align_table = malloc(sizeof(short*)*MAX_READ_LENGTH);
- indel_thread_context -> dynamic_align_table_mask = malloc(sizeof(char *)*MAX_READ_LENGTH);
-
- int xk1;
- for(xk1=0;xk1<MAX_READ_LENGTH; xk1++)
- {
- indel_thread_context -> dynamic_align_table[xk1] = malloc(sizeof(short)*MAX_READ_LENGTH);
- indel_thread_context -> dynamic_align_table_mask[xk1] = malloc(sizeof(char)*MAX_READ_LENGTH);
- }
-*/
- }
- else if(task == STEP_ITERATION_ONE)
+ if(task == STEP_VOTING || task == STEP_ITERATION_ONE)
{
indel_thread_context -> event_entry_table = HashTableCreate(399997);
indel_thread_context -> event_entry_table -> appendix1=indel_context -> event_entry_table-> appendix1;
@@ -808,6 +762,13 @@ int init_indel_thread_contexts(global_context_t * global_context, thread_context
memset(indel_thread_context -> final_reads_mismatches_array , 0, sizeof(unsigned short)*indel_context -> total_events);
memset(indel_thread_context -> final_counted_reads_array , 0, sizeof(unsigned short)*indel_context -> total_events);
+
+ thread_context -> output_buffer = malloc(sizeof(output_fragment_buffer_t) * global_context -> config.reported_multi_best_reads * MULTI_THREAD_OUTPUT_ITEMS);
+ thread_context -> output_buffer_item = 0;
+ thread_context -> output_buffer_pointer = 0;
+
+ subread_init_lock(&thread_context -> output_lock);
+
}
thread_context -> module_thread_contexts[MODULE_INDEL_ID] = indel_thread_context;
@@ -826,6 +787,7 @@ void destory_event_entry_table(HashTable * old)
{
if (!cursor) break;
int * id_list = (int *) cursor ->value;
+//#warning "=============== UNCOMMENT THE NEXT LINE IN THE RELEASE!!! THIS IS FOR REMOVING A SEGFAULT ERROR =================="
free(id_list);
cursor = cursor->next;
@@ -838,10 +800,7 @@ int finalise_indel_thread(global_context_t * global_context, thread_context_t *
{
indel_context_t * indel_context = (indel_context_t*)global_context -> module_contexts[MODULE_INDEL_ID];
indel_thread_context_t * indel_thread_context = (indel_thread_context_t*)thread_context -> module_thread_contexts[MODULE_INDEL_ID];
- if(task == STEP_VOTING)
- {
- }
- if(task == STEP_ITERATION_ONE)
+ if(task == STEP_VOTING || task == STEP_ITERATION_ONE)
{
int xk1;
for(xk1 = 0; xk1 < indel_thread_context -> total_events; xk1++)
@@ -903,6 +862,7 @@ int finalise_indel_thread(global_context_t * global_context, thread_context_t *
}
free(indel_thread_context -> final_counted_reads_array);
free(indel_thread_context -> final_reads_mismatches_array);
+ free(thread_context -> output_buffer);
}
free(indel_thread_context);
return 0;
@@ -954,6 +914,7 @@ void mark_event_bitmap(unsigned char * bitmap, unsigned int pos)
bitmap[offset_byte] |= (1<<offset_bit);
}
+
void put_new_event(HashTable * event_table, chromosome_event_t * new_event , int event_no)
{
unsigned int sides[2];
@@ -969,13 +930,35 @@ void put_new_event(HashTable * event_table, chromosome_event_t * new_event , int
unsigned int * id_list = HashTableGet(event_table, NULL+sides[xk1]);
if(!id_list)
{
- id_list = malloc(sizeof(int)*MAX_EVENT_ENTRIES_PER_SITE);
- id_list[0]=0;
+ //#warning "====== DO NOT NEED TO CLEAR THE MEMORY BUFFER! MALLOC IS GOOD ======"
+ //id_list = calloc(sizeof(unsigned int),EVENT_ENTRIES_INIT_SIZE);
+ id_list = malloc(sizeof(unsigned int)*EVENT_ENTRIES_INIT_SIZE);
+ id_list[0]=EVENT_ENTRIES_INIT_SIZE;
+ id_list[1]=0;
HashTablePut(event_table , NULL+sides[xk1] , id_list);
}
- for(xk2=0;xk2<MAX_EVENT_ENTRIES_PER_SITE; xk2++)
+
+ unsigned int current_capacity = id_list[0] & 0x0fffffff;
+ assert(current_capacity >= EVENT_ENTRIES_INIT_SIZE && current_capacity <= MAX_EVENT_ENTRIES_PER_SITE);
+
+ for(xk2=1;xk2<MAX_EVENT_ENTRIES_PER_SITE; xk2++)
if(id_list[xk2]==0) break;
- if(xk2 < MAX_EVENT_ENTRIES_PER_SITE )
+
+ if(xk2 >= current_capacity - 1 && current_capacity < MAX_EVENT_ENTRIES_PER_SITE)
+ {
+ while(1){
+ id_list = HashTableGet(event_table, NULL+sides[xk1]);
+ if((id_list[0] & 0xf0000000)==0)break;
+ }
+
+ id_list[0] |= 0x80000000;
+ current_capacity = min(MAX_EVENT_ENTRIES_PER_SITE, current_capacity*2);
+ id_list = realloc(id_list, sizeof(unsigned int) * current_capacity);
+ id_list[0] = current_capacity;
+ HashTablePut(event_table, NULL+sides[xk1] , id_list);
+ }
+
+ if(xk2 < MAX_EVENT_ENTRIES_PER_SITE)
id_list[xk2] = event_no+1;
if(xk2 < MAX_EVENT_ENTRIES_PER_SITE -1) id_list[xk2+1] = 0;
}
@@ -1006,16 +989,24 @@ int search_event(global_context_t * global_context, HashTable * event_table, chr
}
unsigned int * res = HashTableGet(event_table, NULL+pos);
+ if(0 && 42399326 == pos){
+ SUBREADprintf("EVENT_HIT=%p for %u\n", res, pos);
+ }
if(res)
{
int xk2;
- for(xk2=0; xk2<MAX_EVENT_ENTRIES_PER_SITE; xk2++)
+ int current_size = res[0]&0x0fffffff;
+ for(xk2=1; xk2< current_size ; xk2++)
{
+ if(0 && res[xk2] > 520000){
+ SUBREADprintf("TOO LARGE EVENT : %u ; POS=%d/%u\n", res[xk2] , xk2, res[0]);
+ }
if(!res[xk2])break;
//if(res[xk2] - 1>= ((indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID]) -> current_max_event_number ) { SUBREADprintf("FATAL ERROR: Event id out-of-boundary: %u > %u!\n", res[xk2], ((indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID]) -> current_max_event_number ); continue;}
chromosome_event_t * event_body = &event_space[res[xk2]-1];
if((event_body -> event_type & event_type) == 0)continue;
+ //SUBREADprintf("VB1:%u\n", res[xk2]);
if(search_type == EVENT_SEARCH_BY_SMALL_SIDE && event_body -> event_small_side != pos)continue;
if(search_type == EVENT_SEARCH_BY_LARGE_SIDE && event_body -> event_large_side != pos)continue;
if(search_type == EVENT_SEARCH_BY_BOTH_SIDES && event_body -> event_small_side != pos && event_body -> event_large_side != pos)continue;
@@ -1121,6 +1112,8 @@ chromosome_event_t * local_add_indel_event(global_context_t * global_context, th
new_event -> event_quality = 1;//pow(0.5 , 3*mismatched_bases);
//new_event -> is_ambiguous = is_ambiguous;
+ //SUBREADprintf("NEW INDEL:%d LEFT=%u RIGHT=%u\n", new_event -> indel_length, new_event -> event_small_side , new_event -> event_large_side);
+
put_new_event(event_table, new_event , event_no);
return new_event;
}
@@ -1134,7 +1127,77 @@ float EXON_INDEL_MATCHING_RATE_HEAD = 0.8;
-int core_extend_covered_region(gene_value_index_t *array_index, unsigned int read_start_pos, char * read, int read_len, int cover_start, int cover_end, int window_size, int req_match_5end , int req_match_3end, int indel_tolerance, int space_type, int tail_indel, short * head_indel_pos, int * head_indel_movement, short * tail_indel_pos, int * tail_indel_movement, int is_head_high_quality, char * qual_txt, int qual_format, float head_matching_rate, float tail_matching_rate)
+int core_extend_covered_region_15(global_context_t * global_context, gene_value_index_t *array_index, unsigned int read_start_pos, char * read, int read_len, int cover_start, int cover_end, int window_size, int req_match_5end , int req_match_3end, int indel_tolerance, int space_type, int tail_indel, short * head_indel_pos, int * head_indel_movement, short * tail_indel_pos, int * tail_indel_movement, int is_head_high_quality, char * qual_txt, int qual_format, float head_matching_rate, flo [...]
+ int is_head; // side selector for the loop below: 0 = tail end of the read, 1 = head end of the read
+// Overview: for each read end, pick the reference shift at which the outermost window_size bases score best in match_chro, then search for the split point on the read and report it through the head_/tail_ output pointers.
+ if(0){ // compiled-out debug trace of the read text and its mapped position
+ char posout[100];
+ absoffset_to_posstr(global_context, read_start_pos, posout);
+ SUBREADprintf("RTXT=%s, MAPP=%s\n", read, posout);
+ }
+// Among the parameters visible in the signature, this body does not reference req_match_5end, req_match_3end, is_head_high_quality, qual_txt, qual_format or head_matching_rate.
+ for(is_head = 0; is_head < 2; is_head ++){ // tail pass first (is_head==0), then head pass
+ int move_i;
+ int best_match_n = -1, best_movement = 0; // -1 lets any score above it win; a final movement of 0 means no indel is searched for
+ for(move_i = 0; move_i < 2* indel_tolerance-1; move_i ++){ // 2*indel_tolerance-1 candidate shifts
+ int indel_move = (move_i+1)/2 * (move_i %2?1:-1); // yields 0, +1, -1, +2, -2, ... up to magnitude indel_tolerance-1
+ int indel_movement = indel_move + (is_head?0:tail_indel); // the tail pass is offset by tail_indel; the head pass uses the bare shift
+ int this_match_n;
+// Score the outermost window_size bases of this end against the reference at the shifted position (match_chro presumably returns a matched-base count -- confirm).
+ if(is_head)
+ this_match_n = match_chro(read, array_index, read_start_pos - indel_movement, window_size, 0, space_type );
+ else
+ this_match_n = match_chro(read + read_len - window_size, array_index, read_start_pos + read_len - window_size + indel_movement, window_size, 0, space_type );
+
+ // indel_movement : negative = insertion, positive = deletion before or after the covered region
+
+ //SUBREADprintf("MOVE: HEAD=%d, MATCH=%d, MOV=%d\n", is_head, this_match_n, indel_movement);
+ if(this_match_n > best_match_n){ // strict '>' keeps the earliest candidate on ties, i.e. the shift tried first
+ best_match_n = this_match_n;
+ best_movement = indel_movement;
+ }
+ }
+// Second stage: only when the best window score is positive and the winning shift is non-zero, search for where the indel sits on the read.
+ int max_score=-1, best_splice_on_read=0; // declared inside the per-end loop, so reset for each end
+ if(best_match_n >0 && best_movement!=0){
+ int test_start = is_head?window_size:(cover_end); // first candidate split: window_size for the head, cover_end for the tail
+ int test_end = is_head?cover_start - max(0, -best_movement):(read_len - window_size - max(0, -best_movement)); // exclusive upper bound; reduced by the insertion length when best_movement is negative
+ int indel_first_piece_end_on_read, indel_second_piece_start_on_read;
+// NOTE(review): in the tail pass best_movement already contains tail_indel, and tail_indel is added again to the first piece's reference position below -- confirm this is intended.
+ for(indel_first_piece_end_on_read = test_start ; indel_first_piece_end_on_read < test_end; indel_first_piece_end_on_read++){
+ unsigned int indel_first_piece_end_on_chro = read_start_pos + indel_first_piece_end_on_read + (is_head? -best_movement :tail_indel); // reference coordinate of the split: head shifted by -best_movement, tail by tail_indel
+ indel_second_piece_start_on_read = indel_first_piece_end_on_read - min(0, best_movement); // for a negative movement (insertion) skip the inserted bases on the read
+ unsigned int indel_second_piece_start_on_chro = indel_first_piece_end_on_chro + max(0,best_movement); // for a positive movement (deletion) skip the deleted bases on the reference
+ int first_piece_match = match_chro(read + indel_first_piece_end_on_read - window_size , array_index , indel_first_piece_end_on_chro - window_size, window_size, 0, space_type); // window_size bases ending at the split
+ int second_piece_match = match_chro(read + indel_second_piece_start_on_read , array_index , indel_second_piece_start_on_chro, window_size, 0, space_type); // window_size bases starting after the indel
+ int score = first_piece_match + second_piece_match;
+ if(score > max_score){ // strict '>' keeps the first split point that reaches the best score
+ max_score = score;
+ best_splice_on_read = indel_first_piece_end_on_read;
+ }
+ if(score == 2*window_size)break; // combined score equals 2*window_size: stop scanning
+ //SUBREADprintf("FIND_SPLICE_POINT: IS_HEAD=%d, FIRST_PIECE_END=%d-%d, CHRO=%u-%u MATCHED=%d,%d\n", is_head, indel_first_piece_end_on_read, indel_second_piece_start_on_read, indel_first_piece_end_on_chro, indel_second_piece_start_on_chro, first_piece_match, second_piece_match);
+ }
+ }
+// Report only when the combined score is at least 2*window_size-1; otherwise the values behind the output pointers are left untouched.
+ if(max_score >= 2*window_size - 1){
+ if(is_head){
+ (*head_indel_pos) = best_splice_on_read;
+ (*head_indel_movement) = best_movement;
+ } else {
+ (*tail_indel_pos) = best_splice_on_read;
+ (*tail_indel_movement) = best_movement;
+ }
+ }
+ }
+// Always returns 0; results are delivered solely through the four output pointers.
+ return 0;
+
+}
+
+
+
+int core_extend_covered_region_13(gene_value_index_t *array_index, unsigned int read_start_pos, char * read, int read_len, int cover_start, int cover_end, int window_size, int req_match_5end , int req_match_3end, int indel_tolerance, int space_type, int tail_indel, short * head_indel_pos, int * head_indel_movement, short * tail_indel_pos, int * tail_indel_movement, int is_head_high_quality, char * qual_txt, int qual_format, float head_matching_rate, float tail_matching_rate)
{
int ret = 0;
*head_indel_pos = -1;
@@ -1184,7 +1247,9 @@ int core_extend_covered_region(gene_value_index_t *array_index, unsigned int rea
int test_start =0;// window_end_pos - max(0, indel_movement) - right_match_number - test_length;
- int matched_bases_after_indel = match_chro_support(read +test_start, array_index, read_start_pos + indel_movement +test_start, test_length,0, space_type, qual_txt, qual_format);
+ float matched_bases_after_indel = match_chro_support(read +test_start, array_index, read_start_pos + indel_movement +test_start, test_length,0, space_type, qual_txt, qual_format);
+ SUBREADprintf("HEAD : MATCHED_AFTER_INDEL = %f ; MVMT=%d ; WINDOW_END=%d\n", matched_bases_after_indel, indel_movement, window_end_pos);
+
float test_rate = head_matching_rate;
if(test_length < 3) test_rate = 1;
@@ -1204,8 +1269,9 @@ int core_extend_covered_region(gene_value_index_t *array_index, unsigned int rea
}
}
if(best_indel_pos<0) *head_indel_pos = window_end_pos - right_match_number;
- break;
- }else window_end_pos--;
+ // break;
+ }
+ window_end_pos--;
}
if (window_end_pos - window_size <= 0) break;
}
@@ -1269,9 +1335,8 @@ int core_extend_covered_region(gene_value_index_t *array_index, unsigned int rea
char * qual_txt_moved = qual_txt;
if(qual_txt[0]) qual_txt_moved+=window_start_pos - min(0, indel_movement) + left_match_number;
- int matched_bases_after_indel = match_chro_support(read + window_start_pos - min(0, indel_movement) + left_match_number, array_index, read_start_pos + window_start_pos + max(0,indel_movement) +left_match_number , test_length,0, space_type, qual_txt_moved , qual_format);
-
- //printf("Matched %d/%d (left match #=%d) after moved %d, tail_indel=%d\n", matched_bases_after_indel, test_length, left_match_number, indel_movement, tail_indel);
+ float matched_bases_after_indel = match_chro_support(read + window_start_pos - min(0, indel_movement) + left_match_number, array_index, read_start_pos + window_start_pos + max(0,indel_movement) +left_match_number , test_length,0, space_type, qual_txt_moved , qual_format);
+ SUBREADprintf("TAIL : MATCHED_AFTER_INDEL = %f ; MVMT=%d ; WINDOW_END=%d\n", matched_bases_after_indel, indel_movement, window_start_pos - min(0, indel_movement) + left_match_number);
float test_rate = tail_matching_rate;
@@ -1292,8 +1357,8 @@ int core_extend_covered_region(gene_value_index_t *array_index, unsigned int rea
*tail_indel_pos = window_start_pos + left_match_number ;
else
*tail_indel_pos = best_indel_pos;
- break;
- }else window_start_pos++;
+ }
+ window_start_pos++;
}
if (window_start_pos + window_size >= read_len) break;
}
@@ -1313,7 +1378,7 @@ int core_extend_covered_region(gene_value_index_t *array_index, unsigned int rea
-int core_dynamic_align(global_context_t * global_context, thread_context_t * thread_context, char * read, int read_len, unsigned int begin_position, char * movement_buffer, int expected_offset);
+int core_dynamic_align(global_context_t * global_context, thread_context_t * thread_context, char * read, int read_len, unsigned int begin_position, char * movement_buffer, int expected_offset, char * read_name);
int find_new_indels(global_context_t * global_context, thread_context_t * thread_context, int pair_number, char * read_name, char * read_text, char * qual_text, int read_len, int is_second_read, int best_read_id)
{
@@ -1333,7 +1398,7 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread
event_table = ((indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID]) -> event_entry_table;
- alignment_result_t *current_result = _global_retrieve_alignment_ptr(global_context, pair_number, is_second_read, best_read_id);
+ mapping_result_t *current_result = _global_retrieve_alignment_ptr(global_context, pair_number, is_second_read, best_read_id);
if(global_context->config.do_big_margin_filtering_for_reads)
if(is_ambiguous_voting(global_context, pair_number, is_second_read , current_result->selected_votes, current_result->confident_coverage_start, current_result->confident_coverage_end, read_len, (current_result->result_flags & CORE_IS_NEGATIVE_STRAND)?1:0))return 0;
@@ -1344,6 +1409,7 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread
voting_position = current_result -> selected_position;
+ //SUBREADprintf("NR=%d\n", indel_recorder[0]);
if(!indel_recorder[0])
return 0;
@@ -1360,35 +1426,50 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread
{
int last_correct_base = find_subread_end(read_len, global_context->config.total_subreads , last_correct_subread) - 9;
int first_correct_base = find_subread_end(read_len, global_context->config.total_subreads , next_correct_subread) - 16 + 9;
+
+ last_correct_base = max(0, last_correct_base);
+ last_correct_base = min(read_len-1, last_correct_base);
+ first_correct_base = min(first_correct_base, read_len-1);
+ first_correct_base = max(0, first_correct_base);
+ first_correct_base = max(first_correct_base, last_correct_base);
+
+ if(first_correct_base < last_correct_base || first_correct_base > last_correct_base + 3000)
+ SUBREADprintf("WRONG ORDER: F=%u, L=%d\n", first_correct_base , last_correct_base);
//int last_second_part_base = find_subread_end(read_len, global_context->config.total_subreads , next_correct_subread_last) ;
- //if(pair_number == 5279)printf("INDEL_P03: I=%d; INDELS=%d; POS=%u; COVER=%d -- %d\n", i, indels, current_result->selected_position, last_correct_subread, next_correct_subread);
+ if(0 && FIXLENstrcmp("DB7DT8Q1:236:C2NGTACXX:2:1213:17842:64278", read_name) == 0)
+ //if(current_result->selected_position > 433897 - 100 && current_result->selected_position < 433897)
+ SUBREADprintf("INDEL_P03: I=%d; INDELS=%d; POS=%u; COVER=%d -- %d\n", i, indels, current_result->selected_position, last_correct_subread, next_correct_subread);
if(global_context -> config.use_dynamic_programming_indel || read_len > EXON_LONG_READ_LENGTH)
{
- char movement_buffer[1500];
+ char movement_buffer[MAX_READ_LENGTH * 10 / 7];
//chromosome_event_t * last_event = NULL;
int last_event_id = -1;
first_correct_base = min(first_correct_base+10, read_len);
- int x1, dyna_steps = core_dynamic_align(global_context, thread_context, read_text + last_correct_base, first_correct_base - last_correct_base, voting_position + last_correct_base + last_indel, movement_buffer, indels);
- movement_buffer[dyna_steps]=0;
+ int x1, dyna_steps;
+ dyna_steps = core_dynamic_align(global_context, thread_context, read_text + last_correct_base, first_correct_base - last_correct_base, voting_position + last_correct_base + last_indel, movement_buffer, indels, read_name);
- #ifdef indel_debug
- printf("IR= %d %d~%d\n", dyna_steps, last_correct_base, first_correct_base);
- for(x1=0; x1<dyna_steps;x1++)
+ movement_buffer[dyna_steps]=0;
+
+
+ if(0 && FIXLENstrcmp("DB7DT8Q1:236:C2NGTACXX:2:1213:17842:64278", read_name) == 0)
{
- int mc, mv=movement_buffer[x1];
- if(mv==0)mc='=';
- else if(mv==1)mc='D';
- else if(mv==2)mc='I';
- else mc='X';
- putchar(mc);
- }
- putchar('\n');
+ SUBREADprintf("IR= %d %d~%d\n", dyna_steps, last_correct_base, first_correct_base);
- #endif
+ for(x1=0; x1<dyna_steps;x1++)
+ {
+ int mc, mv=movement_buffer[x1];
+ if(mv==0)mc='=';
+ else if(mv==1)mc='D';
+ else if(mv==2)mc='I';
+ else mc='X';
+ SUBREADprintf("%c",mc);
+ }
+ SUBREADputs("");
+ }
unsigned int cursor_on_chromosome = voting_position + last_correct_base + last_indel, cursor_on_read = last_correct_base;
int last_mv = 0;
unsigned int indel_left_boundary = 0;
@@ -1401,7 +1482,7 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread
}
// if(pair_number==4)printf("XK3==%d\n", total_mismatch);
- if(total_mismatch<2)
+ if(total_mismatch<=2 || (global_context->config.maximise_sensitivity_indel && total_mismatch <= 2 ))
for(x1=0; x1<dyna_steps;x1++)
{
int mv=movement_buffer[x1];
@@ -1436,7 +1517,9 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread
}
//#warning "=========== COMMENT THIS LINE ==============="
- //printf("INDEL_DDADD: abs(I=%d); INDELS=%d; PN=%d; LOC=%u\n",i, current_indel_len, pair_number, indel_left_boundary-1);
+
+ if(0 && FIXLENstrcmp("DB7DT8Q1:236:C2NGTACXX:2:1213:17842:64278", read_name) == 0)
+ SUBREADprintf("INDEL_DDADD: abs(I=%d); INDELS=%d; PN=%d; LOC=%u\n",i, current_indel_len, pair_number, indel_left_boundary-1);
if(abs(current_indel_len)<=global_context -> config.max_indel_length)
{
chromosome_event_t * new_event = local_add_indel_event(global_context, thread_context, event_table, read_text + cursor_on_read + min(0,current_indel_len), indel_left_boundary - 1, current_indel_len, 1, ambiguous_count, 0);
@@ -1557,10 +1640,9 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread
int s_head = match_chro(read_text, current_value_index, voting_position, 8, 0, global_context->config.space_type);
int s_tail = match_chro(read_text+read_len - 8, current_value_index, voting_position + read_len + current_result -> indels_in_confident_coverage - 8, 8, 0, global_context->config.space_type);
- #ifdef indel_debug
- printf("EXT_START %d, %d\n", s_head, s_tail);
- #endif
+ //SUBREADprintf("EXT_START %d, %d\n", s_head, s_tail);
+ //#warning "============ REMOVE THE FIRST '1' FROM THE NEXT LINE! =========="
if(s_head<=6 || s_tail<=6)
{
int is_head_high_quality = (current_result -> result_flags & CORE_IS_NEGATIVE_STRAND)?0:1;
@@ -1640,20 +1722,24 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread
short head_indel_pos=-1 , tail_indel_pos=-1;
int head_indel_movement=0, tail_indel_movement=0;
+ if(0)SUBREADprintf("HQ=%.4f; TQ=%.4f; HM=%d; TM=%d; COVG = %d ~ %d\n", exon_head_matching_rate, exon_tail_matching_rate, head_must_correct, tail_must_correct, cover_start, cover_end);
+ /*
if(exon_head_matching_rate<1)
exon_head_matching_rate = 0.97;
if(exon_tail_matching_rate<1)
exon_tail_matching_rate = 0.97;
-
+ */
+
+ core_extend_covered_region_15(global_context, current_value_index, current_result->selected_position, read_text, read_len, cover_start, cover_end, 7, head_must_correct,tail_must_correct, global_context -> config.max_indel_length + 1, global_context -> config.space_type, current_result -> selected_indel_record[k-1], & head_indel_pos, & head_indel_movement, & tail_indel_pos, & tail_indel_movement, is_head_high_quality, qual_text, global_context -> config.phred_score_format, exon_head_m [...]
- core_extend_covered_region(current_value_index, current_result->selected_position, read_text, read_len, cover_start, cover_end, 4, head_must_correct,tail_must_correct, global_context -> config.max_indel_length + 1, global_context -> config.space_type, current_result -> selected_indel_record[k-1], & head_indel_pos, & head_indel_movement, & tail_indel_pos, & tail_indel_movement, is_head_high_quality, qual_text, global_context -> config.phred_score_format, exon_head_matching_rate, exon_ [...]
+ //head_indel_movement = -head_indel_movement;
- head_indel_movement = -head_indel_movement;
+ if(0)SUBREADprintf("HMV=%d; TMV=%d; TPOS=%d\n", head_indel_movement, tail_indel_movement, tail_indel_pos);
if(head_indel_movement && s_head < 7)
{
unsigned int head_indel_left_edge = head_indel_pos + current_result->selected_position - 1;
- head_indel_left_edge -= max(0, head_indel_movement);
+ //head_indel_left_edge -= max(0, head_indel_movement);
if(head_indel_left_edge>=0 && abs(head_indel_movement)<=global_context -> config.max_indel_length)
{
local_add_indel_event(global_context, thread_context, event_table, read_text + head_indel_pos, head_indel_left_edge, head_indel_movement, 1, 1, 0);
@@ -1694,7 +1780,7 @@ int write_indel_final_results(global_context_t * global_context)
//if(!ofp)
// printf("HOW??? %s\n", fn2);
free(fn2);
- inserted_bases = malloc(MAX_INSERTION_LENGTH);
+ inserted_bases = malloc(MAX_INSERTION_LENGTH + 2);
ref_bases = malloc(1000);
alt_bases = malloc(1000);
@@ -1709,7 +1795,7 @@ int write_indel_final_results(global_context_t * global_context)
chromosome_event_t * event_body = indel_context -> event_space_dynamic +xk1;
- //#warning " ================= REMOVE '- 1' from the next LINE!!! ========================="
+ //#warning " ================= REMOVE '- 1' from the next LINE!!! ========================="
if((event_body -> event_type != CHRO_EVENT_TYPE_INDEL && event_body->event_type != CHRO_EVENT_TYPE_LONG_INDEL && event_body -> event_type != CHRO_EVENT_TYPE_POTENTIAL_INDEL)|| (event_body -> final_counted_reads < 1 && event_body -> event_type == CHRO_EVENT_TYPE_INDEL) )
continue;
@@ -1899,15 +1985,15 @@ int write_local_reassembly(global_context_t *global_context, HashTable *pileup_f
return 0;
}
-int build_local_reassembly(global_context_t *global_context , thread_context_t *thread_context, int pair_number , char * read_name , char * read_text ,char * qual_text , int read_len , int mate_read_len , int is_second_read, int best_read_id, int use_mate_pos)
+int build_local_reassembly(global_context_t *global_context , thread_context_t *thread_context, int pair_number , char * read_name , char * read_text ,char * qual_text , int read_len , int mate_read_len , int is_second_read, int best_read_id, int use_mate_pos, mapping_result_t *current_result, mapping_result_t *mate_result)
{
unsigned int read_anchor_position;
if(!read_text) return 0;
int is_position_certain = 1;
indel_context_t * indel_context = (indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID];
- alignment_result_t *current_result = _global_retrieve_alignment_ptr(global_context, pair_number, is_second_read, best_read_id);
- alignment_result_t * mate_result = _global_retrieve_alignment_ptr(global_context, pair_number, !is_second_read, best_read_id);
+ //mapping_result_t *current_result = _global_retrieve_alignment_ptr(global_context, pair_number, is_second_read, best_read_id);
+ //mapping_result_t * mate_result = _global_retrieve_alignment_ptr(global_context, pair_number, !is_second_read, best_read_id);
@@ -2726,7 +2812,7 @@ int search_window_once(global_context_t * global_context, reassembly_by_voting_b
#define INDEL_FULL_ALIGN_MAX_MISMATCH 3
#define SINGLE_PROBE_MAX_MISMATCH 1
-int full_indel_alignment(global_context_t * global_context, reassembly_by_voting_block_context_t * block_context, char * full_rebuilt_window, int full_rebuilt_window_size, gene_value_index_t * base_index, unsigned int window_start_pos, unsigned int * perfect_segment_start_pos, int * perfect_segment_lengths,int * indels_after_perfect_segments, short * indels_read_positions, float * indel_quality, unsigned int * contig_start_pos, unsigned int * contig_end_pos, int * head_removed_bases, int [...]
+int full_indel_alignment(global_context_t * global_context, reassembly_by_voting_block_context_t * block_context, char * full_rebuilt_window, int full_rebuilt_window_size, gene_value_index_t * base_index, unsigned int window_start_pos, unsigned int * perfect_segment_start_pos, int * perfect_segment_lengths,int * indels_after_perfect_segments, short * indels_read_positions, float * indel_quality, unsigned int * contig_start_pos, unsigned int * contig_end_pos, int * head_removed_bases, int [...]
{
int xk1;
int is_unreliable = 0;
@@ -2850,22 +2936,16 @@ int full_indel_alignment(global_context_t * global_context, reassembly_by_voting
is_unreliable += section_mismatch;
- // printf("MISS=%d < %d\n\n", section_mismatch, INDEL_FULL_ALIGN_MAX_MISMATCH - 1);
- if(section_mismatch >= INDEL_FULL_ALIGN_MAX_MISMATCH)
- continue;
- //return 0;
- else
- {
- perfect_segment_start_pos[ret] = probe_poses[xk1] - 1;
- // perfect_segment_lengths is : first WANTED base on the second half - first base pos on the first half - deletions
- perfect_segment_lengths[ret] = section_best_edge - probe_poses[xk1] + 1 - max(0, indels_in_section);
-
- indel_quality[ret] = pow(2,-is_unreliable);
- indels_after_perfect_segments[ret] = indels_in_section;
- indels_read_positions[ret] = used_probe_in_rebuilt_window[xk1] + perfect_segment_lengths[ret];
- ret++;
- total_unmatched += section_mismatch;
- }
+ perfect_segment_start_pos[ret] = probe_poses[xk1] - 1;
+ // perfect_segment_lengths is : first WANTED base on the second half - first base pos on the first half - deletions
+ perfect_segment_lengths[ret] = section_best_edge - probe_poses[xk1] + 1 - max(0, indels_in_section);
+
+ good_quality_indel[ret] = section_mismatch < INDEL_FULL_ALIGN_MAX_MISMATCH;
+ indel_quality[ret] = pow(2,-is_unreliable);
+ indels_after_perfect_segments[ret] = indels_in_section;
+ indels_read_positions[ret] = used_probe_in_rebuilt_window[xk1] + perfect_segment_lengths[ret];
+ ret++;
+ total_unmatched += section_mismatch;
}
else
{
@@ -3491,13 +3571,16 @@ int finalise_pileup_file_by_voting(global_context_t * global_context , char * te
unsigned int perfect_segment_start_pos[MAX_INDELS_IN_WINDOW], contig_start_pos, contig_end_pos, all_fresh = 0;
int perfect_segment_lengths[MAX_INDELS_IN_WINDOW], head_removed_bases=0, tail_removed_bases=0;
int indels_after_perfect_segments[MAX_INDELS_IN_WINDOW];
+ int good_quality_indels[MAX_INDELS_IN_WINDOW];
float quality_of_indels_aln[MAX_INDELS_IN_WINDOW];
short indels_read_positions[MAX_INDELS_IN_WINDOW];
char contig_CIGAR[200];
- int indels_in_window = full_indel_alignment(global_context, &block_context, full_rebuilt_window, full_rebuilt_window_size, base_index, window_start_pos, perfect_segment_start_pos, perfect_segment_lengths, indels_after_perfect_segments, indels_read_positions, quality_of_indels_aln, &contig_start_pos, &contig_end_pos, &head_removed_bases, &tail_removed_bases);
+ memset(good_quality_indels, 0, sizeof(int) * MAX_INDELS_IN_WINDOW);
+ int indels_in_window = full_indel_alignment(global_context, &block_context, full_rebuilt_window, full_rebuilt_window_size, base_index, window_start_pos, perfect_segment_start_pos, perfect_segment_lengths, indels_after_perfect_segments, indels_read_positions, quality_of_indels_aln, &contig_start_pos, &contig_end_pos, &head_removed_bases, &tail_removed_bases, good_quality_indels);
contig_CIGAR[0]=0;
int read_position_cursor = head_removed_bases;
+ int is_indel_contig = 0;
for(xk2 = 0; xk2 < indels_in_window; xk2++)
{
@@ -3506,20 +3589,22 @@ int finalise_pileup_file_by_voting(global_context_t * global_context , char * te
float quality_of_this_indel = quality_of_indels_aln[xk2] * first_half_alleles [first_allele_no].allele_quality * second_half_alleles[second_allele_no].allele_quality;
if(abs(indels) >= global_context -> config.max_indel_length){
+ //#warning "====================== REMOVE THE debug output ==============="
continue;
}
- if(abs(indels) <= 16){
+ /*if(abs(indels) <= 16 && 0){
continue;
- }
+ }*/
+ is_indel_contig=1;
sprintf(contig_CIGAR+strlen(contig_CIGAR), "%dM%d%c", indels_read_positions[xk2] - read_position_cursor, abs(indels), indels<0?'I':'D');
read_position_cursor = indels_read_positions[xk2];
if(indels<0) read_position_cursor -= indels;
all_indels_in_window++;
- if(indels >= -global_context -> config.max_indel_length)
+ if(indels >= -global_context -> config.max_indel_length && good_quality_indels[xk2])
{
int neighbour_delta;
for(neighbour_delta = - 30; neighbour_delta < 30 ; neighbour_delta++)
@@ -3548,7 +3633,9 @@ int finalise_pileup_file_by_voting(global_context_t * global_context , char * te
}
}
- if(all_fresh)
+
+ //#warning "====================== Make sure ' is_indel_contig ' in the next line is harmless before RELEASE ==============="
+ if( is_indel_contig || all_fresh)
{
int write_cursor;
char * chro_begin;
@@ -3810,12 +3897,25 @@ void init_global_context(global_context_t * context)
srand(time(NULL));
memset(context->module_contexts, 0, 5*sizeof(void *));
+
+ context->config.fast_run = 0;
context->config.memory_use_multiplex = 1;
context->config.report_sam_file = 1;
- context->config.is_rna_seq_reads = 0;
+ context->config.do_breakpoint_detection = 0;
context->config.do_fusion_detection = 0;
+ context->config.do_structural_variance_detection = 0;
context->config.more_accurate_fusions = 1;
+
+ //#warning "============= best values for the SVs application: 8; 5; 32 ==============="
+ context->config.top_scores = 8 - 5;
+ context->config.max_vote_combinations = 5 - 3;
+ context->config.max_vote_simples = 5;
+ context->config.max_vote_number_cutoff = 1;
+
+ context->config.experiment_type = 0;
context->config.prefer_donor_receptor_junctions = 1;
+ context->config.maximum_translocation_length = 10000;
+ context->config.maximum_colocating_distance = 500;
context->config.do_big_margin_filtering_for_reads = 0;
context->config.do_big_margin_filtering_for_junctions = 0;
context->config.maximum_intron_length = 500000;
@@ -3831,15 +3931,18 @@ void init_global_context(global_context_t * context)
context->config.use_hamming_distance_break_ties = 0;
context->config.use_quality_score_break_ties = 0;
context->config.extending_search_indels = 0;
+ context->config.PE_predominant_weight = 0;
+
+ //#warning "============= best values for the SVs application: 3 ====================="
context->config.multi_best_reads = 1;
+ context->config.reported_multi_best_reads = 1;
context->config.is_SAM_file_input=0;
context->config.use_dynamic_programming_indel=0;
context->config.use_bitmap_event_table = 1;
context->config.convert_color_to_base = 0;
context->config.is_gzip_fastq = 0;
- context->config.realignment_minimum_variant_distance = 16;
- context->config.is_BAM_output = 0;
+ context->config.is_BAM_output = 1;
context->config.is_BAM_input = 0;
context->config.read_trim_5 = 0;
context->config.read_trim_3 = 0;
@@ -3871,22 +3974,23 @@ void init_global_context(global_context_t * context)
context->config.all_threads = 1;
context->config.is_first_iteration_running = 1;
context->config.is_second_iteration_running = 1;
- context->config.reads_per_chunk = 14*1024*1024;
+ context->config.reads_per_chunk = 1024*1024*1024;
+ context->config.reads_per_chunk = 20*1024*1024;
+ context->config.use_memory_buffer = 1;
context->config.is_methylation_reads = 0;
context->config.report_no_unpaired_reads = 0;
context->config.limited_tree_scan = 0;
context->config.high_quality_base_threshold = 500000;
context->config.report_multiple_best_in_pairs = 0;
+ context->config.realignment_minimum_variant_distance = 16;
- /*
- #warning ##################################################
- #warning ## CHANGE THIS VALUE TO 70000 BEFORE RELEASE! ##
- #warning ##################################################
- */
context->config.init_max_event_number = 70000;
- context->config.show_soft_cliping = 1;
+ context->config.show_soft_cliping = 1 ;
context->config.big_margin_record_size = 9;
+ //#warning "====== FOR HIGH JUNCTION ACCURACY ==========="
+ context->config.big_margin_record_size = 15;
+
context->config.read_group_id[0] = 0;
context->config.read_group_txt[0] = 0;
context->config.first_read_file[0] = 0;
@@ -3903,10 +4007,17 @@ void init_global_context(global_context_t * context)
context->config.max_insertion_at_junctions=0;
context->config.check_donor_at_junctions=1;
+ memset(&context -> input_reads, 0, sizeof(read_input_t));
signal (SIGTERM, COREMAIN_SIGINT_hook);
signal (SIGINT, COREMAIN_SIGINT_hook);
+
+ int seed_rand[2];
+ double double_time = miltime();
+ memcpy(seed_rand, &double_time, 2*sizeof(int));
+ srand(seed_rand[0]^seed_rand[1]);
+
sprintf(context->config.temp_file_prefix, "./core-temp-sum-%06u-%06u", getpid(),rand());
_COREMAIN_delete_temp_prefix = context->config.temp_file_prefix;
@@ -3914,6 +4025,11 @@ void init_global_context(global_context_t * context)
context->config.max_indel_length = 5;
context->config.phred_score_format = FASTQ_PHRED33;
context->start_time = miltime();
+
+ context->timecost_load_index = 0;
+ context->timecost_voting = 0;
+ context->timecost_before_realign = 0;
+ context->timecost_for_realign = 0;
}
@@ -3929,7 +4045,7 @@ void init_global_context(global_context_t * context)
int CORE_DPALIGN_MATCH_SCORE = 2;
int CORE_DPALIGN_MISMATCH_PENALTY = 0;
-int core_dynamic_align(global_context_t * global_context, thread_context_t * thread_context, char * read, int read_len, unsigned int begin_position, char * movement_buffer, int expected_offset)
+int core_dynamic_align(global_context_t * global_context, thread_context_t * thread_context, char * read, int read_len, unsigned int begin_position, char * movement_buffer, int expected_offset, char * read_name)
// read must be converted to the positive strand.
// movement buffer: 0:match, 1: read-insert, 2: gene-insert, 3:mismatch
// the size of the movement buffer must be equal to the length of the read plus max_indel * 3.
@@ -3958,13 +4074,16 @@ int core_dynamic_align(global_context_t * global_context, thread_context_t * thr
table = indel_thread_context -> dynamic_align_table;
table_mask = indel_thread_context -> dynamic_align_table_mask;
}
- #ifdef indel_debug
- int ii, jj;
- for(ii = 0; ii<read_len - expected_offset; ii++)
- putchar(gvindex_get(current_value_index, begin_position + ii));
- SUBREADprintf ("\n%s\n", read);
- #endif
+
+ if(0 && strcmp(read_name, "MISEQ:13:000000000-A1H1M:1:1112:12194:5511") == 0)
+ {
+ int ii;
+ for(ii = 0; ii<read_len - expected_offset; ii++)
+ SUBREADprintf("%c",gvindex_get(current_value_index, begin_position + ii));
+
+ SUBREADprintf ("\n%s\n", read);
+ }
// vertical move: deletion (1)
@@ -3972,17 +4091,25 @@ int core_dynamic_align(global_context_t * global_context, thread_context_t * thr
// cross move: match (0) or mismatch (3)
// i: vertical move; j: horizontal move
+ //SUBREADprintf("DM[%d]: %p %d,%d\n", thread_context -> thread_id, table_mask, read_len + expected_offset, read_len);
for (i=0; i<read_len + expected_offset; i++)
{
for(j=0; j<read_len; j++)
{
+ if(0&&(i >= read_len + expected_offset || j >= read_len))
+ SUBREADprintf("XXDM[%d]: %p %d,%d\n", thread_context -> thread_id, table_mask, read_len + expected_offset, read_len);
table_mask[i][j]=0;
+
+ if(0&&(i >= read_len + expected_offset || j >= read_len))
+ SUBREADprintf("YYDM[%d]: %p %d,%d\n", thread_context -> thread_id, table_mask, read_len + expected_offset, read_len);
+
if (j < i - max_indel || j > max_indel + i)
{
table[i][j]=-9999;
- #ifdef indel_debug
- putchar('\t');
- #endif
+ if(0 && strcmp(read_name, "MISEQ:13:000000000-A1H1M:1:1112:12194:5511") == 0)
+ {
+ putchar('\t');
+ }
continue;
}
@@ -4036,13 +4163,11 @@ int core_dynamic_align(global_context_t * global_context, thread_context_t * thr
table[i][j] = from_upper;
}
- #ifdef indel_debug
- SUBREADprintf("%c%c\t", chromo_ch, read[j]);
- #endif
+ if(0 && strcmp(read_name, "MISEQ:13:000000000-A1H1M:1:1112:12194:5511") == 0)
+ SUBREADprintf("%c%c\t", chromo_ch, read[j]);
}
- #ifdef indel_debug
- SUBREADputs("");
- #endif
+ if(0 && strcmp(read_name, "MISEQ:13:000000000-A1H1M:1:1112:12194:5511") == 0)
+ SUBREADputs("");
}
#ifdef indel_debug
@@ -4055,28 +4180,29 @@ int core_dynamic_align(global_context_t * global_context, thread_context_t * thr
int out_pos = 0, delta=0;
j = read_len - 1;
- #ifdef indel_debug
-
- for(ii=0;ii< path_i+1; ii++)
+ if(0 && strcmp(read_name, "MISEQ:13:000000000-A1H1M:1:1112:12194:5511") == 0)
{
- SUBREADprintf("%d\t", ii);
+ int ii,jj;
+ for(ii=0;ii< path_i+1; ii++)
+ {
+ SUBREADprintf("%d\t", ii);
+ for(jj=0; jj<j+1; jj++)
+ SUBREADprintf("% 6d",table[ii][jj]);
+ SUBREADputs("");
+ }
+ SUBREADprintf(" \t");
for(jj=0; jj<j+1; jj++)
- SUBREADprintf("% 6d",table[ii][jj]);
+ SUBREADprintf("#%4d ",jj);
SUBREADputs("");
- }
- SUBREADprintf(" \t");
- for(jj=0; jj<j+1; jj++)
- SUBREADprintf("#%4d ",jj);
- SUBREADputs("");
- SUBREADputs("");
-
- for(ii=0;ii< path_i+1; ii++)
- {
- for(jj=0; jj<j+1; jj++)
- SUBREADprintf("% 6d",table_mask[ii][jj]);
SUBREADputs("");
+
+ for(ii=0;ii< path_i+1; ii++)
+ {
+ for(jj=0; jj<j+1; jj++)
+ SUBREADprintf("% 6d",table_mask[ii][jj]);
+ SUBREADputs("");
+ }
}
- #endif
while(1)
{
@@ -4114,19 +4240,20 @@ int core_dynamic_align(global_context_t * global_context, thread_context_t * thr
movement_buffer[i] = tmp;
}
- #ifdef indel_debug
- for(i=0; i<out_pos; i++)
+ if(0 && strcmp(read_name, "MISEQ:13:000000000-A1H1M:1:1112:12194:5511") == 0)
{
- char tmp = movement_buffer[i];
- switch(tmp){
- case 0: putchar('=');break;
- case 1: putchar('D');break;
- case 2: putchar('I');break;
- case 3: putchar('X');break;
+ for(i=0; i<out_pos; i++)
+ {
+ char tmp = movement_buffer[i];
+ switch(tmp){
+ case 0: putchar('=');break;
+ case 1: putchar('D');break;
+ case 2: putchar('I');break;
+ case 3: putchar('X');break;
+ }
}
+ putchar('\n');
}
- putchar('\n');
- #endif
return out_pos;
}
diff --git a/src/core-indel.h b/src/core-indel.h
index 51c01d3..6eea940 100644
--- a/src/core-indel.h
+++ b/src/core-indel.h
@@ -28,7 +28,10 @@
// if it is an insertion event, event_large_site = event_small_site+1.
//#define MAX_EVENT_ENTRIES_PER_SITE 5
-#define MAX_EVENT_ENTRIES_PER_SITE 12
+//#define MAX_EVENT_ENTRIES_PER_SITE 12
+//
+#define EVENT_ENTRIES_INIT_SIZE 9
+#define MAX_EVENT_ENTRIES_PER_SITE 9
#define CHRO_EVENT_TYPE_REMOVED 0
#define CHRO_EVENT_TYPE_INDEL 8
#define CHRO_EVENT_TYPE_LONG_INDEL 16
@@ -48,37 +51,37 @@
#define is_target_window_X(x) 0
//#define MAXIMUM_EVENT_NUMBER 300000
+
+typedef struct{
+ int is_precisely_called;
+ unsigned int source_left_side; // the base BEFORE the translocated sequence.
+ unsigned int target_left_side; // the base BEFORE the inserted translocated sequence.
+ unsigned int length;
+
+ unsigned int event_P_number;
+ unsigned int event_Q_number;
+ unsigned int event_R_number;
+
+ int is_inv;
+ unsigned int all_sup_P;
+ unsigned int max_sup_QR;
+} translocation_result_t;
+
typedef struct{
- unsigned int event_small_side;
- unsigned int event_large_side;
- //union
- //{
- short indel_length;
- short junction_flanking_left;
- //};
- short junction_flanking_right;
-
- unsigned char event_type;
- char indel_at_junction;
- char is_negative_strand; // this only works to junction detection, according to 'GT/AG' or 'CT/AC' donors. This only applys to junctions.
- char is_strand_jumped; // "strand jumped" means that the left and right sides are on different strands. This only applys to fusions.
- char is_donor_found; // only for junctions: GT/AG is found at the location.
- // Also, if "is_strand_jumped" is true, all coordinates (e.g., splicing points, cover_start, cover_end, etc) are on "reversed read" view.
-
- //char is_ambiguous;
- char connected_next_event_distance; // the distance (negative or positive) to the next event in the table. For example, if the cigar string is 10M3I1M1I10M, event "3I" will have 1 here .
- char connected_previous_event_distance; // the distance (negative or positive) to the next event in the table. For example, if the cigar string is 10M3I1M1I10M, event "1I" will have 1 here.
-
- //char inserted_bases[(1+MAX_INSERTION_LENGTH) / 4 + 1];
- char * inserted_bases;
- unsigned short supporting_reads;
- unsigned short anti_supporting_reads;
- unsigned short final_counted_reads;
- unsigned short final_reads_mismatches;
- unsigned int global_event_id;
- float event_quality;
-} chromosome_event_t;
+ int is_precisely_called;
+ unsigned int event_Y_rough_small_abs;
+ unsigned int event_Z_rough_large_abs;
+
+ unsigned int small_side; // the base BEFORE the reversed sequence
+ unsigned int length;
+
+ unsigned int event_Y_number; // event_no in the event space.
+ unsigned int event_Z_number;
+
+ unsigned int all_sup_D;
+ unsigned int max_sup_E;
+} inversion_result_t;
struct reassmebly_window_allele
{
@@ -161,11 +164,11 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread
int write_indel_final_results(global_context_t * context);
int search_event(global_context_t * global_context,HashTable * event_table, chromosome_event_t * event_space, unsigned int pos, int search_type, char event_type, chromosome_event_t ** return_buffer);
-void set_alignment_result(global_context_t * global_context, int pair_number, int is_second_read, int best_read_id, unsigned int position, int votes, gene_vote_number_t * indel_record, short best_cover_start, short best_cover_end, int is_negative_strand, unsigned int minor_position, unsigned int minor_votes, unsigned int minor_coverage_start, unsigned int minor_coverage_end, unsigned int split_point, int inserted_bases, int is_strand_jumped, int is_GT_AG_donors, int used_subreads_in_vote [...]
+void set_alignment_result(global_context_t * global_context, int pair_number, int is_second_read, int best_read_id, unsigned int position, int votes, gene_vote_number_t * indel_record, short best_cover_start, short best_cover_end, int is_negative_strand, int is_PE, unsigned int minor_position, unsigned int minor_votes, unsigned int minor_coverage_start, unsigned int minor_coverage_end, unsigned int split_point, int inserted_bases, int is_strand_jumped, int is_GT_AG_donors, int used_subre [...]
void put_new_event(HashTable * event_table, chromosome_event_t * new_event , int event_no);
void remove_neighbour(global_context_t * global_context);
-int build_local_reassembly(global_context_t *global_context , thread_context_t *thread_context , int pair_number, char * read_name_1 , char * read_text_1 ,char * qual_text_1 , int read_len_1, int read_len_2, int is_second_read, int best_read_id, int is_paired_unmapped);
+int build_local_reassembly(global_context_t *global_context , thread_context_t *thread_context , int pair_number, char * read_name_1 , char * read_text_1 ,char * qual_text_1 , int read_len_1, int read_len_2, int is_second_read, int best_read_id, int is_paired_unmapped, mapping_result_t * current_res, mapping_result_t * mate_res);
int finalise_long_insertions(global_context_t * global_context);
// This function sets the global context with default values.
@@ -182,4 +185,8 @@ chromosome_event_t * reallocate_event_space(global_context_t* global_context,thr
int there_are_events_in_range(char * bitmap, unsigned int pos, int sec_len);
int anti_supporting_read_scan(global_context_t * global_context);
+
+int core_dynamic_align(global_context_t * global_context, thread_context_t * thread_context, char * read, int read_len, unsigned int begin_position, char * movement_buffer, int expected_offset, char * read_name);
+
+chromosome_event_t * local_add_indel_event(global_context_t * global_context, thread_context_t * thread_context, HashTable * event_table, char * read_text, unsigned int left_edge, int indels, int score_supporting_read_added, int is_ambiguous, int mismatched_bases);
#endif
diff --git a/src/core-interface-aligner.c b/src/core-interface-aligner.c
index 153f374..9fce73d 100644
--- a/src/core-interface-aligner.c
+++ b/src/core-interface-aligner.c
@@ -32,25 +32,31 @@ static struct option long_options[] =
{"unique", no_argument, 0, 'u'},
{"color-convert", no_argument, 0, 'b'},
{"multi", required_argument, 0, 'B'},
- {"hamming", no_argument, 0, 'H'},
+ {"type", required_argument, 0, 't'},
{"quality", no_argument, 0, 'Q'},
{"trim5", required_argument, 0, '5'},
{"trim3", required_argument, 0, '3'},
{"memoryMultiplex", required_argument, 0, 0},
{"rg", required_argument, 0, 0},
+ {"gzFASTQinput", no_argument, 0, 0},
{"rg-id", required_argument, 0, 0},
- {"BAMoutput", no_argument, 0, 0},
+ {"SAMoutput", no_argument, 0, 0},
{"BAMinput", no_argument, 0, 0},
+ {"fast", no_argument, 0, 0},
{"SAMinput", no_argument, 0, 0},
{"reportPairedMultiBest", no_argument, 0, 0},
- {"reportFusions", no_argument, 0, 0},
- {"gzFASTQinput", no_argument, 0, 0},
+ {"sv", no_argument, 0, 0},
{"extraColumns", no_argument, 0, 0},
{"forcedPE", no_argument, 0, 0},
{"ignoreUnmapped", no_argument, 0, 0},
{"accurateFusions", no_argument, 0, 0},
+ {"SVdetection", no_argument, 0, 0},
{"maxMismatches", required_argument, 0, 'M'},
- {"minDistanceBetweenVariants", required_argument, 0, 0},
+ {"minMappedLength", required_argument, 0, 0},
+ {"maxVoteSimples", required_argument, 0, 0},
+ {"complexIndels", no_argument, 0, 0},
+ {"minVoteCutoff", required_argument, 0, 0},
+ {"maxRealignLocations", required_argument, 0, 0},
{0, 0, 0, 0}
};
@@ -60,148 +66,110 @@ void print_usage_core_aligner()
SUBREADprintf("\nVersion %s\n\n", SUBREAD_VERSION);
SUBREADputs("Usage:");
SUBREADputs("");
- SUBREADputs(" ./subread-align [options] -i <index_name> -r <input> -o <output>");
+ SUBREADputs(" ./subread-align [options] -i <index_name> -r <input> -o <output> -t <type>");
SUBREADputs("");
SUBREADputs("Required arguments:");
SUBREADputs(" ");
- SUBREADputs(" -i --index <index> base name of the index.");
- SUBREADputs(" ");
- SUBREADputs(" -r --read <input> name of the input file(FASTQ/FASTA format by default");
- SUBREADputs(" . See below for more supported formats). Both base-");
- SUBREADputs(" space and color-space read data are supported. For");
- SUBREADputs(" paired-end reads, this gives the first read file");
- SUBREADputs(" and the other read file should be specified using");
- SUBREADputs(" the -R option.");
- SUBREADputs(" ");
- SUBREADputs("Optional general arguments:");
- SUBREADputs(" ");
- SUBREADputs(" -o --output <output> name of the output file(SAM format by default). If");
- SUBREADputs(" not provided, mapping results will be output to the");
- SUBREADputs(" standard output (stdout).");
+ SUBREADputs(" -i <string> Base name of the index.");
SUBREADputs("");
- SUBREADputs(" -n --subreads <int> number of selected subreads, 10 by default.");
+ SUBREADputs(" -r <string> Name of the input file. Input formats including gzipped");
+ SUBREADputs(" fastq, fastq, and fasta can be automatically detected. If");
+ SUBREADputs(" paired-end, this should give the name of file including");
+ SUBREADputs(" first reads.");
SUBREADputs(" ");
- SUBREADputs(" -m --minmatch <int> consensus threshold (minimal number of consensus");
- SUBREADputs(" subreads required) for reporting a hit. If paired-");
- SUBREADputs(" end read data are provided, this gives the consensus");
- SUBREADputs(" threshold for the read which receives more votes");
- SUBREADputs(" than the other read from the same pair. 3 by default");
+ SUBREADputs(" -t <int> Type of input sequencing data. Its values include");
+ SUBREADputs(" 0: RNA-seq data");
+ SUBREADputs(" 1: genomic DNA-seq data.");
SUBREADputs(" ");
- SUBREADputs(" -T --threads <int> number of threads, 1 by default.");
+ SUBREADputs("Optional arguments:");
SUBREADputs(" ");
- SUBREADputs(" -I --indel <int> number of indels allowed, 5 by default. Indels of up");
- SUBREADputs(" to 200bp long can be detected.");
- SUBREADputs(" ");
- SUBREADputs(" -B --multi <int> Specify the maximal number of equally-best mapping");
- SUBREADputs(" locations allowed to be reported for each read. 1");
- SUBREADputs(" by default. Allowed values are between 1 to 16");
- SUBREADputs(" (inclusive). 'NH' tag is used to indicate how many");
- SUBREADputs(" alignments are reported for the read and 'HI' tag");
- SUBREADputs(" is used for numbering the alignments reported for");
- SUBREADputs(" the same read, in the output. Note that -u option");
- SUBREADputs(" takes precedence over -B.");
+ SUBREADputs(" -o <string> Name of the output file. By default, the output is in BAM");
+ SUBREADputs(" format.");
SUBREADputs("");
- SUBREADputs(" -P --phred <3:6> the format of Phred scores in input files, '3' for");
- SUBREADputs(" phred+33 and '6' for phred+64. '3' by default.");
+ SUBREADputs(" -n <int> Number of selected subreads, 10 by default.");
SUBREADputs("");
- SUBREADputs(" -u --unique only uniquely mapped reads will be reported (reads");
- SUBREADputs(" mapped to multiple locations in the reference genome");
- SUBREADputs(" will not be reported). This option can be used");
- SUBREADputs(" together with option '-H' or '-Q'.");
+ SUBREADputs(" -m <int> Consensus threshold for reporting a hit (minimal number of");
+ SUBREADputs(" subreads that map in consensus) . If paired-end, this gives");
+ SUBREADputs(" the consensus threshold for the anchor read. 3 by default");
SUBREADputs("");
- SUBREADputs(" -Q --quality using mapping quality scores to break ties when more");
- SUBREADputs(" than one best mapping location is found.");
+ SUBREADputs(" -T <int> Number of CPU threads used, 1 by default.");
SUBREADputs("");
- SUBREADputs(" -H --hamming using Hamming distance to break ties when more than");
- SUBREADputs(" one best mapping location is found.");
+ SUBREADputs(" -I <int> Maximum length (in bp) of indels that can be detected. 5 by");
+ SUBREADputs(" default. The program can detect indels of up to 200bp long.");
SUBREADputs("");
- SUBREADputs(" -b --color-convert convert color-space read bases to base-space read");
- SUBREADputs(" bases in the mapping output. Note that the mapping");
- SUBREADputs(" itself will still be performed at color-space.");
+ SUBREADputs(" -B <int> Maximal number of equally-best mapping locations to be");
+ SUBREADputs(" reported. 1 by default. Note that -u option takes precedence");
+ SUBREADputs(" over -B.");
SUBREADputs("");
- SUBREADputs(" -M --maxMismatches <int> Specify the maximum number of mis-matched bases");
- SUBREADputs(" allowed in the alignment. 3 by default. Mis-matches");
- SUBREADputs(" found in soft-clipped bases are not counted.");
- SUBREADputs(" ");
- SUBREADputs(" --reportFusions report discovered genomic fusion events such as");
- SUBREADputs(" chimeras. Discovered fusions will be saved to a file");
- SUBREADputs(" (*.fusions.txt). Detailed mapping results for fusion");
- SUBREADputs(" reads will be saved to the SAM/BAM output file as");
- SUBREADputs(" well. Secondary alignments of fusion reads will be");
- SUBREADputs(" saved to the following optional fields: CC(Chr),");
- SUBREADputs(" CP(Position), CG(CIGAR) and CT(strand). Note that");
- SUBREADputs(" each fusion read occupies only one row in the");
- SUBREADputs(" SAM/BAM output file.");
+ SUBREADputs(" -P <3:6> Format of Phred scores in input files, '3' for phred+33 and");
+ SUBREADputs(" '6' for phred+64. '3' by default.");
SUBREADputs("");
- SUBREADputs(" --trim5 <int> trim off <int> number of bases from 5' end of each");
- SUBREADputs(" read. 0 by default.");
+ SUBREADputs(" -u Report uniquely mapped reads only. Number of matched bases (");
+ SUBREADputs(" for RNA-seq) or mis-matched bases(for genomic DNA-seq) is");
+ SUBREADputs(" used to break the tie.");
SUBREADputs("");
- SUBREADputs(" --trim3 <int> trim off <int> number of bases from 3' end of each");
- SUBREADputs(" read. 0 by default.");
+ SUBREADputs(" -b Convert color-space read bases to base-space read bases in");
+ SUBREADputs(" the mapping output. Note that read mapping is performed at");
+ SUBREADputs(" color-space.");
SUBREADputs("");
- SUBREADputs(" --rg-id <string> specify the read group ID. If specified,the read");
- SUBREADputs(" group ID will be added to the read group header");
- SUBREADputs(" field and also to each read in the mapping output.");
+ SUBREADputs(" -M <int> Specify the maximum number of mis-matched bases allowed in");
+ SUBREADputs(" the alignment. 3 by default. Mis-matches found in soft-");
+ SUBREADputs(" clipped bases are not counted.");
SUBREADputs("");
- SUBREADputs(" --rg <string> add a <tag:value> to the read group (RG) header in");
- SUBREADputs(" in the mapping output.");
+ SUBREADputs(" --sv Detect structural variants (eg. long indel, inversion,");
+ SUBREADputs(" duplication and translocation) and report breakpoints. Refer");
+ SUBREADputs(" to Users Guide for breakpoint reporting.");
SUBREADputs("");
- SUBREADputs(" --gzFASTQinput specify that the input read data is in gzipped");
- SUBREADputs(" FASTQ/FASTA format.");
+ SUBREADputs(" --SAMinput Input reads are in SAM format.");
SUBREADputs("");
- SUBREADputs(" --SAMinput specify that the input read data is in SAM format.");
+ SUBREADputs(" --BAMinput Input reads are in BAM format.");
SUBREADputs("");
- SUBREADputs(" --BAMinput specify that the input read data is in BAM format.");
+ SUBREADputs(" --SAMoutput Save mapping result in SAM format.");
SUBREADputs("");
- SUBREADputs(" --BAMoutput specify that mapping results are saved into a BAM");
- SUBREADputs(" format file.");
+ SUBREADputs(" --trim5 <int> Trim off <int> number of bases from 5' end of each read. 0");
+ SUBREADputs(" by default.");
SUBREADputs("");
- SUBREADputs(" --DPGapOpen <int> a numeric value giving the penalty for opening a");
- SUBREADputs(" gap when using the Smith-Waterman dynamic");
- SUBREADputs(" programming algorithm to detect insertions and");
- SUBREADputs(" deletions. The Smith-Waterman algorithm is only");
- SUBREADputs(" applied for those reads which are found to contain");
- SUBREADputs(" insertions or deletions. -1 by default.");
+ SUBREADputs(" --trim3 <int> Trim off <int> number of bases from 3' end of each read. 0");
+ SUBREADputs(" by default.");
SUBREADputs("");
- SUBREADputs(" --DPGapExt <int> a numeric value giving the penalty for extending the");
- SUBREADputs(" gap, used by the Smith-Waterman algorithm. 0 by");
- SUBREADputs(" default.");
+ SUBREADputs(" --rg-id <string> Add read group ID to the output.");
SUBREADputs("");
- SUBREADputs(" --DPMismatch <int> a numeric value giving the penalty for mismatches,");
- SUBREADputs(" used by the Smith-Waterman algorithm. 0 by default.");
+ SUBREADputs(" --rg <string> Add <tag:value> to the read group (RG) header in the output.");
SUBREADputs("");
- SUBREADputs(" --DPMatch <int> a numeric value giving the score for matches used by");
- SUBREADputs(" the Smith-Waterman algorithm. 2 by default.");
+ SUBREADputs(" --DPGapOpen <int> Penalty for gap opening in short indel detection. -1 by");
+ SUBREADputs(" default.");
SUBREADputs("");
- SUBREADputs(" -v output version of the program.");
+ SUBREADputs(" --DPGapExt <int> Penalty for gap extension in short indel detection. 0 by");
+ SUBREADputs(" default.");
SUBREADputs("");
+ SUBREADputs(" --DPMismatch <int> Penalty for mismatches in short indel detection. 0 by");
+ SUBREADputs(" default.");
SUBREADputs("");
- SUBREADputs("Optional arguments for paired-end reads:");
+ SUBREADputs(" --DPMatch <int> Score for matched bases in short indel detection. 2 by");
+ SUBREADputs(" default.");
SUBREADputs("");
- SUBREADputs(" -R --read2 <input> name of the second input file. The program will then");
- SUBREADputs(" be switched to the paired-end read mapping mode.");
+ SUBREADputs(" --complexIndels Detect multiple short indels that occur concurrently in a");
+ SUBREADputs(" small genomic region (these indels could be as close as 1bp");
+ SUBREADputs(" apart).");
SUBREADputs("");
- SUBREADputs(" -p --minmatch2 <int> consensus threshold for the read which receives less");
- SUBREADputs(" votes than the other read from the same pair, 1 by");
- SUBREADputs(" default.");
+ SUBREADputs(" -v Output version of the program.");
SUBREADputs("");
- SUBREADputs(" -d --mindist <int> minimum fragment/template length, 50bp by default.");
+ SUBREADputs("Optional arguments for paired-end reads:");
SUBREADputs("");
- SUBREADputs(" -D --maxdist <int> maximum fragment/template length, 600bp by default.");
+ SUBREADputs(" -R <string> Name of the file including second reads.");
SUBREADputs("");
- SUBREADputs(" -S --order <ff:fr:rf> orientation of the two read from the same pair,");
- SUBREADputs(" 'fr' by default.");
+ SUBREADputs(" -p <int> Consensus threshold for the non-anchor read (receiving less");
+ SUBREADputs(" votes than the anchor read from the same pair). 1 by");
+ SUBREADputs(" default.");
SUBREADputs("");
+ SUBREADputs(" -d <int> Minimum fragment/insert length, 50bp by default.");
SUBREADputs("");
- SUBREADputs("Advanced arguments:");
+ SUBREADputs(" -D <int> Maximum fragment/insert length, 600bp by default.");
SUBREADputs("");
- SUBREADputs(" --minDistanceBetweenVariants <int> Minimum allowed distance between two");
- SUBREADputs(" neighboring variants (or junctions in RNA-seq data)");
- SUBREADputs(" within the same read. 16 by default. The value");
- SUBREADputs(" should be greater than 0 and less than the length");
- SUBREADputs(" of the read.");
+ SUBREADputs(" -S <ff:fr:rf> Orientation of first and second reads, 'fr' by default (");
+ SUBREADputs(" forward/reverse).");
SUBREADputs("");
- SUBREADputs("For more information about these arguments, please refer to the User Manual.");
+ SUBREADputs("Refer to Users Manual for detailed description to the arguments. ");
SUBREADputs("");
}
@@ -222,9 +190,11 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
global_context->config.max_mismatch_junction_reads = 3;
global_context->config.use_dynamic_programming_indel = 1;
+
// config.extending_search_indels is changed from 1 to 0 on 10/mar/2014
global_context->config.extending_search_indels = 0;
- global_context->config.big_margin_record_size = 9;
+ global_context->config.limited_tree_scan = 0 ;
+ //global_context->config.big_margin_record_size = 9;
if(argc<2)
{
@@ -271,8 +241,14 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
break;
case 'B':
global_context->config.multi_best_reads = atoi(optarg);
+
if(global_context->config.multi_best_reads<1)
global_context->config.multi_best_reads=1;
+
+ global_context->config.reported_multi_best_reads = global_context->config.multi_best_reads;
+
+ global_context->config.max_vote_combinations = max(global_context->config.max_vote_combinations, global_context->config.reported_multi_best_reads + 1);
+ global_context->config.max_vote_simples = max(global_context->config.max_vote_simples, global_context->config.reported_multi_best_reads + 1);
break;
case 'H':
global_context->config.use_hamming_distance_break_ties = 1;
@@ -313,7 +289,7 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
case 'T':
global_context->config.all_threads = atoi(optarg);
if(global_context->config.all_threads <1) global_context->config.all_threads = 1;
- if(global_context->config.all_threads >32) global_context->config.all_threads = 32;
+ if(global_context->config.all_threads > MAX_THREADS) global_context->config.all_threads = MAX_THREADS;
break;
case 'r':
@@ -375,7 +351,15 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
global_context->config.minimum_subread_for_second_read = atoi(optarg);
break;
case 't':
- sprintf(global_context->config.temp_file_prefix, "%s/core-temp-sum-%06u-%05u", optarg, getpid(), rand());
+ if(strcmp(optarg, "1") == 0){
+ global_context->config.experiment_type = CORE_EXPERIMENT_DNASEQ;
+ }else if(strcmp(optarg, "0") == 0){
+ global_context->config.experiment_type = CORE_EXPERIMENT_RNASEQ;
+ }else{
+ SUBREADprintf("Error: unknown experiment type:%s (only 0 and 1 are allowed)\n", optarg);
+ return -1;
+ }
+
break;
case 'F':
global_context->config.is_second_iteration_running = 0;
@@ -402,19 +386,15 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
strcat(global_context->config.read_group_txt, "\t");
strcat(global_context->config.read_group_txt, optarg);
}
- else if(strcmp("BAMoutput", long_options[option_index].name)==0)
+ else if(strcmp("SAMoutput", long_options[option_index].name)==0)
{
- global_context->config.is_BAM_output = 1;
+ global_context->config.is_BAM_output = 0;
}
else if(strcmp("BAMinput", long_options[option_index].name)==0)
{
global_context->config.is_BAM_input = 1;
global_context->config.is_SAM_file_input = 1;
}
- else if(strcmp("gzFASTQinput", long_options[option_index].name)==0)
- {
- global_context->config.is_gzip_fastq=1;
- }
else if(strcmp("extraColumns", long_options[option_index].name)==0)
{
global_context->config.SAM_extra_columns=1;
@@ -428,28 +408,53 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
{
global_context->config.report_multiple_best_in_pairs = 1;
}
+ else if(strcmp("fast", long_options[option_index].name)==0)
+ {
+ global_context -> config.fast_run = 1;
+ }
else if(strcmp("ignoreUnmapped", long_options[option_index].name)==0)
{
global_context->config.ignore_unmapped_reads = 1;
}
- else if(strcmp("reportFusions", long_options[option_index].name)==0)
+ else if(strcmp("sv", long_options[option_index].name)==0)
{
- global_context->config.is_rna_seq_reads = 1;
+ global_context->config.do_breakpoint_detection = 1;
global_context->config.do_fusion_detection = 1;
global_context->config.prefer_donor_receptor_junctions = 0;
- global_context->config.do_big_margin_filtering_for_reads = 1;
+ //global_context->config.do_big_margin_filtering_for_reads = 1;
}
- else if(strcmp("minDistanceBetweenVariants", long_options[option_index].name)==0)
+ else if(strcmp("minMappedLength", long_options[option_index].name)==0)
{
- int newdist = atoi(optarg);
- newdist = max(newdist, 1);
- newdist = min(newdist, MAX_READ_LENGTH);
- global_context->config.realignment_minimum_variant_distance = newdist;
+ global_context->config.min_mapped_fraction = atoi(optarg);
}
else if(strcmp("accurateFusions", long_options[option_index].name)==0)
{
global_context->config.more_accurate_fusions = 1;
}
+ else if(strcmp("SVdetection", long_options[option_index].name)==0)
+ {
+ global_context -> config.do_structural_variance_detection = 1;
+ }
+ else if(strcmp("maxRealignLocations", long_options[option_index].name)==0)
+ {
+ global_context->config.max_vote_combinations = atoi(optarg);
+ global_context->config.multi_best_reads = atoi(optarg);
+ }
+ else if(strcmp("maxVoteSimples", long_options[option_index].name)==0)
+ {
+ global_context->config.max_vote_simples = atoi(optarg);
+ }
+ else if(strcmp("complexIndels", long_options[option_index].name)==0)
+ {
+ global_context->config.maximise_sensitivity_indel = 1;
+ global_context->config.realignment_minimum_variant_distance = 1;
+ global_context->config.max_indel_length = 16;
+ }
+ else if(strcmp("minVoteCutoff", long_options[option_index].name)==0)
+ {
+ global_context->config.max_vote_number_cutoff = atoi(optarg);
+ }
+
break;
case '?':
default:
@@ -463,9 +468,17 @@ int parse_opts_aligner(int argc , char ** argv, global_context_t * global_contex
if(global_context->config.more_accurate_fusions)
{
global_context->config.high_quality_base_threshold = 999999;
- global_context->config.max_mismatch_junction_reads = 0;
- global_context->config.do_big_margin_filtering_for_junctions = 1;
- global_context->config.total_subreads = 20;
+ //#warning "============ REMOVE THE NEXT LINE ======================"
+ global_context->config.show_soft_cliping = 1;
+ //#warning "============ REMOVE ' + 3' FROM NEXT LINE =============="
+ global_context->config.max_mismatch_junction_reads = 0 + 3;
+
+ //#warning "============ REMOVE ' - 1' FROM NEXT LINE =============="
+ global_context->config.do_big_margin_filtering_for_junctions = 1 - 1;
+ global_context->config.total_subreads = 28;
+
+ //#warning "============ REMOVE THE NEXT LINE BEFORE RELEASE ==============="
+ //global_context->config.multi_best_reads = 1;
}
if(global_context->config.is_SAM_file_input) global_context->config.phred_score_format = FASTQ_PHRED33;
@@ -488,7 +501,7 @@ int main_align(int argc , char ** argv)
{
#endif
-// printf("SIZE_OF_ALN=%d\n", sizeof(alignment_result_t));
+// printf("SIZE_OF_ALN=%d\n", sizeof(mapping_result_t));
// printf("SIZE_OF_VOT=%d\n", sizeof(voting_context_t));
return core_main(argc, argv, parse_opts_aligner);
}
diff --git a/src/core-interface-subjunc.c b/src/core-interface-subjunc.c
index 9343d24..b328449 100644
--- a/src/core-interface-subjunc.c
+++ b/src/core-interface-subjunc.c
@@ -33,14 +33,19 @@ static struct option long_options[] =
{"multi", required_argument, 0, 'B'},
{"rg", required_argument, 0, 0},
{"rg-id", required_argument, 0, 0},
+ {"gzFASTQinput", no_argument, 0, 0},
{"unique", no_argument, 0, 'u'},
- {"BAMoutput", no_argument, 0, 0},
+ {"SAMoutput", no_argument, 0, 0},
{"BAMinput", no_argument, 0, 0},
{"SAMinput", no_argument, 0, 0},
{"hamming", no_argument, 0, 'H'},
{"quality", no_argument, 0, 'Q'},
- {"dnaseq", no_argument, 0, 0},
- {"gzFASTQinput", no_argument, 0, 0},
+ {"fast", no_argument, 0, 0},
+ {"DPMismatch", required_argument, 0, 'X'},
+ {"DPMatch", required_argument, 0, 'Y'},
+ {"DPGapOpen", required_argument, 0, 'G'},
+ {"DPGapExt", required_argument, 0, 'E'},
+ {"extendIndelDetection", no_argument, 0, 0},
{"allJunctions", no_argument, 0, 0},
{"memoryMultiplex", required_argument, 0, 0},
{"ignoreUnmapped", no_argument, 0, 0},
@@ -50,7 +55,13 @@ static struct option long_options[] =
{"reportPairedMultiBest", no_argument, 0, 0},
{"maxMismatches", required_argument, 0, 'M'},
{"exonicSubreadFrac", required_argument, 0, 0},
- {"minDistanceBetweenVariants", required_argument, 0, 0},
+ {"SVdetection", no_argument, 0, 0},
+ {"maxVoteSimples", required_argument, 0, 0},
+ {"maxRealignLocations", required_argument, 0, 0},
+ {"minVoteCutoff", required_argument, 0, 0},
+ {"minMappedFraction", required_argument, 0, 0},
+ {"disableBigMargin", no_argument, 0, 0},
+ {"complexIndels", no_argument, 0, 0},
{0, 0, 0, 0}
};
@@ -64,140 +75,101 @@ void print_usage_core_subjunc()
SUBREADputs("");
SUBREADputs("Required arguments:");
SUBREADputs("");
- SUBREADputs(" -i --index <index> base name of the index.");
- SUBREADputs("");
- SUBREADputs(" -r --read <input> name of the input file(FASTQ/FASTA format by default");
- SUBREADputs(" . See below for more supported formats). Both base-");
- SUBREADputs(" space and color-space read data are supported. For");
- SUBREADputs(" paired-end reads, this gives the first read file");
- SUBREADputs(" and the other read file should be specified using");
- SUBREADputs(" the -R option.");
- SUBREADputs("");
- SUBREADputs("Optional general arguments:");
- SUBREADputs("");
- SUBREADputs(" -o --output <output> name of the output file(SAM format by default). If");
- SUBREADputs(" not provided, mapping results will be output to the");
- SUBREADputs(" standard output (stdout).");
- SUBREADputs("");
- SUBREADputs(" -n --subreads <int> number of selected subreads, 14 by default.");
- SUBREADputs("");
- SUBREADputs(" -m --minmatch <int> consensus threshold (minimal number of consensus");
- SUBREADputs(" subreads required) for reporting a hit. If paired-");
- SUBREADputs(" end read data are provided, this gives the consensus");
- SUBREADputs(" threshold for the read which receives more votes");
- SUBREADputs(" than the other read from the same pair. 1 by default");
- SUBREADputs("");
- SUBREADputs(" -T --threads <int> number of threads/CPUs used, 1 by default.");
- SUBREADputs("");
- SUBREADputs(" -I --indel <int> number of indels allowed, 5 by default. Indels of up");
- SUBREADputs(" to 200bp long can be detected.");
- SUBREADputs("");
- SUBREADputs(" -B --multi <int> Specify the maximal number of equally-best mapping");
- SUBREADputs(" locations allowed to be reported for each read. 1");
- SUBREADputs(" by default. Allowed values are between 1 to 16");
- SUBREADputs(" (inclusive). 'NH' tag is used to indicate how many");
- SUBREADputs(" alignments are reported for the read and 'HI' tag");
- SUBREADputs(" is used for numbering the alignments reported for");
- SUBREADputs(" the same read, in the output. Note that -u option");
- SUBREADputs(" takes precedence over -B.");
- SUBREADputs("");
- SUBREADputs(" -P --phred <3:6> the format of Phred scores used in input files, '3'");
- SUBREADputs(" for phred+33 and '6' for phred+64. '3' by default.");
- SUBREADputs("");
- SUBREADputs(" -u --unique only uniquely mapped reads will be reported (reads");
- SUBREADputs(" mapped to multiple locations in the reference genome");
- SUBREADputs(" will not be reported). This option can be used");
- SUBREADputs(" together with option '-H' or '-Q'.");
- SUBREADputs("");
- SUBREADputs(" -Q --quality using mapping quality scores to break ties when more");
- SUBREADputs(" than one best mapping location is found.");
- SUBREADputs("");
- SUBREADputs(" -H --hamming using Hamming distance to break ties when more than");
- SUBREADputs(" one best mapping location is found.");
- SUBREADputs("");
- SUBREADputs(" -b --color-convert convert color-space read bases to base-space read");
- SUBREADputs(" bases in the mapping output. Note that the mapping");
- SUBREADputs(" itself will still be performed at color-space.");
- SUBREADputs(" ");
- SUBREADputs(" -M --maxMismatches <int> Specify the maximum number of mis-matched bases");
- SUBREADputs(" allowed in the alignment. 3 by default. Mis-matches");
- SUBREADputs(" found in soft-clipped bases are not counted.");
- SUBREADputs(" ");
-// SUBREADputs(" --disableBigMargin disable big margin for calling junctions.");
-// SUBREADputs(" ");
- SUBREADputs(" --dnaseq specify that the input read data are genomic DNA");
- SUBREADputs(" sequencing data. When specified, the program will");
- SUBREADputs(" perform read alignments and also detect fusion");
- SUBREADputs(" events such as chimeras. Discovered fusions will");
- SUBREADputs(" be saved to a file (*.fusions.txt). Detailed");
- SUBREADputs(" mapping results for fusion reads will be saved to");
- SUBREADputs(" the SAM/BAM output file as well. Secondary");
- SUBREADputs(" alignments of fusion reads will be saved to the");
- SUBREADputs(" following optional fields: CC(Chr), CP(Position),");
- SUBREADputs(" CG(CIGAR) and CT(strand). Note that each fusion");
- SUBREADputs(" read occupies only one row in the SAM/BAM output");
- SUBREADputs(" file.");
- SUBREADputs(" ");
- SUBREADputs(" --allJunctions this option should only be used for RNA-seq data.");
- SUBREADputs(" If specified, the program will report non-canonical");
- SUBREADputs(" exon-exon junctions and fusions (eg. chimeras), in");
- SUBREADputs(" addition to canonical exon-exon junctions. Non-");
- SUBREADputs(" canonical junctions and fusions are reported in the");
- SUBREADputs(" same format as that in `--dnaseq' option.");
- SUBREADputs(" ");
- SUBREADputs(" --trim5 <int> trim off <int> number of bases from 5' end of each");
- SUBREADputs(" read. 0 by default.");
- SUBREADputs("");
- SUBREADputs(" --trim3 <int> trim off <int> number of bases from 3' end of each");
- SUBREADputs(" read. 0 by default.");
- SUBREADputs("");
- SUBREADputs(" --rg-id <string> specify the read group ID. If specified,the read");
- SUBREADputs(" group ID will be added to the read group header");
- SUBREADputs(" field and also to each read in the mapping output.");
- SUBREADputs("");
- SUBREADputs(" --rg <string> add a <tag:value> to the read group (RG) header in");
- SUBREADputs(" in the mapping output.");
- SUBREADputs("");
- SUBREADputs(" --gzFASTQinput specify that the input read data is in gzipped");
- SUBREADputs(" FASTQ/FASTA format.");
- SUBREADputs("");
- SUBREADputs(" --SAMinput specify that the input read data is in SAM format.");
- SUBREADputs("");
- SUBREADputs(" --BAMinput specify that the input read data is in BAM format.");
- SUBREADputs("");
- SUBREADputs(" --BAMoutput specify that mapping results are saved into a BAM");
- SUBREADputs(" format file.");
- SUBREADputs("");
- SUBREADputs(" -v output version of the program.");
+ SUBREADputs(" -i <index> Base name of the index.");
SUBREADputs("");
+ SUBREADputs(" -r <string> Name of the input file. Input formats including gzipped");
+ SUBREADputs(" fastq, fastq, and fasta can be automatically detected. If");
+ SUBREADputs(" paired-end, this should give the name of file including");
+ SUBREADputs(" first reads.");
SUBREADputs("");
- SUBREADputs("Optional arguments for paired-end reads:");
+ SUBREADputs("Optional arguments:");
+ SUBREADputs("");
+ SUBREADputs(" -o <string> Name of the output file. By default, the output is in BAM");
+ SUBREADputs(" format.");
+ SUBREADputs("");
+ SUBREADputs(" -n <int> Number of selected subreads, 14 by default.");
+ SUBREADputs("");
+ SUBREADputs(" -m <int> Consensus threshold for reporting a hit (minimal number of");
+ SUBREADputs(" subreads that map in consensus) . If paired-end, this gives");
+ SUBREADputs(" the consensus threshold for the anchor read. 1 by default");
+ SUBREADputs("");
+ SUBREADputs(" -T <int> Number of CPU threads used, 1 by default.");
+ SUBREADputs("");
+ SUBREADputs(" -I <int> Maximum length (in bp) of indels that can be detected. 5 by");
+ SUBREADputs(" default. The program can detect indels of up to 200bp long.");
+ SUBREADputs("");
+ SUBREADputs(" -B <int> Maximal number of equally-best mapping locations to be");
+ SUBREADputs(" reported. 1 by default. Note that -u option takes precedence");
+ SUBREADputs(" over -B.");
+ SUBREADputs("");
+ SUBREADputs(" -P <3:6> Format of Phred scores used in input files, '3' for phred+33");
+ SUBREADputs(" and '6' for phred+64. '3' by default.");
+ SUBREADputs("");
+ SUBREADputs(" -u Report uniquely mapped reads only. Number of mis-matched");
+ SUBREADputs(" bases is used to break the tie.");
+ SUBREADputs("");
+ SUBREADputs(" -b Convert color-space read bases to base-space read bases in");
+ SUBREADputs(" the mapping output. Note that read mapping is performed at");
+ SUBREADputs(" color-space.");
+ SUBREADputs("");
+ SUBREADputs(" -M <int> Specify the maximum number of mis-matched bases allowed in");
+ SUBREADputs(" the alignment. 3 by default. Mis-matches found in soft-");
+ SUBREADputs(" clipped bases are not counted.");
+ SUBREADputs("");
+ SUBREADputs(" --SAMinput Input reads are in SAM format.");
+ SUBREADputs("");
+ SUBREADputs(" --BAMinput Input reads are in BAM format.");
SUBREADputs("");
- SUBREADputs(" -R --read2 <input> name of the second input file from paired-end data. ");
- SUBREADputs(" The program will then be switched to paired-end read");
- SUBREADputs(" mapping mode.");
+ SUBREADputs(" --SAMoutput Save mapping result in SAM format.");
SUBREADputs("");
- SUBREADputs(" -p --minmatch2 <int> consensus threshold for the read which receives less");
- SUBREADputs(" votes than the other read from the same pair, 1 by");
- SUBREADputs(" default.");
+ SUBREADputs(" --trim5 <int> Trim off <int> number of bases from 5' end of each read. 0");
+ SUBREADputs(" by default.");
SUBREADputs("");
- SUBREADputs(" -d --mindist <int> minimum fragment/template length, 50bp by default.");
+ SUBREADputs(" --trim3 <int> Trim off <int> number of bases from 3' end of each read. 0");
+ SUBREADputs(" by default.");
SUBREADputs("");
- SUBREADputs(" -D --maxdist <int> maximum fragment/template length, 600bp by default.");
+ SUBREADputs(" --rg-id <string> Add read group ID to the output.");
SUBREADputs("");
- SUBREADputs(" -S --order <ff:fr:rf> specifying if the first/second reads are forward");
- SUBREADputs(" or reversed, 'fr' by default");
+ SUBREADputs(" --rg <string> Add <tag:value> to the read group (RG) header in the output.");
SUBREADputs("");
+ SUBREADputs(" --DPGapOpen <int> Penalty for gap opening in short indel detection. -1 by");
+ SUBREADputs(" default.");
SUBREADputs("");
- SUBREADputs("Advanced arguments:");
+ SUBREADputs(" --DPGapExt <int> Penalty for gap extension in short indel detection. 0 by");
+ SUBREADputs(" default.");
SUBREADputs("");
- SUBREADputs(" --minDistanceBetweenVariants <int> Minimum allowed distance between two");
- SUBREADputs(" neighboring variants (or junctions in RNA-seq data)");
- SUBREADputs(" within the same read. 16 by default. The value");
- SUBREADputs(" should be greater than 0 and less than the length");
- SUBREADputs(" of the read.");
+ SUBREADputs(" --DPMismatch <int> Penalty for mismatches in short indel detection. 0 by");
+ SUBREADputs(" default.");
SUBREADputs("");
- SUBREADputs("For more information about these arguments, please refer to the User Manual.");
+ SUBREADputs(" --DPMatch <int> Score for matched bases in short indel detection. 2 by");
+ SUBREADputs(" default.");
+ SUBREADputs("");
+ SUBREADputs(" --allJunctions Detect exon-exon junctions (both canonical and non-canonical");
+ SUBREADputs(" junctions) and structural variants in RNA-seq data. Refer to");
+ SUBREADputs(" Users Guide for reporting of junctions and fusions.");
+ SUBREADputs("");
+ SUBREADputs(" --complexIndels Detect multiple short indels that occur concurrently in a");
+ SUBREADputs(" small genomic region (these indels could be as close as 1bp");
+ SUBREADputs(" apart).");
+ SUBREADputs("");
+ SUBREADputs(" -v Output version of the program.");
+ SUBREADputs("");
+ SUBREADputs("Optional arguments for paired-end reads:");
+ SUBREADputs("");
+ SUBREADputs(" -R <string> Name of the file including second reads.");
+ SUBREADputs("");
+ SUBREADputs(" -p <int> Consensus threshold for the non-anchor read (receiving less");
+ SUBREADputs(" votes than the anchor read from the same pair). 1 by");
+ SUBREADputs(" default.");
+ SUBREADputs("");
+ SUBREADputs(" -d <int> Minimum fragment/insert length, 50bp by default.");
+ SUBREADputs("");
+ SUBREADputs(" -D <int> Maximum fragment/insert length, 600bp by default.");
+ SUBREADputs("");
+ SUBREADputs(" -S <ff:fr:rf> Orientation of first and second reads, 'fr' by default (");
+ SUBREADputs(" forward/reverse).");
+ SUBREADputs("");
+ SUBREADputs("Refer to Users Manual for detailed description to the arguments.");
SUBREADputs("");
@@ -219,9 +191,9 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
global_context->config.ambiguous_mapping_tolerance = 39 - 20 ;
global_context->config.extending_search_indels = 0;
global_context->config.do_fusion_detection =0;
- global_context->config.use_dynamic_programming_indel = 0;
+ global_context->config.use_dynamic_programming_indel = 1;
- global_context->config.is_rna_seq_reads = 1;
+ global_context->config.do_breakpoint_detection = 1;
global_context->config.total_subreads = 14;
global_context->config.minimum_subread_for_first_read =1;
global_context->config.minimum_subread_for_second_read = 1;
@@ -229,23 +201,37 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
global_context->config.high_quality_base_threshold = 990000;
global_context->config.do_big_margin_filtering_for_junctions = 1;
global_context->config.report_no_unpaired_reads = 0;
+ global_context->config.experiment_type = CORE_EXPERIMENT_RNASEQ;
+ //#warning " ========================= REMOVE ' + 1 ' FROM THE NEXT LINE !! =========================="
global_context->config.limited_tree_scan = 0;
global_context->config.use_hamming_distance_in_exon = 0;
- global_context->config.big_margin_record_size = 24;
+ //global_context->config.big_margin_record_size = 24;
if(argc<2)
{
print_usage_core_subjunc();
return -1;
}
- while ((c = getopt_long (argc, argv, "vExsJ1:2:S:L:AHd:D:n:m:p:P:R:r:i:l:o:G:T:I:t:B:bQFcuUfM:3:5:9:?", long_options, &option_index)) != -1)
+ while ((c = getopt_long (argc, argv, "vxsJ1:2:S:L:AHd:D:n:m:p:P:R:r:i:l:o:G:Y:E:X:T:I:B:bQFcuUfM:3:5:9:?", long_options, &option_index)) != -1)
{
switch(c)
{
case 'v':
core_version_number("Subjunc");
return -1;
+ case 'G':
+ global_context->config.DP_penalty_create_gap = atoi(optarg);
+ break;
+ case 'Y':
+ global_context->config.DP_match_score = atoi(optarg);
+ break;
+ case 'E':
+ global_context->config.DP_penalty_extend_gap = atoi(optarg);
+ break;
+ case 'X':
+ global_context->config.DP_mismatch_penalty = atoi(optarg);
+ break;
case '3':
global_context->config.read_trim_3 = atoi(optarg);
break;
@@ -259,7 +245,7 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
strncpy(global_context->config.first_read_file, optarg, MAX_FILE_NAME_LENGTH-1);
break;
case 'J':
- global_context->config.show_soft_cliping = 1;
+ global_context->config.show_soft_cliping = 0;
break;
case 'Q':
global_context->config.use_quality_score_break_ties = 1;
@@ -273,11 +259,6 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
case 'A':
global_context->config.report_sam_file = 0;
break;
- case 'E':
- global_context->config.max_mismatch_exonic_reads = 200;
- global_context->config.max_mismatch_junction_reads = 200;
-
- break;
case 'S':
global_context->config.is_first_read_reversed = optarg[0]=='r'?1:0;
global_context->config.is_second_read_reversed = optarg[1]=='f'?0:1;
@@ -310,7 +291,7 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
case 'T':
global_context->config.all_threads = atoi(optarg);
if(global_context->config.all_threads <1) global_context->config.all_threads = 1;
- if(global_context->config.all_threads >32) global_context->config.all_threads = 32;
+ if(global_context->config.all_threads > MAX_THREADS) global_context->config.all_threads = MAX_THREADS;
break;
case 'M':
@@ -363,20 +344,21 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
case 'p':
global_context->config.minimum_subread_for_second_read = atoi(optarg);
break;
- case 't':
- sprintf(global_context->config.temp_file_prefix, "%s/core-temp-sum-%06u-%05u", optarg, getpid(), rand());
- break;
case 'F':
global_context->config.is_second_iteration_running = 0;
global_context->config.report_sam_file = 0;
break;
case 'B':
- //global_context->config.is_first_iteration_running = 0;
- //strcpy(global_context->config.medium_result_prefix, optarg);
global_context->config.multi_best_reads = atoi(optarg);
+
if(global_context->config.multi_best_reads<1)
global_context->config.multi_best_reads=1;
+ global_context->config.reported_multi_best_reads = global_context->config.multi_best_reads;
+
+ global_context->config.max_vote_combinations = max(global_context->config.max_vote_combinations, global_context->config.reported_multi_best_reads + 1);
+ global_context->config.max_vote_simples = max(global_context->config.max_vote_simples, global_context->config.reported_multi_best_reads + 1);
+
break;
case 'c':
global_context->config.space_type = GENE_SPACE_COLOR;
@@ -400,9 +382,9 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
strcat(global_context->config.read_group_txt, "\t");
strcat(global_context->config.read_group_txt, optarg);
}
- else if(strcmp("BAMoutput", long_options[option_index].name)==0)
+ else if(strcmp("SAMoutput", long_options[option_index].name)==0)
{
- global_context->config.is_BAM_output = 1;
+ global_context->config.is_BAM_output = 0;
}
else if(strcmp("BAMinput", long_options[option_index].name)==0)
{
@@ -424,9 +406,9 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
{
global_context->config.SAM_extra_columns=1;
}
- else if(strcmp("gzFASTQinput", long_options[option_index].name)==0)
+ else if(strcmp("minMappedFraction", long_options[option_index].name)==0)
{
- global_context->config.is_gzip_fastq=1;
+ global_context->config.min_mapped_fraction = atoi(optarg);
}
else if(strcmp("relaxMismatchedBases", long_options[option_index].name)==0)
{
@@ -444,7 +426,15 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
global_context->config.minimum_exonic_subread_fraction = atof(optarg);
else SUBREADprintf("WARNING: unknown parameter: --exonicSubreadFrac '%s'\n", optarg);
}
- else if(strcmp("minDistanceBetweenVariants", long_options[option_index].name)==0)
+ else if(strcmp("fast", long_options[option_index].name)==0)
+ {
+ global_context -> config.fast_run = 1;
+ }
+ else if(strcmp("SVdetection", long_options[option_index].name)==0)
+ {
+ global_context -> config.do_structural_variance_detection = 1;
+ }
+ else if(strcmp("minDistanceBetweenVariants", long_options[option_index].name)==0)
{
int newdist = atoi(optarg);
newdist = max(newdist, 1);
@@ -456,20 +446,36 @@ int parse_opts_subjunc(int argc , char ** argv, global_context_t * global_contex
global_context->config.do_big_margin_filtering_for_junctions = 0;
global_context->config.limited_tree_scan = 0;
}
- else if(strcmp("dnaseq", long_options[option_index].name)==0 || strcmp("allJunctions", long_options[option_index].name)==0)
+ else if(strcmp("maxVoteSimples", long_options[option_index].name)==0)
+ {
+ global_context->config.max_vote_simples = atoi(optarg);
+ }
+ else if(strcmp("maxRealignLocations", long_options[option_index].name)==0)
+ {
+ global_context->config.max_vote_combinations = atoi(optarg);
+ global_context->config.multi_best_reads = atoi(optarg);
+ }
+ else if(strcmp("complexIndels", long_options[option_index].name)==0)
+ {
+ global_context->config.maximise_sensitivity_indel = 1;
+ global_context->config.realignment_minimum_variant_distance = 1;
+ global_context->config.max_indel_length = 16;
+ }
+ else if(strcmp("disableBigMargin", long_options[option_index].name)==0)
+ {
+ global_context->config.big_margin_record_size = 0;
+ }
+ else if(strcmp("extendIndelDetection", long_options[option_index].name)==0)
+ {
+ global_context->config.extending_search_indels = 1;
+ }
+ else if(strcmp("minVoteCutoff", long_options[option_index].name)==0)
+ {
+ global_context->config.max_vote_number_cutoff = atoi(optarg);
+ }
+ else if(strcmp("allJunctions", long_options[option_index].name)==0)
{
global_context->config.do_fusion_detection = 1;
- if(strcmp("dnaseq", long_options[option_index].name)==0)
- global_context->config.prefer_donor_receptor_junctions = 0;
- //global_context->config.report_multi_mapping_reads = 1 ;
- global_context->config.limited_tree_scan = 1 ;
-
- // To maximise sensitivity of junction detection:
- // 1, Disable big margin for junctions.
- // 2, Disable limited tree scan.
- // 3, Disable neighbour removal.
- // 4, Disable big margin >=2 for minor locations in process_votes (searcg iii or jjj in core-junctions.c).
- // 5, Take a look at the flanking region length cutoff (>= 20% of the read length in new_junction in core-junctions.c, search 0.8000)
}
break;
diff --git a/src/core-junction-V2.c b/src/core-junction-V2.c
deleted file mode 100644
index c6f7c63..0000000
--- a/src/core-junction-V2.c
+++ /dev/null
@@ -1,2922 +0,0 @@
-/***************************************************************
-
- The Subread software package is free software package:
- you can redistribute it and/or modify it under the terms
- of the GNU General Public License as published by the
- Free Software Foundation, either version 3 of the License,
- or (at your option) any later version.
-
- Subread is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty
- of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-
- See the GNU General Public License for more details.
-
- Authors: Drs Yang Liao and Wei Shi
-
- ***************************************************************/
-
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <ctype.h>
-#include <assert.h>
-#include "subread.h"
-#include "sublog.h"
-#include "gene-value-index.h"
-#include "gene-algorithms.h"
-#include "input-files.h"
-#include "core.h"
-#include "core-indel.h"
-#include "core-junction.h"
-
-int localPointerCmp_forbed(const void *pointer1, const void *pointer2)
-{
- paired_exon_key *p1 = (paired_exon_key *)pointer1;
- paired_exon_key *p2 = (paired_exon_key *)pointer2;
- return !((p1-> big_key == p2 -> big_key) && (p2-> small_key == p1-> small_key));
-}
-
-unsigned long localPointerHashFunction_forbed(const void *pointer)
-{
- paired_exon_key *p = (paired_exon_key *)pointer;
- return p-> big_key ^ p-> small_key ^ (p->big_key>> 15);
-}
-
-int localPointerCmp_forpos(const void *pointer1, const void *pointer2)
-{
- return pointer1 != pointer2;
-}
-
-unsigned long localPointerHashFunction_forpos(const void *pointer)
-{
-
- return (unsigned long) pointer & 0xffffffff;
-}
-
-
-typedef struct{
- unsigned int piece_main_abs_offset;
- unsigned int piece_minor_abs_offset;
- int piece_main_masks;
- short piece_main_coverage_start;
- short piece_main_coverage_end;
-
- short piece_main_hamming_match;
- short piece_main_read_quality;
- short piece_minor_hamming_match;
- short piece_minor_read_quality;
- short intron_length;
-
- char *piece_main_indel_record;
- short piece_main_indels;
- short piece_minor_indel_offset;
- unsigned char piece_main_votes;
- unsigned char piece_minor_votes;
-
- short piece_minor_coverage_start;
- short piece_minor_coverage_end;
- short split_point;
- char is_GT_AG_donors;
- char is_donor_found;
- char is_strand_jumped;
-
- unsigned long long int Score_H;
- unsigned int Score_L;
-} select_junction_record_t;
-
-
-// read_head_abs_pos is the offset of the FIRST WANTED base.
-void search_events_to_front(global_context_t * global_context, thread_context_t * thread_context, explain_context_t * explain_context, char * read_text , char * qual_text, unsigned int read_head_abs_offset, short remainder_len, short sofar_matched)
-{
- short tested_read_pos;
-
- HashTable * event_table = NULL;
- chromosome_event_t * event_space = NULL;
-
- gene_value_index_t * value_index = thread_context?thread_context->current_value_index:global_context->current_value_index ;
-
- if(thread_context)
- {
- event_table = ((indel_thread_context_t *)thread_context -> module_thread_contexts[MODULE_INDEL_ID]) -> event_entry_table;
- event_space = ((indel_thread_context_t *)thread_context -> module_thread_contexts[MODULE_INDEL_ID]) -> event_space_dynamic;
- }
- else
- {
- event_table = ((indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID]) -> event_entry_table;
- event_space = ((indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID]) -> event_space_dynamic;
- }
-
-
- int event_search_method;
- if(global_context -> config.do_fusion_detection)
- event_search_method = EVENT_SEARCH_BY_BOTH_SIDES;
- else
- event_search_method = EVENT_SEARCH_BY_SMALL_SIDE;
-
- // tested_read_pos is the index of the first base unwanted!
- if(MAX_EVENTS_IN_READ - 1> explain_context -> tmp_search_sections)
- for(tested_read_pos = 1 + 15 ; tested_read_pos <= remainder_len; tested_read_pos++)
- {
- int xk1, matched_bases_to_site;
- chromosome_event_t *site_events[MAX_EVENT_ENTRIES_PER_SITE+1];
-
- int jump_penalty = 0;
-
- unsigned potential_event_pos;
- if(explain_context -> current_is_strand_jumped)
- potential_event_pos = read_head_abs_offset - tested_read_pos +1;
- else
- potential_event_pos = read_head_abs_offset + tested_read_pos -1;
- int site_events_no = search_event(global_context, event_table , event_space , potential_event_pos, event_search_method , CHRO_EVENT_TYPE_INDEL | CHRO_EVENT_TYPE_JUNCTION | CHRO_EVENT_TYPE_FUSION , site_events);
- /*if(memcmp(explain_context->read_name, "HKJMKOB02G7RDV",14) == 0)
- {
- printf("FOUND THE EVENT FRONT:%d at %u\n", site_events_no, potential_event_pos);
- if(site_events_no)
- printf("EVENT0_type = %d\n", site_events[0]->event_type);
- }*/
-
- //if(explain_context -> pair_number==2074) printf("FF OFFSET=%d; LEDGE=%u; FOUND=%d\n", tested_read_pos, potential_event_pos, site_events_no);
- if(!site_events_no)continue;
-
- unsigned int tested_chro_begin;
- if(explain_context -> current_is_strand_jumped)
- tested_chro_begin = read_head_abs_offset - tested_read_pos + 1;
- else
- tested_chro_begin = read_head_abs_offset;
-
- matched_bases_to_site = match_chro(read_text, value_index, tested_chro_begin , tested_read_pos, explain_context -> current_is_strand_jumped, global_context -> config.space_type);
-
-
- //if(memcmp(explain_context->read_name, "HKJMKOB02G7RDV",14) == 0)
- // printf("JUMP?%d > %d\n", (1+matched_bases_to_site)*10000 / tested_read_pos , 9000);
-
- if((1+matched_bases_to_site)*10000/tested_read_pos > 9000)
- for(xk1 = 0; xk1 < site_events_no ; xk1++)
- {
- chromosome_event_t * tested_event = site_events[xk1];
-
- // note that these two values are the index of the first wanted base.
- unsigned int new_read_head_abs_offset;
-
- if(global_context -> config.do_fusion_detection && tested_event->event_type == CHRO_EVENT_TYPE_FUSION)
- new_read_head_abs_offset = (potential_event_pos == tested_event -> event_large_side)?tested_event -> event_small_side:tested_event -> event_large_side;
- else
- new_read_head_abs_offset = tested_event -> event_large_side;
-
-
- short new_remainder_len = remainder_len - tested_read_pos + min(0, tested_event->indel_length);
-
- //int is_ambiguous = tested_event -> is_ambiguous;
-
- if(new_remainder_len>0)// && (new_remainder_len>8 || !is_ambiguous))
- {
- //if(explain_context -> pair_number==2074) printf("JUMPPED IN!\n");
- explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections].read_pos_end = explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections].read_pos_start + tested_read_pos;
- explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections].event_after_section = tested_event;
- explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections].is_connected_to_large_side = (potential_event_pos == tested_event -> event_large_side);
- explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections + 1].read_pos_start = tested_read_pos - min(0, tested_event -> indel_length);
- explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections + 1].abs_offset_for_start = new_read_head_abs_offset;
- explain_context -> tmp_jump_length += (tested_event->event_large_side - tested_event->event_small_side);
-
- //if(tested_event->event_type == CHRO_EVENT_TYPE_FUSION) jump_penalty = 1;
- //else if(tested_event->event_type == CHRO_EVENT_TYPE_JUNCTION) jump_penalty = 1;
-
- int current_is_jumped = explain_context -> current_is_strand_jumped ;
- if(tested_event -> event_type == CHRO_EVENT_TYPE_FUSION && tested_event -> is_strand_jumped)
- explain_context -> current_is_strand_jumped = !explain_context -> current_is_strand_jumped;
-
- explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections + 1].is_strand_jumped = explain_context -> current_is_strand_jumped;
-
- explain_context -> tmp_search_sections ++;
- search_events_to_front(global_context, thread_context, explain_context, read_text + tested_read_pos - min(0, tested_event->indel_length), qual_text + tested_read_pos - min(0, tested_event->indel_length), new_read_head_abs_offset, new_remainder_len, sofar_matched + matched_bases_to_site - jump_penalty);
- explain_context -> tmp_search_sections --;
-
- explain_context -> current_is_strand_jumped = current_is_jumped;
- explain_context -> tmp_jump_length -= (tested_event->event_large_side - tested_event->event_small_side);
- }
- //if(global_context ->config.limited_tree_scan) break;
- }
- if(global_context ->config.limited_tree_scan && explain_context -> full_read_len <= EXON_LONG_READ_LENGTH) break;
- }
-
- int whole_section_matched = match_chro(read_text , value_index, explain_context -> current_is_strand_jumped?read_head_abs_offset - remainder_len +1:read_head_abs_offset, remainder_len , explain_context -> current_is_strand_jumped, global_context -> config.space_type);
-
- if(whole_section_matched + sofar_matched > explain_context -> best_matching_bases|| (whole_section_matched + sofar_matched == explain_context -> best_matching_bases && explain_context -> best_jump_length > explain_context -> tmp_jump_length))
- {
- explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections].read_pos_end = explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections].read_pos_start + remainder_len;
- explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections].event_after_section = NULL;
-
- explain_context -> best_matching_bases = whole_section_matched + sofar_matched ;
- explain_context -> front_search_confirmed_sections = explain_context -> tmp_search_sections +1;
- explain_context -> best_jump_length = explain_context -> tmp_jump_length;
- memcpy(explain_context -> front_search_junctions, explain_context -> tmp_search_junctions , sizeof(perfect_section_in_read_t) * (explain_context -> tmp_search_sections +1));
- }
-}
-
-// read_tail_abs_offset is actually the offset of the base next to the last base in read tail.
-// read_tail_pos is the FIRST UNWANTED BASE, after the read.
-void search_events_to_back(global_context_t * global_context, thread_context_t * thread_context, explain_context_t * explain_context, char * read_text , char * qual_text, unsigned int read_tail_abs_offset, short read_tail_pos, short sofar_matched)
-{
- short tested_read_pos;
-
- HashTable * event_table = NULL;
- chromosome_event_t * event_space = NULL;
-
- if(thread_context)
- {
- event_table = ((indel_thread_context_t *)thread_context -> module_thread_contexts[MODULE_INDEL_ID]) -> event_entry_table;
- event_space = ((indel_thread_context_t *)thread_context -> module_thread_contexts[MODULE_INDEL_ID]) -> event_space_dynamic;
- }
- else
- {
- event_table = ((indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID]) -> event_entry_table;
- event_space = ((indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID]) -> event_space_dynamic;
- }
-
- gene_value_index_t * value_index = thread_context?thread_context->current_value_index:global_context->current_value_index ;
-
- int event_search_method;
- if(global_context -> config.do_fusion_detection)
- event_search_method = EVENT_SEARCH_BY_BOTH_SIDES;
- else
- event_search_method = EVENT_SEARCH_BY_LARGE_SIDE;
-
-
- // minimum perfect section length is 1
- // tested_read_pos is the first WANTED BASE in section.
- if(MAX_EVENTS_IN_READ - 1> explain_context -> tmp_search_sections)
- for(tested_read_pos = read_tail_pos - 1 - 15 ; tested_read_pos >=0;tested_read_pos --)
- {
- int xk1, matched_bases_to_site;
- int jump_penalty = 0;
- chromosome_event_t *site_events[MAX_EVENT_ENTRIES_PER_SITE];
-
- int potential_event_pos;
-
- if(explain_context -> current_is_strand_jumped)
- potential_event_pos = read_tail_abs_offset + ( read_tail_pos - tested_read_pos);
- else
- potential_event_pos = read_tail_abs_offset - ( read_tail_pos - tested_read_pos);
-
-
- int site_events_no = search_event(global_context, event_table , event_space , potential_event_pos, event_search_method , CHRO_EVENT_TYPE_INDEL | CHRO_EVENT_TYPE_JUNCTION | CHRO_EVENT_TYPE_FUSION , site_events);
- //if(explain_context -> pair_number==2074) printf("BF OFFSET=%d; REDGE=%u; FOUND=%d\n", tested_read_pos, potential_event_pos, site_events_no);
-
-
- /*if(memcmp(explain_context->read_name, "HKJMKOB02G7RDV",14) == 0)
- {
- printf("FOUND THE EVENT BACK:%d at %u\n", site_events_no, potential_event_pos);
- if(site_events_no)
- printf("EVENT0_type = %d\n", site_events[0]->event_type);
- }*/
-
- if(!site_events_no)continue;
-
- unsigned int tested_chro_begin;
- if(explain_context -> current_is_strand_jumped)
- tested_chro_begin = read_tail_abs_offset + 1;
- else
- tested_chro_begin = read_tail_abs_offset - (read_tail_pos - tested_read_pos);
-
- matched_bases_to_site = match_chro(read_text + tested_read_pos, value_index, tested_chro_begin , read_tail_pos - tested_read_pos, explain_context -> current_is_strand_jumped, global_context -> config.space_type);
-
- //if(memcmp(explain_context->read_name, "HKJMKOB02G7RDV",14) == 0)
- // printf("JUMP?%d > %d\n", (1+matched_bases_to_site)*10000 / (read_tail_pos - tested_read_pos) , 9000);
-
- if((1+matched_bases_to_site)*10000/(read_tail_pos - tested_read_pos) > 9000)
- for(xk1 = 0; xk1 < site_events_no ; xk1++)
- {
- chromosome_event_t * tested_event = site_events[xk1];
-
- // note that read_tail_pos is the first unwanted base.
- int new_read_tail_pos = tested_read_pos;
- if(tested_event->event_type == CHRO_EVENT_TYPE_INDEL) new_read_tail_pos += min(0, tested_event -> indel_length);
- // note that read_tail_abs_offset is the first unwanted base.
- unsigned int new_read_tail_abs_offset;
-
- if(global_context -> config.do_fusion_detection && tested_event->event_type == CHRO_EVENT_TYPE_FUSION)
- {
- new_read_tail_abs_offset = (potential_event_pos == tested_event -> event_small_side)? tested_event -> event_large_side : tested_event -> event_small_side;
- if(tested_event->is_strand_jumped + explain_context -> current_is_strand_jumped == 1)
- new_read_tail_abs_offset--;
- else
- new_read_tail_abs_offset++;
- }
- else
- new_read_tail_abs_offset = tested_event -> event_small_side + 1;
-
- //int is_ambiguous = tested_event -> is_ambiguous;
-
- if(new_read_tail_pos>0)// && (new_read_tail_pos>8 || !is_ambiguous))
- {
- explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections].read_pos_start = tested_read_pos;
- explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections + 1].event_after_section = tested_event;
- explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections + 1].is_connected_to_large_side = (potential_event_pos == tested_event -> event_small_side);
- explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections + 1].read_pos_end = tested_read_pos + min(0, tested_event->indel_length);
- explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections + 1].abs_offset_for_start = new_read_tail_abs_offset;
- explain_context -> tmp_jump_length += (tested_event->event_large_side - tested_event->event_small_side);
-
- //if(tested_event->event_type == CHRO_EVENT_TYPE_FUSION) jump_penalty = 1;
- //else if(tested_event->event_type == CHRO_EVENT_TYPE_JUNCTION) jump_penalty = 1;
-
- int current_is_jumped = explain_context -> current_is_strand_jumped ;
- if(tested_event -> event_type == CHRO_EVENT_TYPE_FUSION && tested_event -> is_strand_jumped)
- explain_context -> current_is_strand_jumped = !explain_context -> current_is_strand_jumped;
- explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections + 1].is_strand_jumped = explain_context -> current_is_strand_jumped;
-
- explain_context -> tmp_search_sections ++;
-
- search_events_to_back(global_context, thread_context, explain_context, read_text , qual_text, new_read_tail_abs_offset , new_read_tail_pos, sofar_matched + matched_bases_to_site - jump_penalty);
- explain_context -> tmp_search_sections --;
-
- explain_context -> current_is_strand_jumped = current_is_jumped;
- explain_context -> tmp_jump_length -= (tested_event->event_large_side - tested_event->event_small_side);
-
- }
- //if(global_context ->config.limited_tree_scan) break;
- }
- if(global_context ->config.limited_tree_scan && explain_context -> full_read_len <= EXON_LONG_READ_LENGTH) break;
- }
-
- int whole_section_matched = match_chro(read_text , value_index, read_tail_abs_offset - (explain_context -> current_is_strand_jumped?-1:read_tail_pos), read_tail_pos , explain_context -> current_is_strand_jumped, global_context -> config.space_type);
-
- if(whole_section_matched + sofar_matched > explain_context -> best_matching_bases || (whole_section_matched + sofar_matched == explain_context -> best_matching_bases && explain_context -> best_jump_length > explain_context -> tmp_jump_length))
- {
- explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections].read_pos_start = 0;
- explain_context -> best_matching_bases = whole_section_matched + sofar_matched ;
- explain_context -> back_search_confirmed_sections = explain_context -> tmp_search_sections +1;
- explain_context -> best_jump_length = explain_context -> tmp_jump_length;
- memcpy(explain_context -> back_search_junctions, explain_context -> tmp_search_junctions , sizeof(perfect_section_in_read_t) * (explain_context -> tmp_search_sections +1));
- }
-}
-
-int init_junction_tables(global_context_t * context)
-{
- return 0;
-}
-
-int destroy_junction_tables(global_context_t * context)
-{
- return 0;
-}
-int init_junction_thread_contexts(global_context_t * global_context, thread_context_t * thread_context, int task)
-{
- return 0;
-}
-int finalise_junction_thread(global_context_t * global_context, thread_context_t * thread_context, int task)
-{
- return 0;
-}
-
-
-void insert_big_margin_record(global_context_t * global_context , unsigned char * big_margin_record, unsigned char votes, short read_pos_start, short read_pos_end, int read_len, int is_negative)
-{
- unsigned char read_pos_start_2 = (is_negative?read_len -read_pos_end:read_pos_start) ;
- unsigned char read_pos_end_2 = (is_negative?read_len -read_pos_start:read_pos_end);
- assert(votes>0);
-
- if(read_len>255)
- {
- read_pos_start_2>>=2;
- read_pos_end_2>>=2;
- }
-
- int xk1;
- for(xk1=0; xk1< global_context->config.big_margin_record_size / 3; xk1++)
- {
- if( votes >= big_margin_record[xk1*3])
- break;
- }
- if(xk1< global_context->config.big_margin_record_size / 3)
- {
- int xk2;
- for(xk2 = global_context->config.big_margin_record_size-4; xk2 >= xk1*3; xk2--)
- big_margin_record[xk2 + 3] = big_margin_record[xk2];
- big_margin_record[xk1*3+0] = votes;
- big_margin_record[xk1*3+1] = read_pos_start_2;
- big_margin_record[xk1*3+2] = read_pos_end_2;
- }
-}
-
-//#define voting_anchor_number 3
-void set_zero_votes(global_context_t * global_context, int pair_number, int is_second_read , int best_read_id)
-{
- if(best_read_id >= global_context->config.multi_best_reads) return;
- _global_retrieve_alignment_ptr(global_context, pair_number, is_second_read, best_read_id)->selected_votes = 0;
-}
-
-
-void make_128bit_score(unsigned long long int * score_H, unsigned int * score_L, int is_paired_end, short Vote_Anchor_Major, short Vote_Anchor_Minor , short Vote_Second_Major, short Vote_Second_Minor, short Span , short HammingMatch,short Quality, unsigned int TLEM, int Intron)
-{
- ( * score_H) = 0LLU;
- ( * score_L) = 0;
-
- ( * score_H) += (is_paired_end?1LLU:0LLU)<<63;
- ( * score_H) += (1LLU*Vote_Anchor_Major&63)<<57;
- ( * score_H) += (1LLU*Vote_Anchor_Minor&31)<<52;
- ( * score_H) += (1LLU*Vote_Second_Major&63)<<46;
- ( * score_H) += (1LLU*Vote_Second_Minor&31)<<41;
-
- ( * score_H) += (1LLU*Span & 0xfff) << 29;
- ( * score_H) += (1LLU*HammingMatch & 0xfff) << 17;
- ( * score_H) += (1LLU*Quality & 0x1ff) << 8;
- ( * score_H) += 0xff & (TLEM >> 12);
-
-
- ( * score_L) += (TLEM & 0xfff) << 20;
- ( * score_L) += (Intron & 0xfffff);
-}
-
-int process_voting_junction(global_context_t * global_context, thread_context_t * thread_context, int pair_number, gene_vote_t * vote_1, gene_vote_t * vote_2, char * read_name_1, char * read_name_2, char * read_text_1, char * read_text_2, int read_len_1, int read_len_2, int is_negative_strand)
-{
- int i, j, kx1;
- int voting_anchor_number = global_context -> input_reads.is_paired_end_reads?10:global_context -> config.multi_best_reads;
-
- // each read nominates at most five anchors
- // the base combination of the two anchors is selected.
-
- select_junction_record_t read_1_anchors[voting_anchor_number];
- select_junction_record_t read_2_anchors[voting_anchor_number];
- int used_anchors_1=0, used_anchors_2=0, is_anchor_1_breakeven = 0, is_anchor_2_breakeven = 0;
- memset(read_1_anchors, 0, sizeof(select_junction_record_t)*voting_anchor_number);
- memset(read_2_anchors, 0, sizeof(select_junction_record_t)*voting_anchor_number);
-
- int is_second_read;
- int is_junction_found = 0;
- int all_max_votes = vote_1->max_vote;
- if(global_context -> input_reads.is_paired_end_reads)
- all_max_votes = max(vote_2->max_vote, all_max_votes);
-
-
- if(all_max_votes<global_context-> config.minimum_subread_for_first_read)
- return 0;
-
- for(is_second_read = 0; is_second_read < 1+global_context -> input_reads.is_paired_end_reads; is_second_read++)
- {
- gene_vote_t * current_vote = is_second_read?vote_2:vote_1;
- int current_max_votes = current_vote -> max_vote;
- int total_used_anchors;
- select_junction_record_t * current_anchors = is_second_read?read_2_anchors:read_1_anchors;
-
- int curr_read_len = is_second_read?read_len_2:read_len_1;
- char * curr_read_text = is_second_read?read_text_2:read_text_1;
- gene_value_index_t * value_index = thread_context?thread_context->current_value_index:global_context->current_value_index ;
-
- // put main_piece to anchors.
- for (i=0; i<GENE_VOTE_TABLE_SIZE; i++)
- {
- for (j=0; j< current_vote->items[i]; j++)
- {
- if(current_vote -> votes[i][j] >=current_max_votes)
- {
-
- int target_addr = 0;
- int is_break_even ;
- int hamming_match = 0, quality_score = 0;
-
- if(global_context -> config.use_hamming_distance_break_ties)
- hamming_match = match_chro_indel(curr_read_text, value_index , current_vote -> pos[i][j], curr_read_len, 0, global_context -> config.space_type, global_context -> config.max_indel_length, current_vote -> indel_recorder[i][j], global_context -> config.total_subreads);
- if(global_context -> config.use_quality_score_break_ties)
- quality_score = max(0,min(512,current_vote -> quality[i][j] / current_vote -> votes[i][j]-200));
-
- //printf("Q=%d\n", current_vote -> quality[i][j]);
-
- int main_piece_indels = 0;
-
- if(curr_read_len > EXON_LONG_READ_LENGTH){
- for(kx1=0; kx1<MAX_INDEL_SECTIONS; kx1++)
- {
- if(!current_vote -> indel_recorder[i][j][kx1*3]) break;
- main_piece_indels += (current_vote -> indel_recorder[i][j][kx1*3+2]);
- }
- }
-
- unsigned int test_score_L = 0;
- unsigned long long int test_score_H = 0;
- //int test_score = 20000000* current_vote -> votes[i][j] + this_extra_scores + (current_vote -> coverage_end[i][j] - current_vote -> coverage_start[i][j]) - 100 * (main_piece_indels);
-
- make_128bit_score(&test_score_H, &test_score_L, 0, current_vote -> votes[i][j], 0, 0, 0, (current_vote -> coverage_end[i][j] - current_vote -> coverage_start[i][j]) , hamming_match, quality_score, 0, 0);
-
- for(target_addr =0; target_addr<voting_anchor_number; target_addr++)
- if((current_anchors[target_addr].Score_H < test_score_H || (current_anchors[target_addr].Score_H == test_score_H && current_anchors[target_addr].Score_L < test_score_L ))|| ( current_vote -> pos[i][j] < current_anchors[target_addr].piece_main_abs_offset && current_anchors[target_addr].Score_H == test_score_H && current_anchors[target_addr].Score_L == test_score_L)) break;
-
- is_break_even = 0;
- if(current_anchors[0].Score_H == test_score_H && current_anchors[0].Score_L == test_score_L)
- is_break_even = 1;
- else if(current_anchors[0].Score_H < test_score_H || (current_anchors[0].Score_H == test_score_H && current_anchors[0].Score_L < test_score_L))
- {
- if(is_second_read) is_anchor_2_breakeven = 0;
- else is_anchor_1_breakeven = 0;
- }
-
- if(target_addr<voting_anchor_number-1)
- for(kx1=voting_anchor_number-1; kx1>target_addr; kx1--)
- memcpy(current_anchors+kx1, current_anchors+kx1-1, sizeof(select_junction_record_t));
-
- if(target_addr<voting_anchor_number)
- {
- memset(&current_anchors[target_addr], 0, sizeof(select_junction_record_t));
- current_anchors[target_addr].piece_main_abs_offset = current_vote -> pos[i][j];
- current_anchors[target_addr].piece_main_coverage_start = current_vote -> coverage_start[i][j];
- current_anchors[target_addr].piece_main_coverage_end = current_vote -> coverage_end[i][j];
- current_anchors[target_addr].piece_main_votes = current_vote -> votes[i][j];
- current_anchors[target_addr].piece_main_indel_record = current_vote -> indel_recorder[i][j] ;
- current_anchors[target_addr].piece_main_indels = main_piece_indels;
- current_anchors[target_addr].piece_main_masks = current_vote -> masks[i][j];
- current_anchors[target_addr].piece_main_read_quality = quality_score;
- current_anchors[target_addr].piece_main_hamming_match = hamming_match;
-
- if(global_context -> config.use_hamming_distance_in_exon)
- {
- int found_indels , found_inde_pos;
-
- int matchingness_count = match_indel_chro_to_front(curr_read_text, value_index, current_vote -> pos[i][j] , curr_read_len, &found_indels, &found_inde_pos, global_context -> config.max_indel_length, 0);
-
- if(matchingness_count*1000 >= curr_read_len*800)
- {
- current_anchors[target_addr].piece_main_coverage_start = 1;
- current_anchors[target_addr].piece_main_coverage_end = curr_read_len-1;
- }
-
- }
- current_anchors[target_addr].Score_H = test_score_H;
- current_anchors[target_addr].Score_L = test_score_L;
- }
- if(is_break_even)
- {
- if(is_second_read) is_anchor_2_breakeven = 1;
- else is_anchor_1_breakeven = 1;
- }
- }
- if(current_vote -> votes[i][j] >=current_max_votes-2 && (global_context->config.do_big_margin_filtering_for_junctions || global_context->config.do_big_margin_filtering_for_reads || global_context->config.do_big_margin_reporting))
- insert_big_margin_record(global_context, _global_retrieve_big_margin_ptr(global_context,pair_number, is_second_read) ,current_vote -> votes[i][j], current_vote -> coverage_start[i][j], current_vote -> coverage_end[i][j], is_second_read?read_len_2:read_len_1, is_negative_strand);
- }
- }
-
- for(kx1=0; kx1<voting_anchor_number; kx1++)
- if(!current_anchors[kx1].piece_main_votes)break;
- total_used_anchors = kx1;
-
- if(is_second_read)
- used_anchors_2 = total_used_anchors;
- else
- used_anchors_1 = total_used_anchors;
-
- for(kx1=0; kx1<total_used_anchors; kx1++)
- {
- select_junction_record_t * current_anchor = &current_anchors[kx1];
- //if((current_anchors[kx1].piece_main_coverage_end - current_anchors[kx1].piece_main_coverage_start)*10000 > curr_read_len * 8000)continue;
-
- if(global_context->config.is_rna_seq_reads || global_context->config.do_fusion_detection)
- {
- unsigned int max_score_L = current_anchor ->Score_L;
- unsigned long long int max_score_H = current_anchor ->Score_H;
-
- for (i=0; i<GENE_VOTE_TABLE_SIZE; i++)
- for (j=0; j< current_vote->items[i]; j++)
- {
- if(current_vote -> pos[i][j] == current_anchor->piece_main_abs_offset) continue; // myself
- if(current_vote -> votes[i][j] > current_anchor->piece_main_votes) continue;
- if(current_vote -> votes[i][j] == current_anchor->piece_main_votes && current_vote -> pos[i][j] > current_anchor->piece_main_abs_offset) continue;
-
- long long int dist = current_vote -> pos[i][j];
- dist -= current_anchor->piece_main_abs_offset;
- int is_strand_jumped = (current_anchors[kx1].piece_main_masks & IS_NEGATIVE_STRAND)!=(current_vote -> masks[i][j] & IS_NEGATIVE_STRAND);
-
- if(!global_context->config.do_fusion_detection)
- { // if it is junction detection, then remove long-distance halves and wrongly ordered halves.
- assert(!is_strand_jumped);
- if(abs(dist)> global_context->config.maximum_intron_length) continue;
- if(current_anchors[kx1].piece_main_coverage_start == current_vote -> coverage_start[i][j])continue;
- if(current_anchors[kx1].piece_main_coverage_end == current_vote -> coverage_end[i][j])continue;
-
- if(current_anchors[kx1].piece_main_coverage_start > current_vote -> coverage_start[i][j])
- {
- if(current_anchor->piece_main_abs_offset < current_vote -> pos[i][j])continue;
- }
- else
- {
- if(current_anchor->piece_main_abs_offset > current_vote -> pos[i][j])continue;
- }
- }
-
- int minor_hamming_match = 0;
- if(global_context -> config.use_hamming_distance_break_ties)
- minor_hamming_match = match_chro_indel(curr_read_text, value_index , current_vote -> pos[i][j], curr_read_len, 0, global_context -> config.space_type, global_context -> config.max_indel_length, current_vote -> indel_recorder[i][j], global_context -> config.total_subreads);
-
- int minor_read_quality = 0;
- if(global_context -> config.use_quality_score_break_ties)
- minor_read_quality = min(1000, current_vote -> quality[i][j] / current_vote -> votes[i][j]);
-
- unsigned long long int new_score_H = 0;
- unsigned int new_score_L = 0 ;
- make_128bit_score(&new_score_H, &new_score_L, 0, current_anchor->piece_main_votes , current_vote -> votes[i][j], 0, 0, (current_anchor->piece_main_coverage_end -current_anchor->piece_main_coverage_start) + (current_vote -> coverage_end[i][j] - current_vote -> coverage_start[i][j]) , current_anchors[kx1].piece_main_hamming_match + minor_hamming_match, current_anchors[kx1].piece_main_read_quality + minor_read_quality , 0 , 1024*1024-1-abs(dist));
-
- //new_score = current_anchors[kx1].piece_main_extra_scores + max(500000-abs(dist),0) + current_anchor -> piece_main_votes * 20000000 + current_vote -> votes[i][j] * 20000000 + (current_anchor->piece_main_coverage_end - current_anchor->piece_main_coverage_start) - 100 * (current_anchors[kx1].piece_main_indels);
-
- if(new_score_H > max_score_H||(new_score_H == max_score_H && new_score_L> max_score_L))
- {
- int final_split_point, is_GT_AG_donors, is_donor_found;
- int donors_found_score;
- int minor_indel_offset=0;
-
- if(is_strand_jumped)
- {
-
- // both guess_start and guess_end have to be translated to "reversed" read manner.
-
- int minor_cover_end_as_reversed = (current_vote -> masks[i][j] & IS_NEGATIVE_STRAND)? current_vote -> coverage_end[i][j]:(curr_read_len - current_vote -> coverage_start[i][j]);
- int minor_cover_start_as_reversed = (current_vote -> masks[i][j] & IS_NEGATIVE_STRAND)? current_vote -> coverage_start[i][j]:(curr_read_len - current_vote -> coverage_end[i][j]);
- int main_cover_end_as_reversed = (current_anchors[kx1].piece_main_masks & IS_NEGATIVE_STRAND)?current_anchors[kx1].piece_main_coverage_end:(curr_read_len - current_anchors[kx1].piece_main_coverage_start);
- int main_cover_start_as_reversed = (current_anchors[kx1].piece_main_masks & IS_NEGATIVE_STRAND)?current_anchors[kx1].piece_main_coverage_start:(curr_read_len - current_anchors[kx1].piece_main_coverage_end);
-
- // no long overlap
- int overlapped ;
- if(main_cover_start_as_reversed > minor_cover_start_as_reversed)
- overlapped = minor_cover_end_as_reversed - main_cover_start_as_reversed;
- else
- overlapped = main_cover_end_as_reversed - minor_cover_start_as_reversed;
-
- if(overlapped > 14) continue;
-
- int guess_start_as_reversed = (main_cover_start_as_reversed > minor_cover_start_as_reversed)?
- (minor_cover_end_as_reversed - 15): (main_cover_end_as_reversed - 15);
-
- int guess_end_as_reversed = (main_cover_start_as_reversed > minor_cover_start_as_reversed)?
- (main_cover_start_as_reversed + 15): (minor_cover_start_as_reversed + 15);
-
- int is_left_half_negative = 0 != ((current_anchor->piece_main_abs_offset>current_vote -> pos[i][j]?current_vote -> masks[i][j]:current_anchors[kx1].piece_main_masks)&IS_NEGATIVE_STRAND);
- int is_right_half_negative = !is_left_half_negative;
-
- int is_left_on_left_as_reversed = (main_cover_start_as_reversed > minor_cover_start_as_reversed) + (current_anchor->piece_main_abs_offset > current_vote -> pos[i][j]) !=1;
-
- unsigned int left_half_abs_offset = min(current_vote -> pos[i][j],current_anchor->piece_main_abs_offset);
- unsigned int right_half_abs_offset = max(current_vote -> pos[i][j],current_anchor->piece_main_abs_offset);
-
- donors_found_score = donor_jumped_score(global_context, thread_context, left_half_abs_offset, right_half_abs_offset , max(0, guess_start_as_reversed) , min( guess_end_as_reversed, curr_read_len), curr_read_text, curr_read_len, is_left_half_negative, is_right_half_negative, is_left_on_left_as_reversed , is_second_read, & final_split_point, & is_GT_AG_donors, & is_donor_found);
- }
- else
- {
-
- char * chro_name_left, *chro_name_right;
- unsigned int chro_pos_left,chro_pos_right;
- // no long overlap
- int overlapped ;
- if(current_anchors[kx1].piece_main_coverage_start > current_vote -> coverage_start[i][j])
- overlapped = current_vote -> coverage_end[i][j] - current_anchors[kx1].piece_main_coverage_start;
- else
- overlapped = current_anchors[kx1].piece_main_coverage_end - current_vote -> coverage_start[i][j];
-
- if(overlapped > 14) continue;
- if(abs(dist)<6) continue;
- locate_gene_position( current_anchor->piece_main_abs_offset , &global_context -> chromosome_table, &chro_name_left, &chro_pos_left);
- locate_gene_position( current_vote -> pos[i][j] , &global_context -> chromosome_table, &chro_name_right, &chro_pos_right);
- if(chro_name_right!=chro_name_left) continue;
-
- int guess_start = (current_anchors[kx1].piece_main_coverage_start > current_vote -> coverage_start[i][j])?
- (current_vote -> coverage_end[i][j] - 15): (current_anchors[kx1].piece_main_coverage_end - 15);
-
- int guess_end = (current_anchors[kx1].piece_main_coverage_start < current_vote -> coverage_start[i][j])?
- (current_vote -> coverage_start[i][j] + 15): (current_anchors[kx1].piece_main_coverage_start + 15);
-
- if(global_context -> config.do_fusion_detection && !(current_anchors[kx1].piece_main_masks & IS_NEGATIVE_STRAND))
- // if for fusion, the current read must have been reversed.
- // hence, it is now changed to "main half" view.
- reverse_read(curr_read_text, curr_read_len, global_context -> config.space_type);
-
- int normally_arranged = 1!=(current_anchors[kx1].piece_main_coverage_start > current_vote -> coverage_start[i][j]) + (current_anchor->piece_main_abs_offset > current_vote -> pos[i][j]);
- int left_indel_offset=0, right_indel_offset=0;
-
- int kx2;
- if(curr_read_len > EXON_LONG_READ_LENGTH){
- for(kx2=0; kx2<MAX_INDEL_SECTIONS; kx2++)
- {
- if(!current_vote -> indel_recorder[i][j][kx2*3]) break;
- minor_indel_offset += (current_vote -> indel_recorder[i][j][kx2*3+2]);
- }
- if(current_anchor->piece_main_abs_offset< current_vote -> pos[i][j])
- {
- left_indel_offset=current_anchor->piece_main_indels;
- right_indel_offset=minor_indel_offset;
- }
- else
- {
- right_indel_offset=current_anchor->piece_main_indels;
- left_indel_offset=minor_indel_offset;
-
- }
-
-
- // the section having a smaller coordinate will have indel_offset !=0
- // the section having a larger coordinate MUST HAVE indel_offset == 0
- right_indel_offset=0;
- }
-
- donors_found_score = donor_score(global_context, thread_context, min(current_anchor->piece_main_abs_offset, current_vote -> pos[i][j]),max(current_anchor->piece_main_abs_offset, current_vote -> pos[i][j]), left_indel_offset, right_indel_offset, normally_arranged , max(0, guess_start) , min( guess_end, curr_read_len), curr_read_text, curr_read_len, is_second_read, & final_split_point, & is_GT_AG_donors, & is_donor_found);
-
- if(global_context -> config.do_fusion_detection && !(current_anchors[kx1].piece_main_masks & IS_NEGATIVE_STRAND))
- // changed back.
- reverse_read(curr_read_text, curr_read_len, global_context -> config.space_type);
-
- }
-
-
-//printf("MINORV=%d\tDONOR_FOUND=%d\n", current_vote -> votes[i][j], donors_found_score);
-
-
- if(donors_found_score)
- {
- if(global_context -> config.do_fusion_detection && (!current_anchors[kx1].piece_main_masks & IS_NEGATIVE_STRAND) && !is_strand_jumped)
- final_split_point = curr_read_len - final_split_point;
-
- current_anchors[kx1].piece_minor_abs_offset = current_vote -> pos[i][j];
- current_anchors[kx1].piece_minor_votes = current_vote -> votes[i][j];
- current_anchors[kx1].piece_minor_coverage_start = current_vote -> coverage_start[i][j];
- current_anchors[kx1].piece_minor_coverage_end = current_vote -> coverage_end[i][j];
- current_anchors[kx1].piece_minor_hamming_match = minor_hamming_match;
- current_anchors[kx1].piece_minor_read_quality = minor_read_quality;
- current_anchors[kx1].piece_minor_indel_offset = minor_indel_offset;
- current_anchors[kx1].intron_length = abs(dist);
- current_anchors[kx1].Score_H = new_score_H;
- current_anchors[kx1].Score_L = new_score_L;
- current_anchors[kx1].split_point = final_split_point;
- current_anchors[kx1].is_GT_AG_donors = is_GT_AG_donors;
- current_anchors[kx1].is_donor_found = is_donor_found;
- if(!is_donor_found)is_junction_found = is_donor_found;
- current_anchors[kx1].is_strand_jumped = is_strand_jumped ;
- max_score_H = new_score_H;
- max_score_L = new_score_L;
- }
- }
- }
- }
- if(current_anchors[kx1].is_strand_jumped)
- {
- // If "is_strand_jumped" is true, all coordinates so far are on the best voted strands (must be different strands, namely they're very likely to be overlapped).
- current_anchors[kx1].piece_minor_coverage_start = curr_read_len - current_anchors[kx1].piece_minor_coverage_end;
- current_anchors[kx1].piece_minor_coverage_end = curr_read_len - current_anchors[kx1].piece_minor_coverage_start;
-
- // Split_point is now the "negative strand read" view. It has to be changed to "main piece" view
- current_anchors[kx1].split_point = (current_anchors[kx1].piece_main_masks & IS_NEGATIVE_STRAND)?current_anchors[kx1].split_point:(curr_read_len-current_anchors[kx1].split_point);
- }
- }
- }
-
- int is_paired_end_selected = (global_context -> input_reads.is_paired_end_reads && is_result_in_PE( _global_retrieve_alignment_ptr(global_context, pair_number, 0, 0) ));
- int best_read_id_r1 ;
- int best_read_id_r2 =0;
- for(best_read_id_r1=0; best_read_id_r1<global_context->config.multi_best_reads; best_read_id_r1++)
- if(_global_retrieve_alignment_ptr(global_context, pair_number, 0, best_read_id_r1)->selected_votes<1)break;
-
-
- if(global_context -> input_reads.is_paired_end_reads)
- {
-
- for(best_read_id_r2=0; best_read_id_r2<global_context->config.multi_best_reads; best_read_id_r2++)
- if(_global_retrieve_alignment_ptr(global_context, pair_number, 1, best_read_id_r2)->selected_votes<1)break;
-
- for(i=0; i<used_anchors_1; i++)
- for(j=0; j<used_anchors_2; j++)
- {
- long long int dist;
- //int all_votes = read_1_anchors[i].piece_main_votes + read_1_anchors[i].piece_minor_votes + read_2_anchors[j].piece_main_votes + read_2_anchors[j].piece_minor_votes;
-
- dist = read_1_anchors[i].piece_main_abs_offset;
- dist -= read_2_anchors[j].piece_main_abs_offset;
-
- if(read_1_anchors[i].piece_main_abs_offset > read_2_anchors[j].piece_main_abs_offset) dist += read_len_1;
- else dist -= read_len_2;
-
- // the two ends of a segment must conform to the order.
-
- unsigned long long int new_score_H = 0;
- unsigned int new_score_L = 0;
-
- int SUM_COVERAGE = read_1_anchors[i].piece_minor_coverage_end - read_1_anchors[i].piece_minor_coverage_start +
- read_2_anchors[j].piece_minor_coverage_end - read_2_anchors[j].piece_minor_coverage_start +
- read_1_anchors[i].piece_main_coverage_end - read_1_anchors[i].piece_main_coverage_start +
- read_2_anchors[j].piece_main_coverage_end - read_2_anchors[j].piece_main_coverage_start ;
-
- int SUM_HAMMING = read_1_anchors[i].piece_main_hamming_match +
- read_1_anchors[i].piece_main_hamming_match +
- read_2_anchors[j].piece_minor_hamming_match +
- read_2_anchors[j].piece_minor_hamming_match ;
-
- int SUM_QUAL = read_1_anchors[i].piece_main_read_quality +
- read_1_anchors[i].piece_main_read_quality +
- read_2_anchors[j].piece_minor_read_quality +
- read_2_anchors[j].piece_minor_read_quality ;
-
- int SUM_OF_INTRONS = 1024*1024-1 - read_1_anchors[i].intron_length - read_2_anchors[j].intron_length;
- int dist_adjust = max(0, 1024*1024-1-abs(dist));
-
-
- int anchor_major_votes = (read_1_anchors[i].piece_main_votes > read_2_anchors[j].piece_main_votes)? read_1_anchors[i].piece_main_votes :read_2_anchors[j].piece_main_votes;
- int anchor_minor_votes = (read_1_anchors[i].piece_main_votes > read_2_anchors[j].piece_main_votes)? read_1_anchors[i].piece_minor_votes :read_2_anchors[j].piece_minor_votes;
- int second_major_votes = (read_1_anchors[i].piece_main_votes > read_2_anchors[j].piece_main_votes)? read_2_anchors[j].piece_main_votes :read_1_anchors[i].piece_main_votes;
- int second_minor_votes = (read_1_anchors[i].piece_main_votes > read_2_anchors[j].piece_main_votes)? read_2_anchors[j].piece_minor_votes :read_1_anchors[i].piece_minor_votes;
-
- make_128bit_score(&new_score_H, &new_score_L,1, anchor_major_votes, anchor_minor_votes, second_major_votes, second_minor_votes, SUM_COVERAGE , SUM_HAMMING , SUM_QUAL, dist_adjust , SUM_OF_INTRONS);
-
- //unsigned int new_score = dist_adjust + read_1_anchors[i].single_score + read_2_anchors[j].single_score - 100*(read_1_anchors[i].piece_main_indels + read_2_anchors[j].piece_main_indels)/2;
-
-
- if(global_context->config.is_rna_seq_reads && (read_1_anchors[i].piece_minor_votes || read_2_anchors[j].piece_minor_votes))
- {
- if(((dist < 0 && is_negative_strand) || (dist > 0 && !is_negative_strand)) && !global_context -> config.do_fusion_detection )
- continue;
- if(abs(dist) > global_context->config.maximum_pair_distance + 100000)
- continue;
- }
- else
- {
- if(((dist < 0 && is_negative_strand) || (dist > 0 && !is_negative_strand)) && !global_context -> config.do_fusion_detection )
- continue;
-
- if(abs(dist) > global_context->config.maximum_pair_distance || abs(dist) < global_context->config.minimum_pair_distance)
- continue;
- }
-
- alignment_result_t * alignment_1_best = _global_retrieve_alignment_ptr(global_context, pair_number, 0, 0);
- alignment_result_t * alignment_2_best = _global_retrieve_alignment_ptr(global_context, pair_number, 1, 0);
-
-
- if(new_score_H > alignment_1_best -> Score_H || (new_score_H == alignment_1_best-> Score_H && new_score_L >= alignment_1_best-> Score_L))
- {
- if(new_score_H > alignment_1_best-> Score_H || new_score_L > alignment_1_best-> Score_L)
- {
- best_read_id_r1 = 0;
- best_read_id_r2 = 0;
-
-
- alignment_1_best -> result_flags &= ~CORE_IS_BREAKEVEN;
- alignment_2_best -> result_flags &= ~CORE_IS_BREAKEVEN;
- }
- else
- {
- //printf("SET_BE: %d ; S=%16llx+%16llX\n", pair_number, new_score_H, new_score_L);
- alignment_1_best -> result_flags |= CORE_IS_BREAKEVEN;
- alignment_2_best -> result_flags |= CORE_IS_BREAKEVEN;
- }
-
- int r1_used_subreads = max(vote_1 -> all_used_subreads, alignment_1_best->used_subreads_in_vote );
- int r2_used_subreads = max(vote_2 -> all_used_subreads, alignment_2_best->used_subreads_in_vote );
-
- set_alignment_result(global_context, pair_number, 0, best_read_id_r1, read_1_anchors[i].piece_main_abs_offset, read_1_anchors[i].piece_main_votes , read_1_anchors[i].piece_main_indel_record, read_1_anchors[i].piece_main_coverage_start, read_1_anchors[i].piece_main_coverage_end, 0!=(read_1_anchors[i].piece_main_masks & IS_NEGATIVE_STRAND), read_1_anchors[i].piece_minor_abs_offset, read_1_anchors[i].piece_minor_votes, read_1_anchors[i].piece_minor_coverage_start, read_1_anchors[i].pie [...]
- set_alignment_result(global_context, pair_number, 1, best_read_id_r2, read_2_anchors[j].piece_main_abs_offset, read_2_anchors[j].piece_main_votes , read_2_anchors[j].piece_main_indel_record, read_2_anchors[j].piece_main_coverage_start, read_2_anchors[j].piece_main_coverage_end, 0!=(read_2_anchors[j].piece_main_masks & IS_NEGATIVE_STRAND), read_2_anchors[j].piece_minor_abs_offset, read_2_anchors[j].piece_minor_votes, read_2_anchors[j].piece_minor_coverage_start, read_2_anchors[j].pie [...]
-
- alignment_1_best -> Score_H = new_score_H;
- alignment_1_best -> Score_L = new_score_L;
- alignment_2_best -> Score_H = new_score_H;
- alignment_2_best -> Score_L = new_score_L;
-
- is_paired_end_selected = 1;
-
- best_read_id_r1 += 1;
- best_read_id_r2 += 1;
-
- set_zero_votes(global_context, pair_number,0 , best_read_id_r1);
- set_zero_votes(global_context, pair_number,1 , best_read_id_r2);
- }
- }
- }
-
- if(!is_paired_end_selected)
- {
- alignment_result_t * alignment_1_best = _global_retrieve_alignment_ptr(global_context, pair_number, 0, 0);
- for(i=0; i<used_anchors_1; i++)
- {
- if((read_1_anchors[i].Score_H > alignment_1_best -> Score_H) || (read_1_anchors[i].Score_H == alignment_1_best -> Score_H && read_1_anchors[i].Score_L >= alignment_1_best -> Score_L))
- {
- if(read_1_anchors[i].Score_H > alignment_1_best -> Score_H || read_1_anchors[i].Score_L > alignment_1_best -> Score_L )
- {
- best_read_id_r1 = 0;
-
- if(is_anchor_1_breakeven)
- alignment_1_best -> result_flags |= CORE_IS_BREAKEVEN;
- else
- alignment_1_best -> result_flags &= ~CORE_IS_BREAKEVEN;
- }
- else
- {
- if(read_1_anchors[i].piece_main_abs_offset > _global_retrieve_alignment_ptr(global_context, pair_number, 0, 0)->selected_position && global_context->config.multi_best_reads == 1)
- best_read_id_r1 = 0;
-
- alignment_1_best -> result_flags |= CORE_IS_BREAKEVEN;
- }
-
- alignment_1_best -> Score_H = read_1_anchors[i].Score_H;
- alignment_1_best -> Score_L = read_1_anchors[i].Score_L;
-
- //printf("BEST_ID_R1=%d\n",best_read_id_r1);
-
- // TODO: add result at best_read_id_r1
- alignment_result_t * r1_result = _global_retrieve_alignment_ptr(global_context, pair_number, 0, best_read_id_r1);
- int r1_used_subreads = max(vote_1 -> all_used_subreads, r1_result->used_subreads_in_vote );
- set_alignment_result(global_context, pair_number, 0, best_read_id_r1, read_1_anchors[i].piece_main_abs_offset, read_1_anchors[i].piece_main_votes , read_1_anchors[i].piece_main_indel_record, read_1_anchors[i].piece_main_coverage_start, read_1_anchors[i].piece_main_coverage_end, 0!=(read_1_anchors[i].piece_main_masks & IS_NEGATIVE_STRAND), read_1_anchors[i].piece_minor_abs_offset, read_1_anchors[i].piece_minor_votes, read_1_anchors[i].piece_minor_coverage_start, read_1_anchors[i].piec [...]
-
- best_read_id_r1 += 1;
- set_zero_votes(global_context, pair_number,0 , best_read_id_r1);
- /*if(memcmp(read_name_1, "HKJMKOB02G7RDV",14) == 0){
-
- printf("MAX_VOTES=%d\t\tSTART_POS=%u\t\tMINOR_VOTES=%d\t\tMINOR_POS=%u\n", read_1_anchors[i].piece_main_votes, read_1_anchors[i].piece_main_abs_offset, read_1_anchors[i].piece_minor_votes, read_1_anchors[i].piece_minor_abs_offset);
- print_votes(vote_1, global_context -> config.index_prefix);
- }*/
- }
- }
-
-
- if(global_context -> input_reads.is_paired_end_reads)
- {
-
- alignment_result_t * alignment_2_best = _global_retrieve_alignment_ptr(global_context, pair_number, 1, 0);
- for(j=0; j<used_anchors_2; j++)
- {
- if(read_2_anchors[j].Score_H > alignment_2_best -> Score_H || (read_2_anchors[j].Score_H == alignment_2_best -> Score_H && read_2_anchors[j].Score_L >= alignment_2_best -> Score_L))
- {
- if(read_2_anchors[j].Score_H > alignment_2_best -> Score_H || read_2_anchors[j].Score_L > alignment_2_best-> Score_L )
- {
- best_read_id_r2 = 0;
-
- if(is_anchor_2_breakeven)
- alignment_2_best -> result_flags |= CORE_IS_BREAKEVEN;
- else
- alignment_2_best -> result_flags &= ~CORE_IS_BREAKEVEN;
- }
- else
- {
- if(read_2_anchors[j].piece_main_abs_offset > _global_retrieve_alignment_ptr(global_context, pair_number, 1, 0)->selected_position && global_context->config.multi_best_reads == 1)
- best_read_id_r2 = 0;
- alignment_2_best -> result_flags &= ~CORE_IS_BREAKEVEN;
- //printf("SET_BE_2: %d\n", pair_number);
- }
-
- alignment_2_best -> Score_H = read_2_anchors[j].Score_H;
- alignment_2_best -> Score_L = read_2_anchors[j].Score_L;
- is_paired_end_selected = 0;
-
- // TODO: add result at best_read_id_r2
- alignment_result_t * r2_result = _global_retrieve_alignment_ptr(global_context, pair_number, 1, 0);
- int r2_used_subreads = max(vote_2 -> all_used_subreads, r2_result->used_subreads_in_vote );
- set_alignment_result(global_context, pair_number, 1, best_read_id_r2, read_2_anchors[j].piece_main_abs_offset, read_2_anchors[j].piece_main_votes , read_2_anchors[j].piece_main_indel_record, read_2_anchors[j].piece_main_coverage_start, read_2_anchors[j].piece_main_coverage_end, 0!=(read_2_anchors[j].piece_main_masks & IS_NEGATIVE_STRAND), read_2_anchors[j].piece_minor_abs_offset, read_2_anchors[j].piece_minor_votes, read_2_anchors[j].piece_minor_coverage_start, read_2_anchors[j].pie [...]
-
- best_read_id_r2 += 1;
- set_zero_votes(global_context, pair_number,1 , best_read_id_r2);
- }
- }
- }
-
- }
-
-
- alignment_result_t * tmp_result = _global_retrieve_alignment_ptr(global_context, pair_number, 0, 0);
- if(tmp_result->selected_votes <1)
- {
- tmp_result -> used_subreads_in_vote = max(vote_1 -> all_used_subreads, tmp_result -> used_subreads_in_vote );
- tmp_result -> noninformative_subreads_in_vote = max(vote_1 -> noninformative_subreads, tmp_result -> noninformative_subreads_in_vote);
- }
-
-
- tmp_result = _global_retrieve_alignment_ptr(global_context, pair_number, 1, 0);
- if(tmp_result->selected_votes <1 && global_context -> input_reads.is_paired_end_reads)
- {
-
- tmp_result->used_subreads_in_vote = max(vote_2 -> all_used_subreads, tmp_result->used_subreads_in_vote );
- tmp_result->noninformative_subreads_in_vote = max(vote_2 -> noninformative_subreads, tmp_result->noninformative_subreads_in_vote);
- }
- return 0;
-}
-
-int explain_read(global_context_t * global_context, thread_context_t * thread_context, int pair_number, int read_len, char * read_name , char *read_text, char *qual_text, int is_second_read, int best_read_id, int is_negative_strand)
-{ // Explains one stored alignment: searches events backwards from the end of its confident coverage, then forwards, then calls finalise_explain_CIGAR(). Always returns 0.
- explain_context_t explain_context;
- // The alignment record to explain is selected by (pair_number, is_second_read, best_read_id).
- alignment_result_t *current_result = _global_retrieve_alignment_ptr(global_context, pair_number, is_second_read, best_read_id);
- // With big-margin reporting or filtering enabled, the read is left unexplained (early return 0) when is_ambiguous_voting() returns more than 1.
- if(global_context -> config.do_big_margin_reporting || global_context -> config.do_big_margin_filtering_for_reads)
- {
- int current_repeated_times = is_ambiguous_voting(global_context, pair_number, is_second_read, current_result->selected_votes, current_result->confident_coverage_start, current_result->confident_coverage_end, read_len, (current_result->result_flags & CORE_IS_NEGATIVE_STRAND)?1:0);
- if(current_repeated_times>1) return 0;
- }
-
-
- // Start from an all-zero context, then copy in the caller-supplied description of the read.
- memset(&explain_context,0, sizeof(explain_context_t));
-
- explain_context.full_read_len = read_len;
- explain_context.full_read_text = read_text;
- explain_context.full_qual_text = qual_text;
- explain_context.read_name = read_name;
- explain_context.is_confirmed_section_negative_strand = is_negative_strand ;
- explain_context.pair_number = pair_number;
- explain_context.is_second_read = is_second_read ;
- explain_context.best_read_id = best_read_id;
-
- // *_read_* variables are positions inside the read; *_position variables are absolute reference offsets.
- unsigned int back_search_tail_position, front_search_start_position;
- unsigned short back_search_read_tail, front_search_read_start;
-
- // The back search starts at the end of the confident coverage, clamped to the read length; the reference offset also adds indels_in_confident_coverage.
- back_search_read_tail = min(explain_context.full_read_len , current_result -> confident_coverage_end );//- 5;
- back_search_tail_position = current_result -> selected_position + back_search_read_tail + current_result -> indels_in_confident_coverage;
- // Seed slot 0 of the temporary section list with that starting point.
- explain_context.tmp_search_junctions[0].read_pos_end = back_search_read_tail;
- explain_context.tmp_search_junctions[0].abs_offset_for_start = back_search_tail_position;
- // NOTE(review): 0xffff0000 presumably serves as a "no best jump yet" sentinel - confirm in search_events_to_back().
- explain_context.tmp_jump_length = 0;
- explain_context.best_jump_length = 0xffff0000;
-
- search_events_to_back(global_context, thread_context, &explain_context, read_text , qual_text, back_search_tail_position , back_search_read_tail, 0);
-
- if(explain_context.back_search_confirmed_sections>0)
- {
- // At least one section was confirmed going backwards: the front search resumes from back_search_junctions[0].
- short last_section_length = explain_context.back_search_junctions[0].read_pos_end - explain_context.back_search_junctions[0].read_pos_start;
-
- front_search_read_start = explain_context.back_search_junctions[0].read_pos_start;
- front_search_start_position = explain_context.back_search_junctions[0].abs_offset_for_start - last_section_length;
- // selected_position is rewritten from the last confirmed back-search section: abs_offset_for_start - read_pos_end + read_pos_start.
- int last_sec = explain_context.back_search_confirmed_sections-1;
-
- current_result -> selected_position = explain_context.back_search_junctions[last_sec].abs_offset_for_start - explain_context.back_search_junctions[last_sec].read_pos_end + explain_context.back_search_junctions[last_sec].read_pos_start;
-
- }
- else
- { // Nothing confirmed going backwards: the front search starts 5 bases after confident_coverage_start.
- front_search_read_start = current_result -> confident_coverage_start + 5;
- front_search_start_position = current_result -> selected_position + front_search_read_start;
- }
-
-
- // Clear the temporary search state left by the back search before reusing it for the front search.
- explain_context.tmp_search_sections = 0;
- explain_context.best_matching_bases = 0;
- memset(explain_context.tmp_search_junctions, 0, sizeof(perfect_section_in_read_t ) * MAX_EVENTS_IN_READ);
- // Seed slot 0 again, this time with the front-search starting point; read and quality text are passed advanced by front_search_read_start.
- explain_context.tmp_search_junctions[0].read_pos_start = front_search_read_start;
- explain_context.tmp_search_junctions[0].abs_offset_for_start = front_search_start_position;
- explain_context.tmp_jump_length = 0;
- explain_context.best_jump_length = 0xffff0000;
- search_events_to_front(global_context, thread_context, &explain_context, read_text + front_search_read_start, qual_text + front_search_read_start, front_search_start_position,read_len - front_search_read_start , 0);
-
- // Hand the collected back-search and front-search sections to finalise_explain_CIGAR(); its return value is ignored.
- finalise_explain_CIGAR(global_context, thread_context, &explain_context);
-
- return 0;
-}
-
-int find_soft_clipping(global_context_t * global_context, thread_context_t * thread_context, gene_value_index_t * current_value_index, char * read_text, unsigned int mapped_pos, int test_len, int search_to_tail)
-{ // Compares test_len bases of read_text with the reference at mapped_pos through a sliding match window and returns max(0, x0 - SOFT_CLIPPING_WINDOW_SIZE) for the first qualifying step x0, or 0 if none qualifies. global_context and thread_context are not used in this body.
- #define SOFT_CLIPPING_WINDOW_SIZE 6
- #define SOFT_CLIPPING_MAX_ERROR 1
- // window_matched[0] holds the newest comparison result (1 = match); older results sit at higher indices.
- char window_matched[SOFT_CLIPPING_WINDOW_SIZE];
- int x0,x1,x2;
-
- memset(window_matched, 0 , SOFT_CLIPPING_WINDOW_SIZE);
-
- for(x0=0;x0 < test_len; x0++)
- {
- // x0 counts scan steps; x1 is the base index actually compared (counted from the end when search_to_tail is non-zero).
- if(search_to_tail) x1 = test_len -1 -x0;
- else x1=x0;
- char ref_value = gvindex_get(current_value_index, mapped_pos + x1);
- int sum_matched=0; // recomputed from the window contents on every step
- for(x2 = SOFT_CLIPPING_WINDOW_SIZE - 1; x2 > 0; x2--) // shift the window by one slot (the oldest entry is dropped) and sum the entries that remain
- {
- window_matched[x2] = window_matched[x2-1];
- sum_matched += window_matched[x2];
- }
- window_matched[0] = (ref_value == read_text[x1]);
- sum_matched += window_matched[0];
-
- // Stop at the first matched base for which the window holds MORE THAN SOFT_CLIPPING_WINDOW_SIZE - SOFT_CLIPPING_MAX_ERROR matches; with the values above (6 and 1) that means all 6 slots match. NOTE(review): this comment previously said ">=" while the code tests a strict ">" - confirm which was intended.
- if(window_matched[0])
- {
- if(sum_matched > SOFT_CLIPPING_WINDOW_SIZE - SOFT_CLIPPING_MAX_ERROR)
- {
- return max(0 , x0 - SOFT_CLIPPING_WINDOW_SIZE);
- }
- }
-
- }
- return 0; // no step satisfied the window test
-}
-
-// Walks cigar_string to score the read against the reference and, when config.show_soft_cliping is set, may rewrite cigar_string in place with 'S' pieces at its ends. read_head_abs_offset is the first WANTED base in read.
-// If the first section in read is reversed, read_head_abs_offset is the LAST WANTED bases in this section. (the abs offset of the first base in the section is actually larger than read_head_abs_offset)
-int final_CIGAR_quality(global_context_t * global_context, thread_context_t * thread_context, char * read_text, char * qual_text, int read_len, char * cigar_string, unsigned long read_head_abs_offset, int is_read_head_reversed, int * mismatched_bases)
-{ // Returns 100 + (int)(all_matched_bases*100/read_len). Asserts that the M/S/I lengths in the CIGAR add up to read_len. mismatched_bases is only forwarded to match_base_quality().
- int cigar_cursor = 0;
- int read_cursor = 0;
- unsigned int current_perfect_section_abs = read_head_abs_offset;
- int rebuilt_read_len = 0;
- float all_matched_bases = 0;
- gene_value_index_t * current_value_index = thread_context?thread_context->current_value_index:global_context->current_value_index;
- int current_reversed = is_read_head_reversed;
- int all_perfect_length = 0;
- int is_First_M = 1;
- int head_soft_clipped = -1, tail_soft_clipped = -1;
- // First pass: parse "<number><op>" pairs; tmp_int accumulates the decimal length of the operation being read and is reset to 0 after each op.
- unsigned int tmp_int = 0;
- while(1)
- {
- char nch = cigar_string[cigar_cursor++];
- if(!nch)break;
- if(isdigit(nch)) // NOTE(review): nch is a plain char; isdigit()/tolower() are specified for unsigned char values or EOF
- tmp_int = tmp_int*10+(nch-'0');
- else{
- if(nch == 'M' || nch == 'S')
- {
- char *qual_text_cur;
- if(qual_text[0])qual_text_cur = qual_text+read_cursor;
- else qual_text_cur = NULL;
- // An empty quality string is passed on as NULL; the value returned for this section is added to all_matched_bases.
- float section_qual = match_base_quality(current_value_index, read_text+read_cursor, current_perfect_section_abs, qual_text_cur, tmp_int, current_reversed, global_context->config.phred_score_format , mismatched_bases, global_context -> config.high_quality_base_threshold);
- all_matched_bases += section_qual;
- rebuilt_read_len += tmp_int;
- all_perfect_length += tmp_int;
- // This is the last section when the CIGAR string ends right after the current op.
- int is_Last_M = (cigar_string[cigar_cursor]==0);
-
- // On the first M/S section, probe for soft clipping from the head (only when show_soft_cliping is set); a result equal to the whole section length is reset to 0.
- if(is_First_M && global_context -> config.show_soft_cliping)
- {
- head_soft_clipped = find_soft_clipping(global_context, thread_context, current_value_index, read_text, current_perfect_section_abs, tmp_int, 0);
- if(head_soft_clipped == tmp_int) head_soft_clipped = 0;
- }
- if(is_Last_M && global_context -> config.show_soft_cliping)
- {
- tail_soft_clipped = find_soft_clipping(global_context, thread_context, current_value_index, read_text + read_cursor, current_perfect_section_abs, tmp_int, 1);
- if(tail_soft_clipped == tmp_int) tail_soft_clipped = 0;
- }
- if(is_Last_M && is_First_M && tail_soft_clipped+head_soft_clipped >= tmp_int-1) // single-section CIGAR: drop both clippings when together they reach tmp_int-1. NOTE(review): tmp_int is unsigned, so the int sum is converted to unsigned for this comparison (including the -1 + -1 case when soft clipping is disabled).
- {
- head_soft_clipped=0;
- tail_soft_clipped=0;
- }
- is_First_M=0;
-
-
- read_cursor += tmp_int;
-
- //move to the NEXT UNWANTED ABS OFFSET.
- if(current_reversed)
- current_perfect_section_abs --;
- else
- current_perfect_section_abs += tmp_int;
-
-
- }
- else if(nch == 'I') // insertion: consumes read bases only, and all of them are counted as matched
- {
- rebuilt_read_len += tmp_int;
- read_cursor += tmp_int;
- all_matched_bases += tmp_int;
- }
- else if(nch == 'D') // deletion: advances the reference offset only while the current section is not reversed
- {
- if(!current_reversed)
- current_perfect_section_abs += tmp_int;
- }
- else if(tolower(nch) == 'n') // 'N' or 'n': jump forward on the reference; the lower-case form also toggles current_reversed
- {
- current_perfect_section_abs += tmp_int;
- if(nch == 'n') current_reversed = !current_reversed;
- }
- else if(tolower(nch) == 'b') // 'B' or 'b': jump backward on the reference; the lower-case form also toggles current_reversed
- {
- current_perfect_section_abs -= tmp_int;
- if(nch == 'b') current_reversed = !current_reversed;
- }
- tmp_int = 0;
- }
- }
-
- assert(rebuilt_read_len == read_len);
-
- // Second pass, only when a positive head or tail clipping was found: rebuild the CIGAR, carving 'S' pieces out of the first and/or last 'M'.
- if(global_context -> config.show_soft_cliping && (head_soft_clipped>0 || tail_soft_clipped>0))
- {
- char new_cigar_tmp[100]; // NOTE(review): filled by unbounded strcat below; assumes the rewritten CIGAR fits in 100 bytes - confirm against the longest CIGAR callers can pass
- is_First_M=1;
- new_cigar_tmp[0]=0;
- cigar_cursor = 0;
- while(1)
- {
- char nch = cigar_string[cigar_cursor++];
-
- if(!nch)break;
- if(isdigit(nch))
- tmp_int = tmp_int*10+(nch-'0');
- else{
- char cigar_piece [30];
- cigar_piece[0]=0;
-
- if(nch == 'M') // only 'M' is split here, whereas the first pass also treated 'S' as a candidate first/last section
- {
- char cigar_tiny [11]; // NOTE(review): written by unbounded sprintf; a 10-digit value plus the op letter and the terminator would not fit in 11 bytes
- int is_Last_M = (cigar_string[cigar_cursor]==0);
- if(is_First_M && head_soft_clipped>0)
- {
- tmp_int -= head_soft_clipped;
- sprintf(cigar_tiny,"%dS",head_soft_clipped);
- strcat(cigar_piece, cigar_tiny);
- }
- if(is_Last_M && tail_soft_clipped>0)
- {
- tmp_int -= tail_soft_clipped;
- }
- sprintf(cigar_tiny,"%dM",tmp_int); // NOTE(review): tmp_int is unsigned int but is printed with %d
- strcat(cigar_piece, cigar_tiny);
- if(is_Last_M && tail_soft_clipped>0)
- {
- sprintf(cigar_tiny,"%dS",tail_soft_clipped);
- strcat(cigar_piece, cigar_tiny);
- }
- is_First_M = 0;
- }
- else
- {
- sprintf(cigar_piece, "%u%c", tmp_int, nch);
- }
-
- strcat(new_cigar_tmp, cigar_piece);
- tmp_int = 0;
- }
- }
- // The rewritten CIGAR replaces the caller's string in place; the added 'S' pieces can make it longer than the input. NOTE(review): the capacity of cigar_string is not visible here - confirm at the call sites.
- strcpy(cigar_string, new_cigar_tmp);
- }
-
- return 100+(int)(all_matched_bases*100/read_len);
-}
-
-// this function also adds final_counting_reads in chromosome_events.
-int finalise_explain_CIGAR(global_context_t * global_context, thread_context_t * thread_context, explain_context_t * explain_context)
-{
- int xk1;
- char tmp_cigar[100];
- chromosome_event_t * to_be_supported [20];
- short flanking_size_left[20], flanking_size_right[20];
- int to_be_supported_count = 0;
- int is_junction_read = 0;
- int total_perfect_matched_sections = 0;
- alignment_result_t * result = _global_retrieve_alignment_ptr(global_context, explain_context->pair_number, explain_context->is_second_read, explain_context-> best_read_id);
-
-
- tmp_cigar[0]=0;
- // reverse the back_search results
- for(xk1=0; xk1<explain_context -> back_search_confirmed_sections/2; xk1++)
- {
- perfect_section_in_read_t tmp_exp;
- memcpy(&tmp_exp, &explain_context -> back_search_junctions[xk1], sizeof(perfect_section_in_read_t));
- memcpy(&explain_context -> back_search_junctions[xk1], &explain_context -> back_search_junctions[explain_context -> back_search_confirmed_sections - xk1 - 1] , sizeof(perfect_section_in_read_t));
- memcpy(&explain_context -> back_search_junctions[explain_context -> back_search_confirmed_sections - xk1 - 1] , &tmp_exp , sizeof(perfect_section_in_read_t));
- }
-
- // adding indel lengths in read lengths and relocate sections
- // note that the last section in back results has the same strand of the main piece.
- int is_first_section_negative = (result ->result_flags & CORE_IS_NEGATIVE_STRAND)?1:0;
- for(xk1=0; xk1<explain_context -> back_search_confirmed_sections; xk1++)
- {
- int section_length = explain_context -> back_search_junctions[xk1].read_pos_end - explain_context -> back_search_junctions[xk1].read_pos_start;
- unsigned int new_start_pos;
-
- if(explain_context -> back_search_junctions[xk1].is_strand_jumped)
- // the "strand_jumped" section do not need to move
- // however, the "abs_offset_for_start" is actually for the last base in this section.
- // this does not matter if we compare the reversed read to the chromosome.
- // "abs_offset_for_start" is the first UNWANTED base (smaller than the first WANTED base)
- new_start_pos = explain_context -> back_search_junctions[xk1].abs_offset_for_start +1;
- else
- // "abs_offset_for_start" is the first UNWANTED base. By subtracting the length, it becomes the first WANTED base.
- new_start_pos = explain_context -> back_search_junctions[xk1].abs_offset_for_start - section_length;
-
- explain_context -> back_search_junctions[xk1].abs_offset_for_start = new_start_pos;
- if(explain_context -> back_search_junctions[xk1].event_after_section
- && explain_context -> back_search_junctions[xk1].event_after_section->is_strand_jumped) is_first_section_negative=!is_first_section_negative;
- }
-
- // build CIGAR
- int is_cigar_overflow = 0;
- for(xk1 = 0; xk1 < explain_context -> back_search_confirmed_sections + explain_context -> front_search_confirmed_sections -1; xk1++)
- {
- char piece_cigar[20];
- int read_pos_start, read_pos_end;
- perfect_section_in_read_t * current_section, *next_section = NULL;
-
- int is_front_search = 0;
- if(xk1 >= explain_context -> back_search_confirmed_sections || xk1 == explain_context -> back_search_confirmed_sections -1)
- {
- current_section = &explain_context -> front_search_junctions[xk1 - explain_context -> back_search_confirmed_sections +1];
- if(xk1 - explain_context -> back_search_confirmed_sections +2 < explain_context -> front_search_confirmed_sections)
- next_section = &explain_context -> front_search_junctions[xk1 - explain_context -> back_search_confirmed_sections +2];
-
- is_front_search = 1;
- }
- else
- {
- current_section = &explain_context -> back_search_junctions[xk1];
- if(xk1+1 < explain_context -> back_search_confirmed_sections)
- next_section = &explain_context -> back_search_junctions[xk1+1];
- }
-
-
- read_pos_start = current_section -> read_pos_start;
- read_pos_end = current_section -> read_pos_end;
- chromosome_event_t *event_after = current_section -> event_after_section;
-
- sprintf(piece_cigar, "%dM", (read_pos_end - read_pos_start));
- total_perfect_matched_sections += (read_pos_end - read_pos_start);
- flanking_size_left[xk1] = (read_pos_end - read_pos_start);
-
- if(xk1<explain_context -> back_search_confirmed_sections + explain_context -> front_search_confirmed_sections -2)
- assert(event_after);
-
- if(xk1>0)
- flanking_size_right[xk1-1] = (read_pos_end - read_pos_start);
-
- if(event_after)
- {
- if(event_after -> event_type == CHRO_EVENT_TYPE_INDEL)
- sprintf(piece_cigar+strlen(piece_cigar), "%d%c", abs(event_after->indel_length), event_after->indel_length>0?'D':'I');
- else if(event_after -> event_type == CHRO_EVENT_TYPE_JUNCTION||event_after -> event_type == CHRO_EVENT_TYPE_FUSION)
- {
- char jump_mode = current_section -> is_connected_to_large_side?'B':'N';
- if(event_after -> is_strand_jumped) jump_mode = tolower(jump_mode);
-
- // the distance in CIGAR is the NEXT UNWANTED BASE of piece#1 to the FIRST WANTED BASE in piece#2
- int delta_one ;
- if(current_section -> is_strand_jumped + current_section -> is_connected_to_large_side == 1) delta_one = 1;
- else delta_one = -1;
-
- // if it is from front_search, the event side points to the first WANTED base of the next section; it should be moved to the last WANTED base the next section if the next section is jumped.
- if(next_section && (event_after -> is_strand_jumped + current_section -> is_strand_jumped==1))
- {
- if(is_front_search)
- {
- if(current_section -> is_connected_to_large_side)
- delta_one += (next_section->read_pos_end - next_section-> read_pos_start - 1);
- else
- delta_one -= (next_section->read_pos_end - next_section-> read_pos_start - 1);
- }
- else
- {
- if(current_section -> is_connected_to_large_side)
- delta_one += (next_section->read_pos_end - next_section-> read_pos_start - 1);
- else
- delta_one -= (next_section->read_pos_end - next_section-> read_pos_start - 1);
- }
- }
-
- sprintf(piece_cigar+strlen(piece_cigar), "%d%c", event_after -> event_large_side - event_after -> event_small_side + delta_one, jump_mode);
- is_junction_read ++;
- }
- to_be_supported[to_be_supported_count++] = event_after;
- }
- strcat(tmp_cigar, piece_cigar);
- if(strlen(tmp_cigar)>80){
- is_cigar_overflow=1;
- break;
- }
- }
-
- int mismatch_bases = 0, isCigarOK = 0;
-
- if(is_cigar_overflow) sprintf(tmp_cigar, "%dM", explain_context -> full_read_len);
-
- unsigned int final_position = explain_context -> back_search_junctions[0].abs_offset_for_start;
- int final_qual = final_CIGAR_quality(global_context, thread_context, explain_context -> full_read_text, explain_context -> full_qual_text, explain_context -> full_read_len , tmp_cigar, final_position, is_first_section_negative != ((result->result_flags & CORE_IS_NEGATIVE_STRAND)?1:0), &mismatch_bases);
-
- //if(memcmp(explain_context->read_name, "HKJMKOB02G7RDV",14) == 0)printf("POS=%u\tCIGAR=%s\tMM=%d\tQUAL=%d\n", final_position , tmp_cigar, mismatch_bases, final_qual);
-
- int applied_mismatch = is_junction_read? global_context->config.max_mismatch_junction_reads:global_context->config.max_mismatch_exonic_reads ;
- if(explain_context->full_read_len > EXON_LONG_READ_LENGTH)
- applied_mismatch = ((((explain_context->full_read_len+1)<<16) / 100) * applied_mismatch)>>16;
- if(mismatch_bases <= applied_mismatch)
- {
- int compressed_len;
- result -> final_quality = final_qual;
- result -> selected_position = final_position;
- if(((result -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0) != is_first_section_negative)
- {
- assert(0);
- result -> cigar_string[0]=0xff;
- compressed_len = cigar2bincigar(tmp_cigar, result -> cigar_string + 1, CORE_MAX_CIGAR_LEN - 1);
- }
- else
- compressed_len = cigar2bincigar(tmp_cigar, result -> cigar_string, CORE_MAX_CIGAR_LEN);
-
- // commit the change to the chromosome_events
- if(compressed_len>0)
- {
- for(xk1= 0; xk1 < to_be_supported_count; xk1++)
- {
- if(thread_context)
- ((indel_thread_context_t *)thread_context -> module_thread_contexts[MODULE_INDEL_ID]) -> final_counted_reads_array [ to_be_supported [xk1] -> global_event_id] ++;
- else
- to_be_supported [xk1] -> final_counted_reads ++;
- if(to_be_supported [xk1] -> event_type !=CHRO_EVENT_TYPE_INDEL)
- {
- short current_event_flanking_left = flanking_size_left[xk1];
- short current_event_flanking_right = flanking_size_right[xk1];
- to_be_supported [xk1] -> junction_flanking_left = max(to_be_supported [xk1] -> junction_flanking_left, current_event_flanking_left);
- to_be_supported [xk1] -> junction_flanking_right = max(to_be_supported [xk1] -> junction_flanking_right, current_event_flanking_right);
- }
- }
-
- result -> result_flags |= CORE_IS_FULLY_EXPLAINED;
- isCigarOK=1;
- }
- }
- //printf("BRRRRRID=%d; MM=%d; CIGAR=%s<%d> QUAL=%d\n", explain_context-> best_read_id, mismatch_bases, tmp_cigar, isCigarOK, final_qual);
-
- if(!isCigarOK)
- {
- result -> final_quality = final_qual;
- result -> result_flags &= ~CORE_IS_FULLY_EXPLAINED;
- result -> Score_H &= 0x7fffffffffffffffllu;
- }
- return 0;
-}
-
-
-
-
-#define ceq(c,t) ((c)[0]==(t)[0] && (c)[1]==(t)[1])
-#define c2eq(ch1, ch2, tg1, tg2) ((ceq(ch1, tg1) && ceq(ch2, tg2)) || (ceq(ch1, tg2) && ceq(ch2, tg1)) )
-
-int paired_chars_full_core(char * ch1, char * ch2, int is_reverse)
-{
- if (c2eq(ch1, ch2, "GT", "AG") || c2eq(ch1, ch2, "CT", "AC"))
- {
- if (is_reverse) if (ceq(ch1, "AG") || ceq(ch1, "AC")) return 2;
- if (!is_reverse) if (ceq(ch1, "CT") || ceq(ch1, "GT")) return 2;
- }
- else if ( c2eq(ch1, ch2,"GC","AG") || c2eq(ch1, ch2,"GC","CT") || c2eq(ch1, ch2,"AT","AC") || c2eq(ch1, ch2,"GT","AT"))
- {
- if (is_reverse) if (ceq(ch1, "GC") || ceq(ch1, "AT") || ceq(ch1, "AG") || ceq(ch1, "AC")) return 1;
- if (!is_reverse) if (ceq(ch1, "GC") || ceq(ch1, "AT") ||ceq(ch1, "GT") || ceq(ch1, "CT")) return 1;
- }
- return 0;
-}
-
-int paired_chars_part_core(char * ch1, char * ch2, int is_reverse)
-{
- if (c2eq(ch1, ch2, "GT", "AG") || c2eq(ch1, ch2, "CT", "AC"))
- {
- if (is_reverse)
- {
- if (ceq(ch1, "AG") || ceq(ch1, "AC")) return 1;
- }else
- if (ceq(ch1, "CT") || ceq(ch1, "GT")) return 1;
- }
- return 0;
-}
-
-#define paired_chars paired_chars_part_core
-
-
-#define is_donor_chars_full(cc) (((cc)[0]=='G' && (cc)[1]=='T') || \
- ((cc)[0]=='A' && (cc)[1]=='G') || \
- ((cc)[0]=='A' && (cc)[1]=='C') || \
- ((cc)[0]=='C' && (cc)[1]=='T') || \
- ((cc)[0]=='G' && (cc)[1]=='C') || \
- ((cc)[0]=='A' && (cc)[1]=='T') || \
- ((cc)[0]=='A' && (cc)[1]=='C') )
-
-
-#define is_donor_chars_part(cc) (((cc)[0]=='G' && (cc)[1]=='T') || \
- ((cc)[0]=='A' && (cc)[1]=='G') || \
- ((cc)[0]=='A' && (cc)[1]=='C') || \
- ((cc)[0]=='C' && (cc)[1]=='T'))
-
-#define is_donor_chars is_donor_chars_part
-
-
-
-
-int is_ambiguous_voting(global_context_t * global_context, int pair_number, int is_second_read, int max_vote, int max_start,int max_end, int read_len, int is_negative)
-{
- int xk1;
- int encounter = 0;
-
- if(is_negative)
- {
- int tmp = max_start;
- max_start = read_len - max_end;
- max_end = read_len - tmp;
- }
-
- if(read_len > 255)
- {
- max_start = max_start>>2;
- max_end = max_end>>2;
- }
-
- unsigned char * big_margin_record = _global_retrieve_big_margin_ptr(global_context,pair_number, is_second_read);
-
- for(xk1 = 0; xk1 < global_context->config.big_margin_record_size/3 ; xk1++)
- {
- if(!big_margin_record[xk1*3])break;
-
- if((big_margin_record[xk1*3]) >= max_vote -1) // actually, max-1
- if(big_margin_record[xk1*3+1] >= max_start - 2 && big_margin_record[xk1*3+2] <= max_end + 1)
- encounter++;
-
- }
- if(encounter>1) return encounter;
- return 0;
-}
-
-#define JUNCTION_CONFIRM_WINDOW 17
-// This function implements the same function of donor_score, except that the two halves are from different strands.
-// Both halves are forced to positive strand and the split point is found.
-// Note that the donor/receptor sides are still expected for distinguishing between Fusion Breaks and Fusion Junctions.
-
-// Note that the read_text is on reversed mode. The guess points are on reversed mode too.
-// "Left" and "Right" means the left/right half in the "reversed" read.
-int donor_jumped_score(global_context_t * global_context, thread_context_t * thread_context, unsigned int left_virtualHead_abs_offset, unsigned int right_virtualHead_abs_offset, int guess_start, int guess_end, char * read_text, int read_len, int is_left_half_negative, int is_right_half_negative, int normally_arranged, int is_second_read, int * final_split_point, int * is_GT_AG_strand, int * is_donor_found)
-{
- gene_value_index_t * value_index = thread_context?thread_context->current_value_index:global_context->current_value_index ;
- // guess_end is the index of the first UNWANTED BASE.
- int most_likely_point_as_reversed = (guess_start+guess_end)/2;
-
- int selected_real_split_point = -1, selected_junction_strand = -1;
- //char donor_left[2], donor_right[2];
-
- int best_score = -111111;
-
- int real_split_point_i;
- int real_split_point_numbers = guess_end - guess_start;
-
- char positive_read[MAX_READ_LENGTH+1];
- strcpy(positive_read, read_text) ;
- reverse_read(positive_read, read_len, global_context->config.space_type);
-
- for(real_split_point_i = 0 ; real_split_point_i < real_split_point_numbers; real_split_point_i++)
- {
- int left_should_match, right_should_match;
- int left_should_not_match, right_should_not_match;
- int real_split_point_as_reversed = (real_split_point_i % 2)?-((real_split_point_i+1)/2):((1+real_split_point_i)/2);
- real_split_point_as_reversed += most_likely_point_as_reversed;
-
- if(real_split_point_as_reversed > read_len-JUNCTION_CONFIRM_WINDOW)continue;
- if(real_split_point_as_reversed < JUNCTION_CONFIRM_WINDOW)continue;
-
- int is_donor_test_ok=0;
-
- if(normally_arranged)
- {
- unsigned int small_pos_test_begin = left_virtualHead_abs_offset + (is_left_half_negative?real_split_point_as_reversed - JUNCTION_CONFIRM_WINDOW:(read_len - real_split_point_as_reversed));
- char * small_pos_read_begin = (is_left_half_negative?read_text:positive_read) + (is_left_half_negative?
- (real_split_point_as_reversed - JUNCTION_CONFIRM_WINDOW) :
- (read_len - real_split_point_as_reversed)
- );
-
- unsigned int large_pos_test_begin = right_virtualHead_abs_offset + (is_right_half_negative?real_split_point_as_reversed:(read_len - real_split_point_as_reversed - JUNCTION_CONFIRM_WINDOW));
- char * large_pos_read_begin = (is_right_half_negative?read_text:positive_read) + (is_right_half_negative?
- (real_split_point_as_reversed) :
- (read_len - real_split_point_as_reversed - JUNCTION_CONFIRM_WINDOW));
-
- left_should_match = match_chro(small_pos_read_begin , value_index , small_pos_test_begin , JUNCTION_CONFIRM_WINDOW , 0, global_context -> config.space_type);
- right_should_match = match_chro(large_pos_read_begin , value_index , large_pos_test_begin , JUNCTION_CONFIRM_WINDOW , 0, global_context -> config.space_type);
- left_should_not_match = right_should_not_match = 0;
- //match_chro(read_text + real_split_point - JUNCTION_CONFIRM_WINDOW, value_index, left_virtualHead_abs_offset + real_split_point - JUNCTION_CONFIRM_WINDOW , JUNCTION_CONFIRM_WINDOW , 0, global_context -> config.space_type);
-
- }
- else
- {
- unsigned int small_pos_test_begin = left_virtualHead_abs_offset + (is_left_half_negative?real_split_point_as_reversed:(read_len - real_split_point_as_reversed - JUNCTION_CONFIRM_WINDOW));
- char * small_pos_read_begin = (is_left_half_negative?read_text:positive_read) + (is_left_half_negative?
- (real_split_point_as_reversed):(read_len - real_split_point_as_reversed - JUNCTION_CONFIRM_WINDOW));
-
- unsigned int large_pos_test_begin = right_virtualHead_abs_offset + (is_right_half_negative?(real_split_point_as_reversed - JUNCTION_CONFIRM_WINDOW):(read_len - real_split_point_as_reversed));
- char * large_pos_read_begin = (is_right_half_negative?read_text:positive_read) + (is_right_half_negative?
- (real_split_point_as_reversed - JUNCTION_CONFIRM_WINDOW):(read_len - real_split_point_as_reversed));
-
- left_should_match = match_chro(small_pos_read_begin , value_index , small_pos_test_begin , JUNCTION_CONFIRM_WINDOW , 0, global_context -> config.space_type);
- right_should_match = match_chro(large_pos_read_begin , value_index , large_pos_test_begin , JUNCTION_CONFIRM_WINDOW , 0, global_context -> config.space_type);
- left_should_not_match = right_should_not_match = 0;
-
- }
-
- if(left_should_match + right_should_match >= JUNCTION_CONFIRM_WINDOW*2 -1 &&
- left_should_not_match <= JUNCTION_CONFIRM_WINDOW -3 && right_should_not_match <= JUNCTION_CONFIRM_WINDOW -3)
- {
- int test_score = is_donor_test_ok*500+left_should_match + right_should_match - left_should_not_match - right_should_not_match;
- if(test_score > best_score)
- {
- selected_real_split_point = real_split_point_as_reversed;
- best_score = test_score;
- }
- }
- }
-
- if(best_score>0)
- {
- *final_split_point = selected_real_split_point;
- *is_donor_found = best_score>=500;
- *is_GT_AG_strand = selected_junction_strand;
- return best_score;
- }
- return 0;
-}
-
-
-int donor_score(global_context_t * global_context, thread_context_t * thread_context, unsigned int left_virtualHead_abs_offset, unsigned int right_virtualHead_abs_offset, int left_indel_offset, int right_indel_offset, int normally_arranged, int guess_start, int guess_end, char * read_text, int read_len, int is_second_read, int * final_split_point, int * is_GT_AG_strand, int * is_donor_found)
-{
-
-
- gene_value_index_t * value_index = thread_context?thread_context->current_value_index:global_context->current_value_index ;
- int need_donor_test = global_context->config.is_rna_seq_reads;
-
- // guess_end is the index of the first UNWANTED BASE.
- int most_likely_point = (guess_start+guess_end)/2;
-
- // "split_point" is the first base NOT IN piece 1; it is also the first base IN piece 2.
- int selected_real_split_point = -1, selected_junction_strand = -1;
- char donor_left[3], donor_right[3];
-
-
- int best_score = -111111;
-
- int real_split_point_i;
- int real_split_point_numbers = guess_end - guess_start;
-
- //printf("TESTDON: LR=%d; RR=%d\n", left_indel_offset, right_indel_offset);
-
- for(real_split_point_i = 0 ; real_split_point_i < real_split_point_numbers; real_split_point_i++)
- {
- int left_should_match, right_should_match;
- int left_should_not_match, right_should_not_match;
- int real_split_point = (real_split_point_i % 2)?-((real_split_point_i+1)/2):((1+real_split_point_i)/2);
- real_split_point += most_likely_point;
- int is_donor_test_ok = 0;
-
- if(real_split_point > read_len-JUNCTION_CONFIRM_WINDOW)continue;
- if(real_split_point < JUNCTION_CONFIRM_WINDOW)continue;
-
- assert(left_virtualHead_abs_offset<right_virtualHead_abs_offset);
-
- if(normally_arranged)
- {
- gvindex_get_string (donor_left, value_index, left_virtualHead_abs_offset + real_split_point + left_indel_offset, 2, 0);
- gvindex_get_string (donor_right, value_index, right_virtualHead_abs_offset + real_split_point + right_indel_offset - 2, 2, 0);
- }
- else
- {
- gvindex_get_string (donor_left, value_index, right_virtualHead_abs_offset + real_split_point + left_indel_offset, 2, 0);
- gvindex_get_string (donor_right, value_index, left_virtualHead_abs_offset + real_split_point + right_indel_offset - 2, 2, 0);
- }
- is_donor_test_ok = is_donor_chars(donor_left) && is_donor_chars(donor_right) && paired_chars(donor_left, donor_right,0);
-
-
- donor_left[2]=0; donor_right[2]=0;
- // printf("TESTDON: %s %s; OFFSET=%d; DON_OK=%d; NORMAL=%d; LEFT_OFF=%d; RIGHT_OFF=%d\n", donor_left, donor_right, real_split_point_i, is_donor_test_ok, normally_arranged, left_indel_offset, right_indel_offset);
-
- if(is_donor_test_ok || !need_donor_test)
- {
- if(normally_arranged)
- {
- left_should_match = match_chro(read_text + real_split_point - JUNCTION_CONFIRM_WINDOW, value_index, left_virtualHead_abs_offset + real_split_point - JUNCTION_CONFIRM_WINDOW + left_indel_offset , JUNCTION_CONFIRM_WINDOW , 0, global_context -> config.space_type);
- right_should_match = match_chro(read_text + real_split_point, value_index, right_virtualHead_abs_offset + real_split_point + right_indel_offset, JUNCTION_CONFIRM_WINDOW , 0, global_context -> config.space_type);
-
- left_should_not_match = match_chro(read_text + real_split_point, value_index, left_virtualHead_abs_offset + real_split_point + left_indel_offset, JUNCTION_CONFIRM_WINDOW , 0, global_context -> config.space_type);
- right_should_not_match = match_chro(read_text + real_split_point - JUNCTION_CONFIRM_WINDOW, value_index, right_virtualHead_abs_offset + real_split_point + right_indel_offset - JUNCTION_CONFIRM_WINDOW, JUNCTION_CONFIRM_WINDOW , 0, global_context -> config.space_type);
- }
- else
- {
- right_should_match = match_chro(read_text + real_split_point - JUNCTION_CONFIRM_WINDOW, value_index, right_virtualHead_abs_offset + right_indel_offset + real_split_point - JUNCTION_CONFIRM_WINDOW , JUNCTION_CONFIRM_WINDOW , 0, global_context -> config.space_type);
- left_should_match = match_chro(read_text + real_split_point, value_index, left_virtualHead_abs_offset + real_split_point + left_indel_offset, JUNCTION_CONFIRM_WINDOW , 0, global_context -> config.space_type);
-
- right_should_not_match = match_chro(read_text + real_split_point, value_index, right_virtualHead_abs_offset + real_split_point + right_indel_offset, JUNCTION_CONFIRM_WINDOW , 0, global_context -> config.space_type);
- left_should_not_match = match_chro(read_text + real_split_point - JUNCTION_CONFIRM_WINDOW, value_index, left_virtualHead_abs_offset + left_indel_offset + real_split_point - JUNCTION_CONFIRM_WINDOW, JUNCTION_CONFIRM_WINDOW , 0, global_context -> config.space_type);
-
- }
-
- //printf("!! TESTDON: M=%d,%d MM=%d,%d\n", left_should_match,+right_should_match,left_should_not_match,right_should_not_match);
- if(left_should_match +right_should_match >= 2*JUNCTION_CONFIRM_WINDOW-1 &&
- left_should_not_match <= JUNCTION_CONFIRM_WINDOW -5 && right_should_not_match <= JUNCTION_CONFIRM_WINDOW -5)
- {
- int test_score = is_donor_test_ok*3000+left_should_match + right_should_match - left_should_not_match - right_should_not_match;
- if(test_score > best_score)
- {
- selected_junction_strand = (donor_left[0]=='G' || donor_right[1]=='G');
- selected_real_split_point = real_split_point;
- best_score = test_score;
- }
- }
- }
- }
- if(best_score>0)
- {
- *final_split_point = selected_real_split_point;
- *is_donor_found = best_score>=2900;
- *is_GT_AG_strand = selected_junction_strand;
- return best_score;
- }
- return 0;
-
-}
-
-
-void find_new_junctions(global_context_t * global_context, thread_context_t * thread_context, int pair_number, char * read_text, char * qual_text, int read_len, int is_second_read, int best_read_id)
-{
- alignment_result_t * result =_global_retrieve_alignment_ptr(global_context, pair_number, is_second_read, best_read_id);
- subjunc_result_t * subjunc_result =_global_retrieve_subjunc_ptr(global_context, pair_number, is_second_read, best_read_id);
-
-
- if(read_len > EXON_LONG_READ_LENGTH)
- core_search_short_exons(global_context, thread_context, read_text, qual_text, read_len, result -> selected_position, (subjunc_result -> minor_votes < 1)? result -> selected_position:subjunc_result -> minor_position, result -> confident_coverage_start, result -> confident_coverage_end);
-
- if(subjunc_result -> minor_votes < 1)return;
- if(result -> selected_votes < global_context->config.minimum_subread_for_first_read)return;
-
- if(global_context->config.do_big_margin_filtering_for_junctions)
- {
- // if(2999302633 == result -> selected_position)
- // printf("P0\n");
- if(is_ambiguous_voting(global_context, pair_number, is_second_read, result->selected_votes, result -> confident_coverage_start, result -> confident_coverage_end, read_len, (result->result_flags & CORE_IS_NEGATIVE_STRAND)?1:0))return;
- // if(2999302633 == result -> selected_position)
- // printf("P1\n");
- }
-
- /*
- if(2999302633 == result -> selected_position)
- {
- printf("MAIN_POS=%u; MINOR_POS=%u\n", result -> selected_position, subjunc_result -> minor_position);
- printf("SPLIT=%d\n", subjunc_result->split_point);
- }*/
-
- unsigned int left_virtualHead_abs_offset = min(result -> selected_position, subjunc_result -> minor_position);
- unsigned int right_virtualHead_abs_offset = max(result -> selected_position, subjunc_result -> minor_position);
-
- int selected_real_split_point = subjunc_result->split_point;
- int is_GT_AG_donors = result->result_flags & 0x3;
- int is_donor_found = is_GT_AG_donors<3;
- int is_strand_jumped = (result->result_flags & CORE_IS_STRAND_JUMPED)?1:0;
-
- if(selected_real_split_point>0)
- {
- unsigned int left_edge_wanted, right_edge_wanted;
-
- if(is_strand_jumped)
- {
-
- // recover the "negative view" splicing point location
- int S = (result->result_flags & CORE_IS_NEGATIVE_STRAND) ? selected_real_split_point : (read_len - selected_real_split_point);
- int Sbar = read_len - S;
-
- int is_abnormal_as_reversed = (subjunc_result->minor_coverage_start > result->confident_coverage_start) + (subjunc_result -> minor_position > result -> selected_position) == 1;
- if(!(result->result_flags & CORE_IS_NEGATIVE_STRAND)) is_abnormal_as_reversed = !is_abnormal_as_reversed;
- int is_small_half_negative = ((result->result_flags & CORE_IS_NEGATIVE_STRAND)?1:0) + (subjunc_result->minor_position < result->selected_position) ==1;
-
- if(is_abnormal_as_reversed && is_small_half_negative)
- {
- left_edge_wanted = left_virtualHead_abs_offset + S;
- right_edge_wanted = right_virtualHead_abs_offset + Sbar;
- }
- else if(is_abnormal_as_reversed && !is_small_half_negative)
- {
- left_edge_wanted = left_virtualHead_abs_offset + Sbar - 1;
- right_edge_wanted = right_virtualHead_abs_offset + S - 1;
- }
- else if(!is_abnormal_as_reversed && is_small_half_negative)
- {
- left_edge_wanted = left_virtualHead_abs_offset + S - 1;
- right_edge_wanted = right_virtualHead_abs_offset + Sbar - 1;
- }
- else // if(!is_abnormal_as_reversed && !is_small_half_negative)
- {
- left_edge_wanted = left_virtualHead_abs_offset + Sbar;
- right_edge_wanted = right_virtualHead_abs_offset + S;
- }
- }
- else
- {
- int selected_real_split_point_for_left = selected_real_split_point;
- int selected_real_split_point_for_right = selected_real_split_point;
- if((subjunc_result->minor_coverage_start > result->confident_coverage_start) + (subjunc_result -> minor_position > result -> selected_position) == 1) //abnormally arranged halves
- selected_real_split_point_for_right --;
- else // normally arranged halves
- selected_real_split_point_for_left --;
-
-
-
- int minor_indel_offset = (subjunc_result->double_indel_offset & 0xf);
- int major_indel_offset = (subjunc_result->double_indel_offset >> 4) & 0xf;
- if(major_indel_offset>=8)major_indel_offset=-(16-major_indel_offset);
- //assert(minor_indel_offset==0);
- //assert(major_indel_offset==0);
-
- left_edge_wanted = left_virtualHead_abs_offset + selected_real_split_point_for_left + ((result -> selected_position > subjunc_result -> minor_position)?minor_indel_offset: major_indel_offset);
- right_edge_wanted = right_virtualHead_abs_offset + selected_real_split_point_for_right;
- }
-
-
- //insert event
- HashTable * event_table = NULL;
- chromosome_event_t * event_space = NULL;
- if(thread_context)
- {
- event_table = ((indel_thread_context_t *)thread_context -> module_thread_contexts[MODULE_INDEL_ID]) -> event_entry_table;
- event_space = ((indel_thread_context_t *)thread_context -> module_thread_contexts[MODULE_INDEL_ID]) -> event_space_dynamic;
- }
- else
- {
- event_table = ((indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID]) -> event_entry_table;
- event_space = ((indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID]) -> event_space_dynamic;
- }
-
- // note that selected_real_split_point is the first UNWANTED base after left half.
-
- chromosome_event_t * found = NULL;
- chromosome_event_t * search_return [MAX_EVENT_ENTRIES_PER_SITE];
- int found_events = search_event(global_context, event_table, event_space, left_edge_wanted , EVENT_SEARCH_BY_SMALL_SIDE, CHRO_EVENT_TYPE_JUNCTION|CHRO_EVENT_TYPE_FUSION, search_return);
-
- if(found_events)
- {
- int kx1;
- for(kx1 = 0; kx1 < found_events ; kx1++)
- {
- if(search_return[kx1] -> event_large_side == right_edge_wanted)
- {
- found = search_return[kx1];
- break;
- }
- }
- }
-
- if(found) found -> supporting_reads ++;
- else
- {
- int event_no;
-
-
- if(thread_context)
- event_no = ((indel_thread_context_t *)thread_context -> module_thread_contexts[MODULE_INDEL_ID]) -> total_events ++;
- else
- event_no = ((indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID]) -> total_events ++;
-
-
- event_space = reallocate_event_space(global_context, thread_context, event_no);
-
- chromosome_event_t * new_event = event_space+event_no;
- memset(new_event,0,sizeof(chromosome_event_t));
- new_event -> event_small_side = left_edge_wanted;
- new_event -> event_large_side = right_edge_wanted;
-
-
- if(is_donor_found &&(!is_strand_jumped) && right_edge_wanted - left_edge_wanted <= global_context -> config.maximum_intron_length
- && (subjunc_result->minor_coverage_start > result->confident_coverage_start) + (subjunc_result -> minor_position > result -> selected_position) !=1)
- {
- new_event -> is_negative_strand= !is_GT_AG_donors;
- new_event -> event_type = CHRO_EVENT_TYPE_JUNCTION;
-
- new_event -> supporting_reads = 1;
- new_event -> indel_length = 0;
-
- put_new_event(event_table, new_event , event_no);
-
- }
- else
- {
- if(global_context -> config.do_fusion_detection)
- {
- new_event -> event_type = CHRO_EVENT_TYPE_FUSION;
- new_event -> is_strand_jumped = is_strand_jumped;
-
- new_event -> supporting_reads = 1;
- new_event -> indel_length = 0;
-
- put_new_event(event_table, new_event , event_no);
- }
- }
- }
- }
-}
-
-int write_fusion_final_results(global_context_t * global_context)
-{
- indel_context_t * indel_context = (indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID];
- char fn2 [MAX_FILE_NAME_LENGTH];
-
- snprintf(fn2, MAX_FILE_NAME_LENGTH, "%s.fuse", global_context->config.output_prefix);
- FILE * ofp = f_subr_open(fn2, "wb");
-
- int xk1;
- //unsigned int all_junctions = 0;
- int no_sup_juncs = 0;
- int all_juncs = 0;
-
- for(xk1 = 0; xk1 < indel_context -> total_events ; xk1++)
- {
- char * chro_name_left,* chro_name_right;
- unsigned int chro_pos_left, chro_pos_right;
- chromosome_event_t * event_body = indel_context -> event_space_dynamic +xk1;
- if(event_body -> event_type != CHRO_EVENT_TYPE_FUSION)
- continue;
-
- all_juncs++;
- if(event_body->final_counted_reads<1)
- {
- no_sup_juncs++;
- continue;
- }
- locate_gene_position( event_body -> event_small_side , &global_context -> chromosome_table, &chro_name_left, &chro_pos_left);
- locate_gene_position( event_body -> event_large_side , &global_context -> chromosome_table, &chro_name_right, &chro_pos_right);
-
- chro_pos_left++;
-
- fprintf(ofp, "%s\t%u\t%s\t%u\t%c\t%d\n", chro_name_left, chro_pos_left, chro_name_right, chro_pos_right, event_body -> is_strand_jumped?'X':'=', event_body -> final_counted_reads);
- }
-
- fclose(ofp);
- return 0;
-}
-int write_junction_final_results(global_context_t * global_context)
-{
-
- int no_sup_juncs = 0;
-
- indel_context_t * indel_context = (indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID];
- char fn2 [MAX_FILE_NAME_LENGTH];
-
- snprintf(fn2, MAX_FILE_NAME_LENGTH, "%s.bed", global_context->config.output_prefix);
- FILE * ofp = f_subr_open(fn2, "wb");
-
- int xk1;
- unsigned int all_junctions = 0;
-
- for(xk1 = 0; xk1 < indel_context -> total_events ; xk1++)
- {
- char * chro_name_left,* chro_name_right;
- unsigned int chro_pos_left, chro_pos_right;
- chromosome_event_t * event_body = indel_context -> event_space_dynamic +xk1;
- if(event_body -> event_type != CHRO_EVENT_TYPE_JUNCTION)
- continue;
- if(event_body->final_counted_reads<1)
- {
- no_sup_juncs++;
- continue;
- }
-
- locate_gene_position( event_body -> event_small_side , &global_context -> chromosome_table, &chro_name_left, &chro_pos_left);
- locate_gene_position( event_body -> event_large_side , &global_context -> chromosome_table, &chro_name_right, &chro_pos_right);
-
- chro_pos_left++;
-
- unsigned int feature_start = max(0, chro_pos_left - event_body -> junction_flanking_left );
- unsigned int feature_end = chro_pos_right + event_body -> junction_flanking_right;
-
- all_junctions ++;
-
- fprintf(ofp,"%s\t%u\t%u\tJUNC%08u\t%d\t%c\t%u\t%u\t%d,0,%d\t2\t%d,%d\t0,%u\n", chro_name_left, feature_start, feature_end,
- all_junctions, event_body -> final_counted_reads, event_body->is_negative_strand?'-':'+',
- feature_start, feature_end, event_body->is_negative_strand?0:255, event_body->is_negative_strand?255:0,
- event_body -> junction_flanking_left, event_body -> junction_flanking_right, feature_end-feature_start-event_body -> junction_flanking_right);
-
- }
-
- fclose(ofp);
- global_context -> all_junctions = all_junctions;
- //printf("Non-support juncs=%d; Final juncs = %d\n", no_sup_juncs, all_junctions);
- return 0;
-}
-
-
-
-void get_chro_2base(char *buf, gene_value_index_t * index, unsigned int pos, int is_negative_strand)
-{
- gvindex_get_string (buf, index, pos, 2, is_negative_strand);
-}
-
-
-int paired_chars_part(char * ch1, char * ch2, int is_reverse)
-{
- if (c2eq(ch1, ch2, "GT", "AG") || c2eq(ch1, ch2, "CT", "AC"))
- {
- if (is_reverse) if (ceq(ch1, "AG") || ceq(ch1, "AC")) return 1;
- if (!is_reverse) if (ceq(ch1, "CT") || ceq(ch1, "GT")) return 1;
- }
- return 0;
-}
-#define is_donar_chars_part(cc) (((cc)[0]=='G' && (cc)[1]=='T') || \
- ((cc)[0]=='A' && (cc)[1]=='G') || \
- ((cc)[0]=='A' && (cc)[1]=='C') || \
- ((cc)[0]=='C' && (cc)[1]=='T'))
-
-
-#define SHORT_EXON_MIN_LENGTH 18
-#define EXON_EXTENDING_SCAN 0
-#define SHORT_EXON_WINDOW 6
-#define SHORT_EXON_EXTEND 5000
-
-void core_search_short_exons(global_context_t * global_context, thread_context_t * thread_context, char * read_text, char * qualityb0, int rl, unsigned int P1_Pos, unsigned int P2_Pos, short read_coverage_start, short read_coverage_end)
-{
- char inb[1201], qualityb[1201];
- if ( (rl <= EXON_LONG_READ_LENGTH ) && (!EXON_EXTENDING_SCAN)) return;
- //return;
- gene_value_index_t * base_index = thread_context?thread_context->current_value_index:global_context->current_value_index ;
- assert(base_index!=NULL);
- //insert event
- HashTable * event_table = NULL;
- chromosome_event_t * event_space = NULL;
- if(thread_context)
- {
- event_table = ((indel_thread_context_t *)thread_context -> module_thread_contexts[MODULE_INDEL_ID]) -> event_entry_table;
- event_space = ((indel_thread_context_t *)thread_context -> module_thread_contexts[MODULE_INDEL_ID]) -> event_space_dynamic;
- }
- else
- {
- event_table = ((indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID]) -> event_entry_table;
- event_space = ((indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID]) -> event_space_dynamic;
- }
-
- strcpy(inb, read_text);
- strcpy(qualityb, qualityb0);
-
- unsigned int pos_small=min(P1_Pos, P2_Pos), pos_big = max(P1_Pos, P2_Pos);
-
- int max_score , test_score;
- unsigned int best_j1_edge=0 , best_j2_edge=0;
- int need_to_test = 0;
-
-//////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////
-// SCAN TO THE HEAD /////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////
-
- if (read_coverage_start > SHORT_EXON_MIN_LENGTH)
- {
- max_score = -1;
-
- int need_check2 = 1;
- if(qualityb[0])
- {
- float head_quality = read_quality_score(qualityb , SHORT_EXON_MIN_LENGTH , global_context->config.phred_score_format);
- if(head_quality < 6 )
- need_check2 = 0;
- }
-
-
- if(need_check2)
- if(SHORT_EXON_MIN_LENGTH *0.6 < match_chro(inb, base_index, pos_small, SHORT_EXON_MIN_LENGTH , 0, global_context->config.space_type))
- need_check2 = 0;
-
-
- if(need_check2)
- {
-
- int delta_pos, is_indel = 0;
- for(delta_pos=-3; delta_pos <=3; delta_pos ++)
- {
- if(match_chro(inb, base_index, pos_small + delta_pos, SHORT_EXON_MIN_LENGTH , 0, global_context->config.space_type) >= SHORT_EXON_MIN_LENGTH*.7)
- {
- is_indel = 1;
- break;
- }
- }
- // The head of the read is incorrect. Do we need to search a long way?
- // See if there is a donor in the head area.
- int test_donor_pos;
- char cc[3];
- cc[2]=0;
-
- if(!is_indel)
- for(test_donor_pos = SHORT_EXON_MIN_LENGTH ; test_donor_pos < read_coverage_start ; test_donor_pos ++)
- {
- get_chro_2base(cc, base_index, pos_small + test_donor_pos, 0);
- if(is_donar_chars_part(cc))
- {
- need_to_test = 1;
- break;
- }
- }
- }
- }
-
- max_score = -999;
- int max_is_GTAG = 0;
-
- if(need_to_test)
- {
- unsigned int test_end = pos_small - SHORT_EXON_EXTEND;
- if(SHORT_EXON_EXTEND > pos_small) test_end = 0;
-
- unsigned int new_pos = pos_small-16;
- while(1)
- {
- new_pos = match_chro_range(inb, base_index, new_pos, 7 , new_pos - test_end , SEARCH_BACK);
- if(new_pos==0xffffffff) break;
- // There is an exact match. See if the donor/receptors are matched.
- // new_pos is the new head position of the read.
- int splice_point;
- for(splice_point = SHORT_EXON_MIN_LENGTH; splice_point < read_coverage_start ; splice_point ++)
- {
- char cc[3];
- cc[2]=0;
- char cc2[3];
- cc2[2]=0;
-
- get_chro_2base(cc, base_index, pos_small + splice_point -2, 0);
- if(is_donar_chars_part(cc))
- {
- // <<< EXON---|CC2---INTRON---CC|---EXON
- get_chro_2base(cc2, base_index, new_pos + splice_point, 0);
- if(is_donar_chars_part(cc2) && paired_chars_part(cc2 , cc, 0))
- {
- int matched_in_exon_old = match_chro(inb + splice_point, base_index, pos_small + splice_point , SHORT_EXON_WINDOW , 0, global_context->config.space_type);
- int matched_in_exon_new = match_chro(inb, base_index, new_pos , splice_point, 0, global_context->config.space_type);
-
-
- test_score = 1000000+ (matched_in_exon_new )*10000 + matched_in_exon_old * 1000 + new_pos - test_end;
- if(test_score <= max_score) continue;
- max_score = test_score + 39999 ;
-
- if(matched_in_exon_new < splice_point || matched_in_exon_old < SHORT_EXON_WINDOW )
- continue;
-
- max_is_GTAG = (cc2[0]=='G' || cc2[1]=='G');
- //printf("EX CC=%s\tCC2=%s\tis_GTAG=%d\n",cc,cc2,max_is_GTAG);
- best_j1_edge = new_pos + splice_point - 1;
- best_j2_edge = pos_small + splice_point;
- }
- }
- }
- }
- }
-
-
- if(best_j1_edge>0)
- {
- int event_no;
- chromosome_event_t * search_return [MAX_EVENT_ENTRIES_PER_SITE];
- chromosome_event_t * found = NULL;
-
- int found_events = search_event(global_context, event_table, event_space, best_j1_edge , EVENT_SEARCH_BY_SMALL_SIDE, CHRO_EVENT_TYPE_JUNCTION|CHRO_EVENT_TYPE_FUSION, search_return);
-
- if(found_events)
- {
- int kx1;
- for(kx1 = 0; kx1 < found_events ; kx1++)
- {
- if(search_return[kx1] -> event_large_side == best_j2_edge)
- {
- found = search_return[kx1];
- break;
- }
- }
- }
-
- if(found) found -> supporting_reads ++;
- else
- {
- if(thread_context)
- event_no = ((indel_thread_context_t *)thread_context -> module_thread_contexts[MODULE_INDEL_ID]) -> total_events ++;
- else
- event_no = ((indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID]) -> total_events ++;
-
- event_space = reallocate_event_space(global_context, thread_context, event_no);
-
- chromosome_event_t * new_event = event_space+event_no;
- memset(new_event,0,sizeof(chromosome_event_t));
- new_event -> event_small_side = best_j1_edge;
- new_event -> event_large_side = best_j2_edge;
- assert(best_j1_edge<best_j2_edge);
-
- new_event -> is_negative_strand= !max_is_GTAG;
- new_event -> event_type = CHRO_EVENT_TYPE_JUNCTION;
-
- new_event -> supporting_reads = 1;
- new_event -> indel_length = 0;
-
- put_new_event(event_table, new_event , event_no);
- }
- //printf("FOUND NEW JUNCTION HEAD: %u - %u\n", best_j1_edge, best_j2_edge);
- }
-
-
-//////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////
-// SCAN TO THE TAIL /////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////////////////////
-
- need_to_test = 0;
- max_score = -999;
-
-
- if (read_coverage_end< rl - SHORT_EXON_MIN_LENGTH)
- {
- int need_check2 = 1;
- if(qualityb[0])
- {
- float head_quality = read_quality_score(qualityb + rl - SHORT_EXON_MIN_LENGTH , SHORT_EXON_MIN_LENGTH , global_context->config.phred_score_format);
- if(head_quality < 6 )
- need_check2 = 0;
- }
-
-
- if(SHORT_EXON_MIN_LENGTH *0.6 < match_chro(inb + rl - SHORT_EXON_MIN_LENGTH, base_index, pos_big + rl - SHORT_EXON_MIN_LENGTH , SHORT_EXON_MIN_LENGTH , 0, global_context->config.space_type))
- need_check2 = 0;
- if(need_check2)
- {
- int delta_pos, is_indel = 0;
- for(delta_pos=-3; delta_pos <=3; delta_pos ++)
- {
- if(match_chro(inb + rl - SHORT_EXON_MIN_LENGTH, base_index, pos_big + rl - SHORT_EXON_MIN_LENGTH + delta_pos, SHORT_EXON_MIN_LENGTH , 0, global_context->config.space_type) >= SHORT_EXON_MIN_LENGTH*.7)
- {
- is_indel = 1;
- break;
- }
- }
- // The head of the read is incorrect. Do we need to search a long way?
- // See if there is a donor in the head area.
- int test_donor_pos;
- char cc[3];
- cc[2]=0;
-
- if(!is_indel)
- for(test_donor_pos = read_coverage_end ; test_donor_pos < rl ; test_donor_pos ++)
- {
- get_chro_2base(cc, base_index, pos_big + test_donor_pos, 0);
- if(is_donar_chars_part(cc))
- {
- need_to_test = 1;
- break;
- }
- }
- }
- }
-
- best_j1_edge = 0;
- max_is_GTAG = 0;
-
- if(need_to_test)
- {
- unsigned int test_end = pos_big + SHORT_EXON_EXTEND;
- if(test_end > base_index -> length + base_index -> start_point) test_end = base_index -> length + base_index -> start_point;
-
- unsigned int new_pos = pos_big +rl - SHORT_EXON_MIN_LENGTH +16;
-
- while(1)
- {
- new_pos = match_chro_range(inb + rl - SHORT_EXON_MIN_LENGTH, base_index, new_pos, 7 , test_end - new_pos , SEARCH_FRONT);
- if(new_pos==0xffffffff) break;
- // There is an exact match. See if the donor/receptors are matched.
- // (new_pos + SHORT_EXON_MIN_LENGTH -rl + splice_point) is the new exon start.
-
- int splice_point;
- for(splice_point = read_coverage_end ; splice_point < rl - SHORT_EXON_MIN_LENGTH; splice_point ++)
- {
- char cc[3];
- cc[2]=0;
- char cc2[3];
- cc2[2]=0;
-
- unsigned int new_pos_tail = (new_pos + SHORT_EXON_MIN_LENGTH -rl + splice_point);
-
- get_chro_2base(cc, base_index, pos_big + splice_point, 0);
- if(is_donar_chars_part(cc))
- {
- get_chro_2base(cc2, base_index, new_pos_tail -2, 0);
- if(is_donar_chars_part(cc2) && paired_chars_part(cc , cc2, 0))
- {
- int matched_in_exon_new = match_chro(inb + splice_point, base_index, new_pos_tail , rl - splice_point , 0, global_context->config.space_type);
- int matched_in_exon_old = match_chro(inb + splice_point - SHORT_EXON_WINDOW , base_index, pos_big + splice_point - SHORT_EXON_WINDOW , SHORT_EXON_WINDOW, 0, global_context->config.space_type);
-
- test_score = 1000000+ (matched_in_exon_new)*10000 + matched_in_exon_old * 1000 + test_end - new_pos;
- if(test_score <= max_score) continue;
- max_score = test_score + 39999;
-
- if(matched_in_exon_new < (rl - splice_point) || matched_in_exon_old < SHORT_EXON_WINDOW)
- continue;
-
- // EXON ---|CC---INTRON---CC2|--- EXON >>>
- max_is_GTAG = (cc[0]=='G'|| cc[1]=='G');
- best_j1_edge = pos_big + splice_point - 1;
- best_j2_edge = new_pos_tail;
- }
- }
- }
-
- }
- }
-
-
- if(best_j1_edge>0)
- {
- int event_no;
- chromosome_event_t * search_return [MAX_EVENT_ENTRIES_PER_SITE];
- chromosome_event_t * found = NULL;
-
- int found_events = search_event(global_context, event_table, event_space, best_j1_edge , EVENT_SEARCH_BY_SMALL_SIDE, CHRO_EVENT_TYPE_JUNCTION|CHRO_EVENT_TYPE_FUSION, search_return);
-
- if(found_events)
- {
- int kx1;
- for(kx1 = 0; kx1 < found_events ; kx1++)
- {
- if(search_return[kx1] -> event_large_side == best_j2_edge)
- {
- found = search_return[kx1];
- break;
- }
- }
- }
-
- if(found) found -> supporting_reads ++;
- else
- {
- if(thread_context)
- event_no = ((indel_thread_context_t *)thread_context -> module_thread_contexts[MODULE_INDEL_ID]) -> total_events ++;
- else
- event_no = ((indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID]) -> total_events ++;
-
-
- event_space = reallocate_event_space(global_context, thread_context, event_no);
-
- chromosome_event_t * new_event = event_space+event_no;
- memset(new_event,0,sizeof(chromosome_event_t));
- new_event -> event_small_side = best_j1_edge;
- new_event -> event_large_side = best_j2_edge;
- assert(best_j1_edge<best_j2_edge);
-
- new_event -> is_negative_strand= !max_is_GTAG;
- new_event -> event_type = CHRO_EVENT_TYPE_JUNCTION;
-
- new_event -> supporting_reads = 1;
- new_event -> indel_length = 0;
-
- put_new_event(event_table, new_event , event_no);
- //printf("FOUND NEW JUNCTION TAIL: %u - %u\n", best_j1_edge, best_j2_edge);
- }
- }
-}
-
-
-
-
-
-
-
-
-
-int core_select_best_matching_halves_maxone(global_context_t * global_context, gene_vote_t * vote, unsigned int * best_pos1, unsigned int * best_pos2, int * best_vote1, int * best_vote2, char * is_abnormal, short * half_marks, int * is_reversed_halves, float accept_rate, int read_len, long long int hint_pos, int tolerable_bases, short * read_coverage_start, short * read_coverage_end, char * indel_in_p1, char * indel_in_p2, gehash_data_t max_pos, gene_vote_number_t max_votes, short max_st [...]
-{
- int best_splicing_point = -1, i,j;
- char * best_chro_name, is_reversed;
- unsigned int best_chro_pos;
- int selected_max_votes = -1;
-
-
- is_reversed = (max_mask & IS_NEGATIVE_STRAND)?1:0;
- for (i=0; i<GENE_VOTE_TABLE_SIZE; i++)
- for(j=0; j< vote->items[i]; j++)
- {
- char * chro_name;
- char is_partner_reversed;
- unsigned int chro_pos;
-
- int overlapped_len, overlap_start, overlap_end;
- // All logical conditions
-
- //if( (vote->votes[i][j] < vote-> coverage_start[i][j]) < 12 && (vote-> coverage_end[i][j] > rl - 12 )) continue;
-
- is_partner_reversed = (vote->masks [i][j] & IS_NEGATIVE_STRAND) ? 1:0;
- overlap_start = max(max_start , vote->coverage_start[i][j]);
- overlap_end = min(max_end , vote->coverage_end[i][j]);
- overlapped_len =overlap_end - overlap_start;
-
- int coverage_len = max_end - max_start + vote->coverage_end[i][j] - vote->coverage_start[i][j];
- if (overlapped_len >0)coverage_len -= overlapped_len;
- //SUBREADprintf("MAX: %d-%d OTHER %d-%d COV=%d OVLP=%d\n", max_start, max_end, vote->coverage_start[i][j], vote->coverage_end[i][j], coverage_len, overlapped_len);
-
-
-
- if(overlapped_len >=14)
- continue;
-
- long long int dist = vote->pos[i][j];
- dist -= max_pos;
-
- //SUBREADprintf ("D=%lld\n", abs(dist));
- if (abs(dist)<6)
- continue;
-
- int support_r1 = 1;
- int support_r2 = 1;
-
- if (max_votes < support_r1 || vote->votes[i][j]<support_r2)
- continue;
-
- // Same chromosome
- if ((vote->coverage_start[i][j] < max_start) + is_reversed == 1)
- {
- locate_gene_position(max_pos + read_len, &(global_context -> chromosome_table) , &best_chro_name, &best_chro_pos);
- locate_gene_position(vote->pos[i][j] , &(global_context -> chromosome_table), &chro_name, &chro_pos);
- }else
- {
- locate_gene_position(max_pos , &(global_context -> chromosome_table), &best_chro_name, &best_chro_pos);
- locate_gene_position(vote->pos[i][j] +read_len, &(global_context -> chromosome_table), &chro_name, &chro_pos);
- }
-
- if (chro_name != best_chro_name) // The pointers can be compared because they can be the same.
- continue;
-
- int is_fusion = 0;
-
- if(is_reversed != is_partner_reversed) is_fusion = 1;
-
- if( is_reversed && ((max_pos > vote->pos[i][j]) + (vote->coverage_start[i][j] < max_start) != 1))is_fusion = 1;
- if((! is_reversed) && ((max_pos > vote->pos[i][j]) + (vote->coverage_start[i][j] > max_start) != 1)) is_fusion = 1;
-
- if(abs(dist) > 500000 || chro_name != best_chro_name) continue;
-
- int test_vote_value ;
- test_vote_value = 8888888 + vote->votes[i][j]* 1000000 - abs(dist);
- if (hint_pos>=0)
- {
- long long int hint_dist = hint_pos;
- hint_dist -= vote->pos[i][j];
- if (abs (hint_dist) < 100000)
- test_vote_value += 100;
- if (abs (hint_dist) < 5000)
- test_vote_value += 100;
- }
-
- if (test_vote_value<selected_max_votes)continue;
- // Conditions of order of R3 and R5
- *half_marks &= ~IS_REVERSED_HALVES;
- if (vote->coverage_start[i][j] < max_start && (((max_pos < vote->pos[i][j]) && !is_reversed) || ((max_pos > vote->pos[i][j]) && is_reversed) ) )
- *half_marks |= IS_REVERSED_HALVES;
- if (vote->coverage_start[i][j] >= max_end && (((max_pos > vote->pos[i][j]) && !is_reversed) || ((max_pos < vote->pos[i][j]) && is_reversed) ) )
- *half_marks |= IS_REVERSED_HALVES;
-
- if (vote->coverage_start[i][j] < max_start)
- {
- (*half_marks) = (*half_marks) & ~IS_R1_CLOSE_TO_5;
- }
- else
- {
- (*half_marks) |= IS_R1_CLOSE_TO_5;
- }
-
- if(max_mask & IS_NEGATIVE_STRAND)
- *half_marks = (*half_marks) | IS_NEGATIVE_STRAND_R1;
- else
- *half_marks = (*half_marks) & ~IS_NEGATIVE_STRAND_R1;
-
- if(vote->masks[i][j] & IS_NEGATIVE_STRAND)
- *half_marks = (*half_marks) | IS_NEGATIVE_STRAND_R2;
- else
- *half_marks = (*half_marks) & ~IS_NEGATIVE_STRAND_R2;
-
-
-
- best_splicing_point = ((vote->coverage_start[i][j] < max_start)? (vote->coverage_end[i][j]):(max_end)) + ((vote->coverage_start[i][j] < max_start)? (max_start):(vote->coverage_start[i][j]));
-
-
- best_splicing_point /=2;
-
- * best_pos1 = max_pos ;
- * best_pos2 = vote->pos[i][j] ;
- * best_vote1 = max_votes ;
- * best_vote2 = vote->votes[i][j] ;
- * read_coverage_start = min(vote->coverage_start[i][j] , max_start);
- * read_coverage_end = max(vote->coverage_end[i][j] , max_end);
-
- * read_coverage_start = max_start;
- * read_coverage_end = max_end;
-
- int k;
- for(k=0; k<MAX_INDEL_TOLERANCE ; k+=3)
- if(!max_indel_recorder[k+3])break;
- * indel_in_p1 = max_indel_recorder[k+2];
-
- for(k=0; k<MAX_INDEL_TOLERANCE ; k+=3)
- if(!vote->indel_recorder[i][j][k+3])break;
- * indel_in_p2 = vote->indel_recorder[i][j][k+2];
-
-
- * is_reversed_halves = is_reversed;
-
- if (test_vote_value >=100)
- *half_marks = (*half_marks) | IS_PAIRED_HINTED;
- else
- *half_marks = (*half_marks) & ~(IS_PAIRED_HINTED);
-
- if (is_fusion)
- *half_marks = (*half_marks) | IS_FUSION;
- else
- *half_marks = (*half_marks) & ~( IS_FUSION);
-
-
- selected_max_votes = test_vote_value;
-
- }
- *best_select_max_votes = selected_max_votes ;
- return best_splicing_point;
-}
-
-
-
-int core_select_best_matching_halves(global_context_t * global_context , gene_vote_t * vote, unsigned int * best_pos1, unsigned int * best_pos2, int * best_vote1, int * best_vote2, char * is_abnormal, short * half_marks, int * is_reversed_halves, float accept_rate, int read_len, long long int hint_pos, int tolerable_bases, short * read_coverage_start, short * read_coverage_end, char * indel_in_p1, char * indel_in_p2 , int * max_cover_start, int * max_cover_end, int rl, int repeated_pos_b [...]
-{
- unsigned int tmp_best_pos1=0, tmp_best_pos2=0;
- int tmp_best_vote1=0, tmp_best_vote2=0, tmp_is_reversed_halves=0;
- char tmp_is_abnormal=0, tmp_indel_in_p1=0, tmp_indel_in_p2=0;
- short tmp_half_marks=0, tmp_read_coverage_start=0, tmp_read_coverage_end=0;
- int ret = 0, best_ret = 0;
-
- int i,j;
- int test_select_votes=-1, best_select_votes = 1000000;
- //int max_minor = 0;
-
- /*
- for (i=0; i<GENE_VOTE_TABLE_SIZE; i++)
- for(j=0; j< vote->items[i]; j++)
- {
- if(vote->votes[i][j] < vote->max_vote)continue;
- int ii,jj;
- for (ii=0; ii<GENE_VOTE_TABLE_SIZE;ii++)
- for(jj=0; jj< vote->items[ii]; jj++)
- {
- if(max_minor >= vote->votes[ii][jj]) continue;
- if(ii==i && jj==j)continue;
- long long int dist = vote->pos[ii][jj];
- dist =abs(dist - vote->pos[i][j]);
- if(dist > 500000)
- continue;
- max_minor = vote->votes[ii][jj];
- }
-
- }
-
- int encountered = 0;
-
-
- for (i=0; i<GENE_VOTE_TABLE_SIZE; i++)
- for(j=0; j< vote->items[i]; j++)
- {
- if(vote->votes[i][j] < vote->max_vote)continue;
- int ii,jj;
- for (ii=0; ii<GENE_VOTE_TABLE_SIZE;ii++)
- for(jj=0; jj< vote->items[ii]; jj++)
- {
- if(max_minor != vote->votes[ii][jj]) continue;
- if(ii==i && jj==j)continue;
- long long int dist = vote->pos[ii][jj];
- dist =abs(dist - vote->pos[i][j]);
- if(dist > 500000)
- continue;
- encountered++;
- }
-
- }
- */
-
- int repeated_pos = repeated_pos_base;
- int offset_shifting = (rl > 220)?4:0;
- //int encounter = 0;
-
- for (i=0; i<GENE_VOTE_TABLE_SIZE; i++)
- for(j=0; j< vote->items[i]; j++)
- {
- /*if((vote->votes[i][j] >= vote->max_vote -1) && (vote->max_coverage_start >= vote-> coverage_start[i][j] - EXON_MAX_BIGMARGIN_OVERLAPPING ) && (vote->max_coverage_end <= vote-> coverage_end[i][j] + EXON_MAX_BIGMARGIN_OVERLAPPING))
- encounter++;*/
- if(repeated_pos_base>=0 && vote->pos[i][j]<=index_valid_range)
- if(vote->votes[i][j] >= vote->max_vote && repeated_pos < repeated_pos_base+12)
- {
- repeat_record[repeated_pos] = (vote-> coverage_start[i][j] >> offset_shifting);
- repeat_record[repeated_pos+1] = (vote-> coverage_end[i][j] >> offset_shifting);
- repeat_record[repeated_pos+2] = (is_negative?0x80:0) | (vote->votes[i][j]&0x7f);
- repeated_pos+=3;
- }
- }
- for (i=0; i<GENE_VOTE_TABLE_SIZE; i++)
- for(j=0; j< vote->items[i]; j++)
- {
- if(repeated_pos_base>=0 && vote->pos[i][j]<=index_valid_range)
- if(vote->votes[i][j] == vote->max_vote -1 && repeated_pos < repeated_pos_base+12)
- {
- repeat_record[repeated_pos] = (vote-> coverage_start[i][j] >> offset_shifting);
- repeat_record[repeated_pos+1] = (vote-> coverage_end[i][j] >> offset_shifting);
- repeat_record[repeated_pos+2] = (is_negative?0x80:0) | (vote->votes[i][j]&0x7f);
- repeated_pos+=3;
- }
- }
-
-
- /*
- if(encounter>=2)
- return 0;
- */
-
- ret = core_select_best_matching_halves_maxone(global_context, vote, &tmp_best_pos1, &tmp_best_pos2, &tmp_best_vote1, &tmp_best_vote2, &tmp_is_abnormal,&tmp_half_marks, &tmp_is_reversed_halves, accept_rate, read_len, hint_pos, tolerable_bases, &tmp_read_coverage_start, &tmp_read_coverage_end, &tmp_indel_in_p1, &tmp_indel_in_p2, vote -> max_position, vote->max_vote, vote-> max_coverage_start, vote-> max_coverage_end, vote-> max_mask, vote->max_indel_recorder, &test_select_votes, rl);
- test_select_votes += vote->max_vote*1000000;
- //SUBREADprintf("TSV=%d\n",test_select_votes);
-
- if(test_select_votes > best_select_votes)
- {
- best_select_votes = test_select_votes;
- *best_pos1 = tmp_best_pos1;
- *best_pos2 = tmp_best_pos2;
- *is_reversed_halves= tmp_is_reversed_halves;
-
- *best_vote1 = tmp_best_vote1;
- *best_vote2 = tmp_best_vote2;
- *is_abnormal = tmp_is_abnormal;
- *indel_in_p1 = tmp_indel_in_p1;
- *indel_in_p2 = tmp_indel_in_p2;
-
- *half_marks = tmp_half_marks;
- *read_coverage_start = tmp_read_coverage_start;
- *read_coverage_end = tmp_read_coverage_end;
-
- * max_cover_start = vote-> max_coverage_start;
- * max_cover_end = vote-> max_coverage_end;
- best_ret = ret;
- }
- return best_ret;
-}
-
-
-
-#define EXON_DONOR_TEST_WINDOW 17
-
-
-// pos1 must be small than pos2.
-int core13_test_donor(char *read, int read_len, unsigned int pos1, unsigned int pos2, int guess_break_point, char negative_strand, int test_range, char is_soft_condition, int EXON_INDEL_TOLERANCE, int* real_break_point, gene_value_index_t * my_value_array_index, int indel_offset1, int indel_offset2, int is_reversed, int space_type, int * best_donor_score, int * is_GTAG)
-{
- int bps_pos_x;
- int search_start = guess_break_point - test_range ;
- int search_end = guess_break_point + test_range ;
- char h1_2ch[3], h2_2ch[3];
-
- h1_2ch[2] = h2_2ch[2]=0;
- search_start=max(10, search_start);
- search_end = min(read_len-10, search_end);
- int best_break = -1;
- int min_x = -9099;
-
- for (bps_pos_x = search_start; bps_pos_x < search_end ; bps_pos_x ++)
- {
- int paired_score = 0;
- get_chro_2base(h1_2ch, my_value_array_index, pos1 - indel_offset1+ bps_pos_x , is_reversed);
- get_chro_2base(h2_2ch, my_value_array_index, pos2 - 2 - indel_offset2 + bps_pos_x, is_reversed);
-
-
- //if(!is_reversed)
- //SUBREADprintf("C1=%s @%u, C2=%s @%u\n",h1_2ch, pos1 + bps_pos_x, h2_2ch,pos2 - 2 + indel_offset + bps_pos_x);
- if(h1_2ch[0]==h2_2ch[0] && h1_2ch[1]==h2_2ch[1]) continue;
-
- if(is_donar_chars_part(h1_2ch) && is_donar_chars_part(h2_2ch))
- {
-
- paired_score = paired_chars_part(h1_2ch, h2_2ch, is_reversed);
-
- if(paired_score)
- {
- int m1, m2, x1, x2;
- int break_point_half = is_reversed?(read_len - bps_pos_x):bps_pos_x;
- int first_exon_end,second_half_start;
- int donar_conf_len = 0;
-
- donar_conf_len = min(break_point_half , EXON_DONOR_TEST_WINDOW);
- donar_conf_len = min(read_len - break_point_half, donar_conf_len);
- //SUBREADprintf("DONOR_CONF_LEN=%d\n", donar_conf_len);
-
- if (is_reversed)
- {
- first_exon_end = pos2 + bps_pos_x - indel_offset2;
- second_half_start = pos1 + bps_pos_x- indel_offset1;
-
- m1 = match_chro(read + break_point_half - donar_conf_len , my_value_array_index, first_exon_end, donar_conf_len, is_reversed, space_type);
- m2 = match_chro(read + break_point_half , my_value_array_index, second_half_start-donar_conf_len , donar_conf_len, is_reversed, space_type);
-
- x1 = match_chro(read + break_point_half , my_value_array_index, first_exon_end - donar_conf_len, donar_conf_len , is_reversed, space_type);
- x2 = match_chro(read + break_point_half - donar_conf_len , my_value_array_index, second_half_start , donar_conf_len, is_reversed, space_type);
- }
- else
- {
- first_exon_end = pos1 + bps_pos_x - indel_offset1;
- second_half_start = pos2 + bps_pos_x - indel_offset2;
-
- m1 = match_chro(read + break_point_half - donar_conf_len, my_value_array_index, first_exon_end-donar_conf_len , donar_conf_len, is_reversed, space_type);
- m2 = match_chro(read + break_point_half , my_value_array_index, second_half_start, donar_conf_len, is_reversed, space_type);
-
- x1 = match_chro(read + break_point_half , my_value_array_index, first_exon_end, donar_conf_len , is_reversed,space_type);
- x2 = match_chro(read + break_point_half - donar_conf_len, my_value_array_index, second_half_start - donar_conf_len, donar_conf_len , is_reversed,space_type);
- }
-
- #ifdef TEST_TARGET
- if(memcmp(read, TEST_TARGET, 15)==0)
- {
- SUBREADprintf("DONOR TEST STR=%s, %s ; pos=%d %d %d ; M=%d %d ; X=%d %d\n", h1_2ch, h2_2ch, bps_pos_x, indel_offset1, indel_offset2, m1, m2, x1, x2);
- }
- #endif
-
- int threshold = 3;
- if (paired_score == 1)
- threshold = 3;
-
- #ifdef QUALITY_KILL
- if (m1 >= donar_conf_len-1 && m2>=donar_conf_len-1 )
- if(x1<donar_conf_len - threshold && x2<donar_conf_len- threshold )
- #else
- if (m1 >= donar_conf_len-1 && m2>=donar_conf_len -1)
- if(x1<donar_conf_len - threshold && x2<donar_conf_len - threshold)
- #endif
- {
- int score = 3000-(x1 + x2) + (m1+ m2) ;
- if (min_x < score)
- {
- min_x = score;
- best_break = bps_pos_x;
- *is_GTAG = 1==((is_reversed) + (h1_2ch[0]=='G' || h1_2ch[1]=='G')); //"GT" or "AG"
- //printf("FL CC=%s\tCC2=%s\tis_GTAG=%d\tREV=%d\n",h1_2ch,h2_2ch,*is_GTAG, is_reversed);
- *best_donor_score = score;
- }
- }
- }
- }
- }
-
- if (best_break>0)
- {
- #ifdef TEST_TARGET
- if(memcmp(read, TEST_TARGET, 15)==0)
- SUBREADprintf("SELECRED!!!_BREAKPOINT=%d, RAW POS=%u,%u, R=%s\n", best_break, pos1 , pos2, read);
- #endif
- //SUBREADprintf ("FINAL BREAK: %d ; REV = %d\n ", best_break, is_reversed);
- *real_break_point = best_break;
- return 1;
- }
- else
- {
- #ifdef TEST_TARGET
- if(memcmp(read, TEST_TARGET, 15)==0)
- SUBREADprintf("KILLED!!!_BREAKPOINT=%d, R=%s\n", best_break+ pos1, read);
- #endif
- }
- return 0;
-}
-
-
-
-
-
-
-#define EXON_LARGE_WINDOW 60
-#define ACCEPTED_SUPPORT_RATE 0.3
-
-void core_fragile_junction_voting(global_context_t * global_context, thread_context_t * thread_context, char * read, char * qual, unsigned int full_rl, int negative_strand, int color_space, unsigned int low_border, unsigned int high_border, gene_vote_t *vote_p1)
-{
- int windows = full_rl / EXON_LARGE_WINDOW +1;
- float overlap = (1.0*windows * EXON_LARGE_WINDOW - full_rl) / (windows-1);
-
- int ww;
- int window_cursor = 0;
-
- HashTable * event_table = NULL;
- chromosome_event_t * event_space = NULL;
- if(thread_context)
- {
- event_table = ((indel_thread_context_t *)thread_context -> module_thread_contexts[MODULE_INDEL_ID]) -> event_entry_table;
- event_space = ((indel_thread_context_t *)thread_context -> module_thread_contexts[MODULE_INDEL_ID]) -> event_space_dynamic;
- }
- else
- {
- event_table = ((indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID]) -> event_entry_table;
- event_space = ((indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID]) -> event_space_dynamic;
- }
-
-
-
- for(ww=0; ww<windows;ww++)
- {
- window_cursor = (int)(ww * EXON_LARGE_WINDOW - ww * overlap);
- int read_len = EXON_LARGE_WINDOW;
- if(ww == windows-1)
- read_len = full_rl -window_cursor;
-
- float subread_step = 3.00001;
- int i;
- int subread_no;
- char * InBuff;
- InBuff = read + window_cursor;
- char tmp_char = InBuff[read_len];
- InBuff[read_len] = 0;
-
- init_gene_vote(vote_p1);
- for(subread_no=0; ; subread_no++)
- {
- int subread_offset1 = (int)(subread_step * (subread_no+1));
- subread_offset1 -= subread_offset1%GENE_SLIDING_STEP;
- subread_offset1 += GENE_SLIDING_STEP-1;
-
- for(i=0; i<GENE_SLIDING_STEP ; i++)
- {
- int subread_offset = (int)(subread_step * subread_no);
- subread_offset -= subread_offset%GENE_SLIDING_STEP -i;
-
- char * subread_string = InBuff + subread_offset;
- gehash_key_t subread_integer = genekey2int(subread_string, color_space);
-
- gehash_go_q(global_context->current_index, subread_integer , subread_offset, read_len,negative_strand, vote_p1, 1, 1, 21.9, 24, 5, subread_no, low_border, high_border - read_len);
- }
- if(subread_offset1 >= read_len -16)
- break;
- }
-
-
- if(1)
- {
- finalise_vote(vote_p1);
- select_best_vote(vote_p1);
- //print_votes(vote_p1, global_context -> config.index_prefix);
- unsigned int best_pos1=0;
- unsigned int best_pos2=0;
- int best_vote1=0;
- int best_vote2=0;
- char is_abnormal=0;
- short half_marks=0;
- int is_reversed_halves=0, max_cover_start=0, max_cover_end=0;
- char indel_in_p1=0, indel_in_p2=0;
- short read_coverage_start =0, read_coverage_end=0;
- gene_value_index_t * base_index = thread_context?thread_context->current_value_index:global_context->current_value_index ;
-
- int splice_point = core_select_best_matching_halves(global_context, vote_p1, &best_pos1, &best_pos2, &best_vote1, &best_vote2, &is_abnormal ,&half_marks, &is_reversed_halves, ACCEPTED_SUPPORT_RATE, read_len, -1, 0, &read_coverage_start, &read_coverage_end, &indel_in_p1, &indel_in_p2, &max_cover_start, &max_cover_end, read_len, -1 , 0, NULL , 0xffffffff);
-
- //printf("SP=%d; BV=%d; BV2=%d\n", splice_point, best_vote1, best_vote2);
- if (splice_point>0 && best_vote1 >= 1 && best_vote2>=1)
- {
- int test_real_break_point = -1, test_donor_score=-1;
- int is_GTAG = 0;
- int is_accepted = core13_test_donor(InBuff, read_len, min(best_pos1, best_pos2), max(best_pos1,best_pos2), splice_point, negative_strand, read_len/4, 0, 5, &test_real_break_point, base_index, 0, 0, negative_strand, color_space, &test_donor_score, &is_GTAG);
-
- if (is_accepted ){
- unsigned int pos_small = min(test_real_break_point+ best_pos1, test_real_break_point+ best_pos2) - 1;
- unsigned int pos_big = max(test_real_break_point+ best_pos1, test_real_break_point+ best_pos2);
-
- int event_no;
- chromosome_event_t * search_return [MAX_EVENT_ENTRIES_PER_SITE];
- chromosome_event_t * found = NULL;
-
- int found_events = search_event(global_context, event_table, event_space, pos_small , EVENT_SEARCH_BY_SMALL_SIDE, CHRO_EVENT_TYPE_JUNCTION|CHRO_EVENT_TYPE_FUSION, search_return);
-
- if(found_events)
- {
- int kx1;
- for(kx1 = 0; kx1 < found_events ; kx1++)
- {
- if(search_return[kx1] -> event_large_side == pos_big)
- {
- found = search_return[kx1];
- break;
- }
- }
- }
-
- if(found) found -> supporting_reads ++;
- else
- {
- if(thread_context)
- event_no = ((indel_thread_context_t *)thread_context -> module_thread_contexts[MODULE_INDEL_ID]) -> total_events ++;
- else
- event_no = ((indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID]) -> total_events ++;
-
- event_space = reallocate_event_space(global_context, thread_context, event_no);
-
- chromosome_event_t * new_event = event_space+event_no;
- memset(new_event,0,sizeof(chromosome_event_t));
- new_event -> event_small_side = pos_small;
- new_event -> event_large_side = pos_big;
-
- new_event -> is_negative_strand= !is_GTAG;
- new_event -> event_type = CHRO_EVENT_TYPE_JUNCTION;
-
- new_event -> supporting_reads = 1;
- new_event -> indel_length = 0;
-
- put_new_event(event_table, new_event , event_no);
- //printf("ADD JUNCTION BY FRAGILE, %d-%d\n", pos_small, pos_big);
- }
-
-
- }
-
- }
- }
- InBuff[read_len] = tmp_char;
- }
-}
-
-
diff --git a/src/core-junction.c b/src/core-junction.c
index 0bba398..cf4dfb4 100644
--- a/src/core-junction.c
+++ b/src/core-junction.c
@@ -30,8 +30,14 @@
#include "core.h"
#include "core-indel.h"
#include "core-junction.h"
+#include "core-bigtable.h"
-#define TTTSNAME "V0112_0155:7:1101:14820:2862"
+#define TTTSNAME "V0112_0155:7:1308:1308:136442"
+
+unsigned int abs32uint(unsigned int x){
+ if(x > 0x7fffffff) x = (0xffffffff - x) + 1;
+ return x;
+}
int localPointerCmp_forbed(const void *pointer1, const void *pointer2)
{
@@ -69,6 +75,7 @@ typedef struct{
short piece_main_read_quality;
short piece_minor_hamming_match;
short piece_minor_read_quality;
+ int piece_minor_score;
short intron_length;
gene_vote_number_t *piece_main_indel_record;
@@ -86,13 +93,14 @@ typedef struct{
char is_strand_jumped;
char is_break_even;
- unsigned long long int Score_H;
- unsigned long long int Score_L;
+ //unsigned long long int Score_H;
+ //unsigned long long int Score_L;
} select_junction_record_t;
+
// read_head_abs_pos is the offset of the FIRST WANTED base.
-void search_events_to_front(global_context_t * global_context, thread_context_t * thread_context, explain_context_t * explain_context, char * read_text , char * qual_text, unsigned int read_head_abs_offset, short remainder_len, short sofar_matched, int suggested_movement)
+void search_events_to_front(global_context_t * global_context, thread_context_t * thread_context, explain_context_t * explain_context, char * read_text , char * qual_text, unsigned int read_head_abs_offset, short remainder_len, short sofar_matched, int suggested_movement, int do_not_jump)
{
short tested_read_pos;
@@ -122,11 +130,16 @@ void search_events_to_front(global_context_t * global_context, thread_context_t
// tested_read_pos is the index of the first base unwanted!
- int move_start = global_context -> config.realignment_minimum_variant_distance;
+ int move_start = do_not_jump?0:global_context -> config.realignment_minimum_variant_distance;
if(suggested_movement) move_start = suggested_movement-1;
int is_junction_scanned = 0;
- if((global_context -> config.do_fusion_detection|| there_are_events_in_range(event_table->appendix1, read_head_abs_offset + 15, remainder_len - 15 )) &&
+ if(0 && FIXLENstrcmp("DB7DT8Q1:236:C2NGTACXX:2:1213:17842:64278", explain_context -> read_name) == 0)
+ {
+ SUBREADprintf("EVENT MAY HAVE FRONT=%d\t%d > %d\tPAIR_NO=%llu\n\nSCAN_START=%d\n", there_are_events_in_range(event_table->appendix1, read_head_abs_offset , remainder_len ), MAX_EVENTS_IN_READ-1, explain_context -> tmp_search_sections, explain_context -> pair_number, move_start);
+ }
+
+ if((global_context -> config.do_fusion_detection|| there_are_events_in_range(event_table->appendix1, read_head_abs_offset, remainder_len)) &&
MAX_EVENTS_IN_READ - 1 > explain_context -> tmp_search_sections)
for(tested_read_pos = move_start ; tested_read_pos <= remainder_len; tested_read_pos++)
{
@@ -141,18 +154,17 @@ void search_events_to_front(global_context_t * global_context, thread_context_t
else
potential_event_pos = read_head_abs_offset + tested_read_pos -1;
+ int search_types = CHRO_EVENT_TYPE_INDEL | CHRO_EVENT_TYPE_JUNCTION | CHRO_EVENT_TYPE_FUSION;
+ int site_events_no = search_event(global_context, event_table , event_space , potential_event_pos, event_search_method , search_types , site_events);
- int site_events_no = search_event(global_context, event_table , event_space , potential_event_pos, event_search_method , CHRO_EVENT_TYPE_INDEL | CHRO_EVENT_TYPE_JUNCTION | CHRO_EVENT_TYPE_FUSION , site_events);
- //#warning " ========== COMMENT THIS BLOCK =============="
- /*{
- printf("FOUND THE EVENT FRONT:%d at %u\n", site_events_no, potential_event_pos);
+ if(0 && FIXLENstrcmp("R000002444", explain_context -> read_name) == 0)
+ {
+ SUBREADprintf("FOUND THE EVENT FRONT:%d at %u\n", site_events_no, potential_event_pos);
if(site_events_no)
- printf("EVENT0_type = %d\n", site_events[0]->event_type);
- }*/
+ SUBREADprintf("EVENT0_type = %d\n", site_events[0]->event_type);
+ }
- //if(explain_context -> pair_number == 999999)
- // printf("FF OFFSET=%d; LEDGE=%u; FOUND=%d\n", tested_read_pos, potential_event_pos, site_events_no);
if(!site_events_no)continue;
unsigned int tested_chro_begin;
@@ -163,19 +175,29 @@ void search_events_to_front(global_context_t * global_context, thread_context_t
matched_bases_to_site = match_chro(read_text, value_index, tested_chro_begin, tested_read_pos, explain_context -> current_is_strand_jumped, global_context -> config.space_type);
- //#warning "========= COMMENT TWO LINES ===================="
- //SUBREADprintf("MBASETOSITE=%d, tested_read_pos=%d\n", matched_bases_to_site, tested_read_pos);
- //SUBREADprintf("TXT=%s, tested_read_pos=%d\n", read_text, tested_chro_begin);
+ /*
+ #warning "========= COMMENT TWO LINES ===================="
+ SUBREADprintf("MBASETOSITE=%d, tested_read_pos=%d\n", matched_bases_to_site, tested_read_pos);
+ SUBREADprintf("TXT=%s, tested_read_pos=%d\n", read_text, tested_chro_begin);
+ */
int this_round_junction_scanned = 0;
+ if(0 && FIXLENstrcmp("R000002444", explain_context -> read_name) == 0)
+ SUBREADprintf("F_JUMP? match=%d / tested=%d\n", matched_bases_to_site , tested_read_pos);
- if(tested_read_pos >0 && (matched_bases_to_site)*10000/tested_read_pos > 9000)
+ //#warning "========= remove - 2000 from next line ============="
+ if(tested_read_pos >0 && ( matched_bases_to_site*10000/tested_read_pos > 9000 - 2000 || global_context->config.maximise_sensitivity_indel) )
for(xk1 = 0; xk1 < site_events_no ; xk1++)
{
chromosome_event_t * tested_event = site_events[xk1];
- //if(explain_context -> pair_number == 23) printf("F_JUMP?%d > %d %s (%u) ; SEARCH_TAG=%u , EVENT=%u,%u\n", (1+matched_bases_to_site)*10000 / tested_read_pos , 9000, read_text, tested_chro_begin, potential_event_pos , tested_event -> event_small_side, tested_event -> event_large_side);
+ if(explain_context -> is_fully_covered && tested_event -> event_type == CHRO_EVENT_TYPE_FUSION && tested_event -> event_large_side - tested_event -> event_small_side > MAX_DELETION_LENGTH){
+ continue;
+ }
+ //if(explain_context -> pair_number == 23)
+ if(0 && FIXLENstrcmp("R000002444", explain_context -> read_name) == 0)
+ SUBREADprintf("F_JUMP?%d > %d %s (%u) ; SEARCH_TAG=%u , EVENT=%u,%u\n", (1+matched_bases_to_site)*10000 / tested_read_pos , 9000, read_text, tested_chro_begin, potential_event_pos , tested_event -> event_small_side, tested_event -> event_large_side);
// note that these two values are the index of the first wanted base.
unsigned int new_read_head_abs_offset;
@@ -201,11 +223,11 @@ void search_events_to_front(global_context_t * global_context, thread_context_t
short new_remainder_len = remainder_len - tested_read_pos + min(0, tested_event->indel_length) - tested_event -> indel_at_junction;
- //int is_ambiguous = tested_event -> is_ambiguous;
- if(new_remainder_len>0)// && (new_remainder_len>8 || !is_ambiguous))
+ if(new_remainder_len>0)
{
//if(explain_context -> pair_number==2074) printf("JUMPPED IN!\n");
+
explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections].read_pos_end = explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections].read_pos_start + tested_read_pos;
explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections].event_after_section = tested_event;
explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections].is_connected_to_large_side = (potential_event_pos == tested_event -> event_large_side);
@@ -218,7 +240,7 @@ void search_events_to_front(global_context_t * global_context, thread_context_t
int current_is_jumped = explain_context -> current_is_strand_jumped;
int current_sup_as_complex = explain_context -> tmp_min_support_as_complex;
int current_sup_as_simple = explain_context -> tmp_support_as_simple;
- int current_unsup_as_simple = explain_context -> tmp_min_unsupport;
+ //int current_unsup_as_simple = explain_context -> tmp_min_unsupport;
int current_pure_donor_found = explain_context -> tmp_is_pure_donor_found_explain;
explain_context -> tmp_support_as_simple = tested_event -> supporting_reads;
@@ -233,18 +255,22 @@ void search_events_to_front(global_context_t * global_context, thread_context_t
explain_context -> tmp_search_sections ++;
+
+ if(0 && FIXLENstrcmp("R000002444", explain_context -> read_name) == 0)
+ SUBREADprintf("FRONT_ADD_EVENT : %s , %u ~ %u , INDELLEN=%d, TEST_READ_POS=%u, RPED=%u, ABSSTART=%u\n", explain_context -> read_name, tested_event -> event_small_side, tested_event -> event_large_side, tested_event -> indel_length, tested_read_pos, explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections + 1].read_pos_end, new_read_head_abs_offset);
+
//if(explain_context -> pair_number == 23){
//printf("JUMP_IN: %u ; STRAND=%c ; REMENDER=%d ; 0=%d 0=%d\n", new_read_head_abs_offset, tested_event -> is_strand_jumped?'X':'=', new_remainder_len, tested_event -> indel_length, tested_event -> indel_at_junction);
//}
//printf("SUGGEST_NEXT = %d (! %d)\n", tested_event -> connected_next_event_distance, tested_event -> connected_previous_event_distance);
- search_events_to_front(global_context, thread_context, explain_context, read_text + tested_event -> indel_at_junction + tested_read_pos - min(0, tested_event->indel_length), qual_text + tested_read_pos - min(0, tested_event->indel_length), new_read_head_abs_offset, new_remainder_len, sofar_matched + matched_bases_to_site - jump_penalty, tested_event -> connected_next_event_distance);
+ search_events_to_front(global_context, thread_context, explain_context, read_text + tested_event -> indel_at_junction + tested_read_pos - min(0, tested_event->indel_length), qual_text + tested_read_pos - min(0, tested_event->indel_length), new_read_head_abs_offset, new_remainder_len, sofar_matched + matched_bases_to_site - jump_penalty, tested_event -> connected_next_event_distance, 0);
explain_context -> tmp_search_sections --;
explain_context -> current_is_strand_jumped = current_is_jumped;
explain_context -> tmp_min_support_as_complex = current_sup_as_complex;
explain_context -> tmp_support_as_simple = current_sup_as_simple;
- explain_context -> tmp_min_unsupport = current_unsup_as_simple;
+ //explain_context -> tmp_min_unsupport = current_unsup_as_simple;
explain_context -> tmp_is_pure_donor_found_explain = current_pure_donor_found;
}
//if(global_context ->config.limited_tree_scan) break;
@@ -262,11 +288,15 @@ void search_events_to_front(global_context_t * global_context, thread_context_t
void new_explain_try_replace(global_context_t* global_context, thread_context_t * thread_context, explain_context_t * explain_context, int remainder_len, int search_to_back)
{
- int is_replace = 0;
+ int is_better_result = 0, is_same_best = 0;
+
+
+ if(0 && FIXLENstrcmp("R_chr901_166222_12M1D88M", explain_context -> read_name) == 0)
+ SUBREADprintf("TRY_REPLACE : MATCHED: BEST=%d, THIS=%d, IS_TO_BACK=%d, SECTIONS=%d, NEXT_EVENT[0]=%p, READ_LEN[0]=%d ~ %d\n", explain_context -> best_matching_bases , explain_context-> tmp_total_matched_bases, search_to_back, explain_context -> tmp_search_sections, explain_context -> tmp_search_junctions[0].event_after_section, explain_context -> tmp_search_junctions[0].read_pos_start, explain_context -> tmp_search_junctions[0].read_pos_end);
if(explain_context -> best_matching_bases < explain_context-> tmp_total_matched_bases)
{
- is_replace = 1;
+ is_better_result = 1;
explain_context -> best_is_complex = explain_context -> tmp_search_sections ;
explain_context -> is_currently_tie = 0;
explain_context -> best_support_as_simple = explain_context -> tmp_support_as_simple;
@@ -275,6 +305,7 @@ void new_explain_try_replace(global_context_t* global_context, thread_context_t
explain_context -> best_is_pure_donor_found_explain = explain_context -> tmp_is_pure_donor_found_explain;
explain_context -> second_best_matching_bases = max(explain_context -> second_best_matching_bases, explain_context -> best_matching_bases);
explain_context -> best_matching_bases = explain_context-> tmp_total_matched_bases ;
+
}
else if(explain_context -> best_matching_bases == explain_context-> tmp_total_matched_bases)
{
@@ -288,23 +319,28 @@ void new_explain_try_replace(global_context_t* global_context, thread_context_t
if(explain_context -> tmp_search_sections == 0)
{
if(explain_context -> tmp_min_unsupport >explain_context->best_min_support_as_complex){
- is_replace = 1;
+ is_better_result = 1;
explain_context->best_min_support_as_complex =explain_context -> tmp_min_unsupport;
explain_context -> best_is_pure_donor_found_explain = explain_context -> tmp_is_pure_donor_found_explain;
explain_context -> is_currently_tie = 0;
}
else if(explain_context -> tmp_min_unsupport == explain_context->best_min_support_as_complex)
+ {
explain_context -> is_currently_tie = 1;
+ is_same_best = 1;
+ }
}
else{
if(explain_context -> tmp_min_support_as_complex >explain_context->best_min_support_as_complex){
- is_replace = 1;
+ is_better_result = 1;
explain_context->best_min_support_as_complex =explain_context -> tmp_min_support_as_complex;
explain_context -> best_is_pure_donor_found_explain = explain_context -> tmp_is_pure_donor_found_explain;
explain_context -> is_currently_tie = 0;
}
- else if(explain_context -> tmp_min_support_as_complex == explain_context->best_min_support_as_complex)
+ else if(explain_context -> tmp_min_support_as_complex == explain_context->best_min_support_as_complex){
explain_context -> is_currently_tie = 1;
+ is_same_best = 1;
+ }
}
}
@@ -315,16 +351,17 @@ void new_explain_try_replace(global_context_t* global_context, thread_context_t
{
if(explain_context -> best_min_unsupport_as_simple >= explain_context -> best_support_as_simple+2)
{
- is_replace = 1;
+ is_better_result = 1;
explain_context -> best_min_support_as_complex = explain_context -> best_min_unsupport_as_simple;
explain_context -> best_is_pure_donor_found_explain = explain_context -> tmp_is_pure_donor_found_explain;
explain_context -> is_currently_tie = 0;
}
}
- else
+ //#warning "======= MAKE if(0) IS CORRECT BEFORE RELEASE ======"
+ else if(0)
if(explain_context -> best_min_unsupport_as_simple >= explain_context -> best_support_as_simple)
{
- is_replace = 1;
+ is_better_result = 1;
explain_context -> best_min_support_as_complex = explain_context -> best_min_unsupport_as_simple;
explain_context -> best_is_pure_donor_found_explain = explain_context -> tmp_is_pure_donor_found_explain;
explain_context -> is_currently_tie = 0;
@@ -333,29 +370,48 @@ void new_explain_try_replace(global_context_t* global_context, thread_context_t
}
else return;
- if(is_replace)
- {
-
+ if(is_better_result || is_same_best){
if(search_to_back){
explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections].read_pos_start = 0;
- explain_context -> back_search_confirmed_sections = explain_context -> tmp_search_sections +1;
- memcpy(explain_context -> back_search_junctions, explain_context -> tmp_search_junctions , sizeof(perfect_section_in_read_t) * (explain_context -> tmp_search_sections +1));
-
}else{
explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections].read_pos_end = explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections].read_pos_start + remainder_len;
explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections].event_after_section = NULL;
+ }
+ }
- explain_context -> front_search_confirmed_sections = explain_context -> tmp_search_sections +1;
- memcpy(explain_context -> front_search_junctions, explain_context -> tmp_search_junctions , sizeof(perfect_section_in_read_t) * (explain_context -> tmp_search_sections +1));
+ if(0 && FIXLENstrcmp("R000002444", explain_context -> read_name) == 0)
+ SUBREADprintf("TRY_REPLACE_DESICION: BETTER=%d, SAME=%d\n", is_better_result, is_same_best);
+
+ if(is_better_result)
+ {
+ if(search_to_back){
+ explain_context -> all_back_alignments = 1;
+ explain_context -> result_back_junction_numbers[0] = explain_context -> tmp_search_sections +1;
+ memcpy(explain_context -> result_back_junctions[0], explain_context -> tmp_search_junctions , sizeof(perfect_section_in_read_t) * (explain_context -> tmp_search_sections +1));
+
+ }else{
+ explain_context -> all_front_alignments = 1;
+ explain_context -> result_front_junction_numbers[0] = explain_context -> tmp_search_sections +1;
+ memcpy(explain_context -> result_front_junctions[0], explain_context -> tmp_search_junctions , sizeof(perfect_section_in_read_t) * (explain_context -> tmp_search_sections +1));
}
+ }else if(is_same_best){
+ if(search_to_back && explain_context -> all_back_alignments < MAX_ALIGNMENT_PER_ANCHOR){
+ explain_context -> result_back_junction_numbers[explain_context -> all_back_alignments] = explain_context -> tmp_search_sections +1;
+ memcpy(explain_context -> result_back_junctions[explain_context -> all_back_alignments], explain_context -> tmp_search_junctions , sizeof(perfect_section_in_read_t) * (explain_context -> tmp_search_sections +1));
+ explain_context -> all_back_alignments ++;
+ }else if((!search_to_back) && explain_context -> all_front_alignments < MAX_ALIGNMENT_PER_ANCHOR){
+ explain_context -> result_front_junction_numbers[explain_context -> all_front_alignments] = explain_context -> tmp_search_sections +1;
+ memcpy(explain_context -> result_front_junctions[explain_context -> all_front_alignments], explain_context -> tmp_search_junctions , sizeof(perfect_section_in_read_t) * (explain_context -> tmp_search_sections +1));
+ explain_context -> all_front_alignments ++;
+ }
}
}
// read_tail_abs_offset is actually the offset of the base next to the last base in read tail.
// read_tail_pos is the FIRST UNWANTED BASE, after the read.
-void search_events_to_back(global_context_t * global_context, thread_context_t * thread_context, explain_context_t * explain_context, char * read_text , char * qual_text, unsigned int read_tail_abs_offset, short read_tail_pos, short sofar_matched, int suggested_movement)
+void search_events_to_back(global_context_t * global_context, thread_context_t * thread_context, explain_context_t * explain_context, char * read_text , char * qual_text, unsigned int read_tail_abs_offset, short read_tail_pos, short sofar_matched, int suggested_movement, int do_not_jump)
{
short tested_read_pos;
@@ -385,10 +441,17 @@ void search_events_to_back(global_context_t * global_context, thread_context_t *
int is_junction_scanned = 0;
// minimum perfect section length is 1
// tested_read_pos is the first WANTED BASE in section.
- int move_start = read_tail_pos - global_context -> config.realignment_minimum_variant_distance;
+ int move_start = read_tail_pos - (do_not_jump?0:global_context -> config.realignment_minimum_variant_distance);
if(suggested_movement) move_start = read_tail_pos - suggested_movement + 1;
- if(MAX_EVENTS_IN_READ - 1> explain_context -> tmp_search_sections && ( there_are_events_in_range(event_table -> appendix2, read_tail_abs_offset - read_tail_pos, read_tail_pos - 15)||global_context -> config.do_fusion_detection))
- for(tested_read_pos = read_tail_pos - global_context -> config.realignment_minimum_variant_distance; tested_read_pos >=0;tested_read_pos --)
+
+
+ if(0 && FIXLENstrcmp("R000002444", explain_context -> read_name) == 0)
+ {
+ SUBREADprintf("EVENT MAY HAVE BETWEEN (%u, %u) BACK=%d\t%d > %d\tPAIR_NO=%llu\nMOVE_START=%d\n", read_tail_abs_offset - read_tail_pos, read_tail_pos , there_are_events_in_range(event_table -> appendix2, read_tail_abs_offset - read_tail_pos, read_tail_pos), MAX_EVENTS_IN_READ-1, explain_context -> tmp_search_sections, explain_context -> pair_number, move_start);
+ }
+
+ if(MAX_EVENTS_IN_READ - 1> explain_context -> tmp_search_sections && ( there_are_events_in_range(event_table -> appendix2, read_tail_abs_offset - read_tail_pos, read_tail_pos)||global_context -> config.do_fusion_detection))
+ for(tested_read_pos = move_start; tested_read_pos >=0;tested_read_pos --)
{
int xk1, matched_bases_to_site;
int jump_penalty = 0;
@@ -402,17 +465,21 @@ void search_events_to_back(global_context_t * global_context, thread_context_t *
potential_event_pos = read_tail_abs_offset - ( read_tail_pos - tested_read_pos);
- int site_events_no = search_event(global_context, event_table , event_space , potential_event_pos, event_search_method , CHRO_EVENT_TYPE_INDEL | CHRO_EVENT_TYPE_JUNCTION | CHRO_EVENT_TYPE_FUSION , site_events);
+ int search_types = CHRO_EVENT_TYPE_INDEL | CHRO_EVENT_TYPE_JUNCTION | CHRO_EVENT_TYPE_FUSION;
+ int site_events_no = search_event(global_context, event_table , event_space , potential_event_pos, event_search_method , search_types, site_events);
//if(explain_context -> pair_number==999999)
//printf("BF OFFSET=%d; READ_TAIL=%d; REDGE=%u; FOUND=%d\n", tested_read_pos, read_tail_pos, potential_event_pos, site_events_no);
- /*#warning "======= COMMENT THIS BLOCK ======="
+ if(0 && FIXLENstrcmp("R000002444", explain_context -> read_name) == 0)
{
- printf("FOUND THE EVENT BACK:%d at %u\n", site_events_no, potential_event_pos);
- if(site_events_no)
- printf("EVENT0_type = %d\n", site_events[0]->event_type);
- }*/
+ if(site_events_no) {
+ SUBREADprintf("FOUND THE EVENT BACK:%d at %u\t", site_events_no, potential_event_pos);
+ SUBREADprintf("EVENT0_type = %d\n", site_events[0]->event_type);
+ }else{
+ SUBREADprintf("NO EVENT BACK:%d at %u\n", site_events_no, potential_event_pos);
+ }
+ }
if(!site_events_no)continue;
@@ -424,15 +491,18 @@ void search_events_to_back(global_context_t * global_context, thread_context_t *
matched_bases_to_site = match_chro(read_text + tested_read_pos, value_index, tested_chro_begin , read_tail_pos - tested_read_pos, explain_context -> current_is_strand_jumped, global_context -> config.space_type);
- // if(explain_context->pair_number == 999999)
- // printf("B_JUMP?%d > %d TLEN=%d \n", (1+matched_bases_to_site)*10000 / (read_tail_pos - tested_read_pos) , 9000, read_tail_pos - tested_read_pos);
int this_round_junction_scanned = 0;
- if((read_tail_pos>tested_read_pos) && (matched_bases_to_site)*10000/(read_tail_pos - tested_read_pos) > 9000)
+ //#warning "========= remove - 2000 from next line ============="
+ if((read_tail_pos>tested_read_pos) && ( matched_bases_to_site*10000/(read_tail_pos - tested_read_pos) > 9000 - 2000 || global_context->config.maximise_sensitivity_indel) )
for(xk1 = 0; xk1 < site_events_no ; xk1++)
{
chromosome_event_t * tested_event = site_events[xk1];
+ if(explain_context -> is_fully_covered && tested_event -> event_type == CHRO_EVENT_TYPE_FUSION && tested_event -> event_large_side - tested_event -> event_small_side > MAX_DELETION_LENGTH){
+ continue;
+ }
+
if(global_context -> config.do_fusion_detection && tested_event -> event_type == CHRO_EVENT_TYPE_INDEL)
{
if(explain_context->current_is_strand_jumped){
@@ -447,6 +517,8 @@ void search_events_to_back(global_context_t * global_context, thread_context_t *
}
+ if(0 && strcmp("S_chr901_565784_72M8D28M", explain_context -> read_name) == 0)
+ SUBREADprintf("B_JUMP?%d > %d TLEN=%d \n", (1+matched_bases_to_site)*10000 / (read_tail_pos - tested_read_pos) , 9000, read_tail_pos - tested_read_pos);
// note that read_tail_pos is the first unwanted base.
int new_read_tail_pos = tested_read_pos;
@@ -465,10 +537,9 @@ void search_events_to_back(global_context_t * global_context, thread_context_t *
else
new_read_tail_abs_offset = tested_event -> event_small_side + 1;
- //int is_ambiguous = tested_event -> is_ambiguous;
new_read_tail_pos -= tested_event -> indel_at_junction;
- if(new_read_tail_pos>0)// && (new_read_tail_pos>8 || !is_ambiguous))
+ if(new_read_tail_pos>0)
{
explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections].read_pos_start = tested_read_pos;
explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections + 1].event_after_section = tested_event;
@@ -476,13 +547,16 @@ void search_events_to_back(global_context_t * global_context, thread_context_t *
explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections + 1].read_pos_end = tested_read_pos + min(0, tested_event->indel_length) - tested_event -> indel_at_junction;
explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections + 1].abs_offset_for_start = new_read_tail_abs_offset;
+ if(0 && FIXLENstrcmp("R000002444", explain_context -> read_name) == 0)
+ SUBREADprintf("BACK_ADD_EVENT : %s , %u ~ %u , INDELLEN=%d, TEST_READ_POS=%u, RPED=%u, ABSSTART=%u\n", explain_context -> read_name, tested_event -> event_small_side, tested_event -> event_large_side, tested_event -> indel_length, tested_read_pos, explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections + 1].read_pos_end, new_read_tail_abs_offset);
+
if(tested_event->event_type == CHRO_EVENT_TYPE_FUSION) jump_penalty = 2;
//else if(tested_event->event_type == CHRO_EVENT_TYPE_JUNCTION) jump_penalty = 1;
int current_is_jumped = explain_context -> current_is_strand_jumped ;
int current_sup_as_complex = explain_context -> tmp_min_support_as_complex;
int current_sup_as_simple = explain_context -> tmp_support_as_simple;
- int current_unsup_as_simple = explain_context -> tmp_min_unsupport;
+ //int current_unsup_as_simple = explain_context -> tmp_min_unsupport;
int current_pure_donor_found = explain_context -> tmp_is_pure_donor_found_explain;
explain_context -> tmp_support_as_simple = tested_event -> supporting_reads;
@@ -494,14 +568,12 @@ void search_events_to_back(global_context_t * global_context, thread_context_t *
explain_context -> current_is_strand_jumped = !explain_context -> current_is_strand_jumped;
explain_context -> tmp_search_junctions[explain_context -> tmp_search_sections + 1].is_strand_jumped = explain_context -> current_is_strand_jumped;
-
-
//if(explain_context->pair_number == 999999)
// SUBREADprintf(" === %d ; js=%d ===>>>\n", explain_context -> tmp_search_sections, is_junction_scanned);
explain_context -> tmp_search_sections ++;
//printf("SUGGEST_PREV at %u = %d (! %d)\n", tested_event -> event_small_side, tested_event -> connected_previous_event_distance, tested_event -> connected_next_event_distance);
- search_events_to_back(global_context, thread_context, explain_context, read_text , qual_text, new_read_tail_abs_offset , new_read_tail_pos, sofar_matched + matched_bases_to_site - jump_penalty, tested_event -> connected_previous_event_distance);
+ search_events_to_back(global_context, thread_context, explain_context, read_text , qual_text, new_read_tail_abs_offset , new_read_tail_pos, sofar_matched + matched_bases_to_site - jump_penalty, tested_event -> connected_previous_event_distance, 0);
explain_context -> tmp_search_sections --;
//if(explain_context->pair_number == 999999)
@@ -510,7 +582,7 @@ void search_events_to_back(global_context_t * global_context, thread_context_t *
explain_context -> current_is_strand_jumped = current_is_jumped;
explain_context -> tmp_min_support_as_complex = current_sup_as_complex;
explain_context -> tmp_support_as_simple = current_sup_as_simple;
- explain_context -> tmp_min_unsupport = current_unsup_as_simple;
+ //explain_context -> tmp_min_unsupport = current_unsup_as_simple;
explain_context -> tmp_is_pure_donor_found_explain = current_pure_donor_found;
}
//if(global_context ->config.limited_tree_scan) break;
@@ -528,33 +600,58 @@ void search_events_to_back(global_context_t * global_context, thread_context_t *
int init_junction_tables(global_context_t * context)
{
+ fraglist_init(&context -> funky_list_A);
+ fraglist_init(&context -> funky_list_DE);
+
+ bktable_init(&context -> funky_table_BC, FUNKY_COLOCATION_TOLERANCE * 2, 10000000);
+ bktable_init(&context -> funky_table_DE, FUNKY_COLOCATION_TOLERANCE * 2, 10000000);
+
+ bktable_init(&context -> breakpoint_table_P, 2 * context -> config.maximum_pair_distance, 1000000);
+ bktable_init(&context -> breakpoint_table_QR, 2 * BREAK_POINT_MAXIMUM_TOLERANCE, 1000000);
+ bktable_init(&context -> breakpoint_table_YZ, 2 * context -> config.maximum_pair_distance, 1000000);
+
+ bktable_init(&context -> translocation_result_table, 2*BREAK_POINT_MAXIMUM_TOLERANCE, 1000000);
+ bktable_init(&context -> inversion_result_table, 2*BREAK_POINT_MAXIMUM_TOLERANCE, 1000000);
return 0;
}
int destroy_junction_tables(global_context_t * context)
{
+ fraglist_destroy(&context -> funky_list_A);
+ fraglist_destroy(&context -> funky_list_DE);
+
+ bktable_destroy(&context -> funky_table_BC);
+ bktable_destroy(&context -> funky_table_DE);
+ bktable_destroy(&context -> breakpoint_table_P);
+ bktable_destroy(&context -> breakpoint_table_QR);
+ bktable_destroy(&context -> breakpoint_table_YZ);
+
+ HashTableIteration(context -> inversion_result_table.entry_table , bktable_free_ptrs);
+ bktable_destroy(&context -> inversion_result_table);
+
+ HashTableIteration(context -> translocation_result_table.entry_table , bktable_free_ptrs);
+ bktable_destroy(&context -> translocation_result_table);
+
return 0;
}
int init_junction_thread_contexts(global_context_t * global_context, thread_context_t * thread_context, int task)
{
- return 0;
+ return 0;
}
int finalise_junction_thread(global_context_t * global_context, thread_context_t * thread_context, int task)
{
- return 0;
+
+ return 0;
}
-void insert_big_margin_record(global_context_t * global_context , unsigned char * big_margin_record, unsigned char votes, short read_pos_start, short read_pos_end, int read_len, int is_negative)
+void insert_big_margin_record(global_context_t * global_context , unsigned short * big_margin_record, unsigned char votes, short read_pos_start, short read_pos_end, int read_len, int is_negative)
{
- unsigned char read_pos_start_2 = (is_negative?read_len -read_pos_end:read_pos_start) ;
- unsigned char read_pos_end_2 = (is_negative?read_len -read_pos_start:read_pos_end);
- if(read_len>255)
- {
- read_pos_start_2>>=2;
- read_pos_end_2>>=2;
- }
+ if( global_context->config.big_margin_record_size<3) return;
+
+ unsigned short read_pos_start_2 = (is_negative?read_len -read_pos_end:read_pos_start) ;
+ unsigned short read_pos_end_2 = (is_negative?read_len -read_pos_start:read_pos_end);
int xk1;
for(xk1=0; xk1< global_context->config.big_margin_record_size / 3; xk1++)
@@ -573,765 +670,1061 @@ void insert_big_margin_record(global_context_t * global_context , unsigned char
}
}
-//#define voting_anchor_number 3
-void set_zero_votes(global_context_t * global_context, int pair_number, int is_second_read , int best_read_id)
+// This function tries to add a new anchor to the list, or to put it in the place of an existing anchor by moving the following anchors down.
+// It is only invoked in the first step: selecting the best anchors. No minor half is considered at all.
+// It also marks whether the current result is a tie, i.e., whether the existing and the current Vote+Coverage+Hamming+Qual are equal.
+void do_append_inner(global_context_t * global_context, thread_context_t * thread_context, subread_read_number_t pair_number, int * used_anchors, int total_anchors, select_junction_record_t * anchor_list, gene_vote_number_t Vote_major, int coverage_major_start, int coverage_major_end, int hamming_major, int quality_major, unsigned int pos_major, int flags, int read_len, gene_vote_number_t * indel_recorder)
{
- if(best_read_id >= global_context->config.multi_best_reads) return;
- _global_retrieve_alignment_ptr(global_context, pair_number, is_second_read, best_read_id)->selected_votes = 0;
-}
-
+ int xx;
+ int replace_index = -1;
+ int i_am_break_even = 0;
+ if(0<*used_anchors)
+ {
+ for(xx=0; xx< *used_anchors;xx++){
+ select_junction_record_t * tanchor = anchor_list + xx;
-// bitmap score with number of votes:
-// is_paired_end << 63
-// R1_major_number_votes ( 6 bits ) << 57
-// R1_minor_number_votes ( 5 bits ) << 52
-// R2_major_number_votes ( 6 bits ) << 46
-// R2_minor_number_votes ( 5 bits ) << 41
+ if(Vote_major >tanchor -> piece_main_votes ||
+ (Vote_major ==tanchor -> piece_main_votes && coverage_major_end-coverage_major_start > tanchor -> piece_main_coverage_end-tanchor -> piece_main_coverage_start) ||
+ (Vote_major ==tanchor -> piece_main_votes && coverage_major_end-coverage_major_start ==tanchor -> piece_main_coverage_end-tanchor -> piece_main_coverage_start && hamming_major > tanchor -> piece_main_hamming_match) ||
+ (Vote_major ==tanchor -> piece_main_votes && coverage_major_end-coverage_major_start ==tanchor -> piece_main_coverage_end-tanchor -> piece_main_coverage_start && hamming_major ==tanchor -> piece_main_hamming_match && quality_major >= tanchor -> piece_main_read_quality))
+ {
+ if((Vote_major ==tanchor -> piece_main_votes && coverage_major_end-coverage_major_start ==tanchor -> piece_main_coverage_end-tanchor -> piece_main_coverage_start && hamming_major ==tanchor -> piece_main_hamming_match && quality_major == tanchor -> piece_main_read_quality))
+ {
+ // a tie
+ if(xx < total_anchors - 1)
+ replace_index = xx;
-void make_128bit_score(unsigned long long int * score_H, unsigned long long * score_L, int is_paired_end, short Vote_Anchor_Major, short Vote_Anchor_Minor , short Vote_Second_Major, short Vote_Second_Minor, short Span , short HammingMatch,short Quality, unsigned int TLEM, int Intron)
-{
- ( * score_L) = 0;
+ if(xx == 0)// the BEST anchor is a tie
+ {
+ tanchor -> is_break_even = 1;
- if(is_paired_end)
- ( * score_H) = 0x8000000000000000llu;
- else
- ( * score_H) = 0LLU;
+ int yy;
+ for(yy = 1; yy < *used_anchors; yy++)
+ {
+ select_junction_record_t * canchor = anchor_list + yy;
+ if((Vote_major ==canchor -> piece_main_votes && coverage_major_end-coverage_major_start ==canchor -> piece_main_coverage_end-canchor -> piece_main_coverage_start && hamming_major ==canchor -> piece_main_hamming_match && quality_major == canchor -> piece_main_read_quality))
+ canchor -> is_break_even = 1;
+ }
+ i_am_break_even = 1;
+ }
- ( * score_H) += (1LLU*Vote_Anchor_Major&0x3fff)<<49;
+ break;
+ }
+ else
+ {
+ // the current XX-th item is move down.
+ replace_index = xx;
+ if(xx == 0) // the BEST anchor is clearly not a tie
+ {
+ int yy;
+ for(yy = 0; yy < *used_anchors; yy++)
+ {
+ select_junction_record_t * canchor = anchor_list + yy;
+ canchor -> is_break_even = 0;
+ }
+ }
+ break;
+ }
+ }
- if(Vote_Anchor_Minor)
- ( * score_H) += (1LLU*Vote_Anchor_Minor&0x1ff)<<36;
+ if(replace_index < 0 && (*used_anchors) < total_anchors ) replace_index = (*used_anchors);
+ }
+ }else replace_index = 0;
- if(TLEM >= 0x80000 && TLEM < 0x1000000)
- TLEM = 0x80000 + (TLEM >> 8);
- else if(TLEM >= 0x1000000)
- TLEM = 0x90000 + (TLEM >> 16);
+ if(replace_index >= 0){
+ for(xx = (* used_anchors) - 1; xx >= replace_index ; xx--)
+ {
+ if(xx < total_anchors - 1)
+ memcpy(anchor_list + xx+1, anchor_list+xx, sizeof( select_junction_record_t ));
+ }
- TLEM = 0xfffff - TLEM;
+ int major_indels = 0;
- if(Vote_Second_Major)
- {
- ( * score_H) += (1LLU*Vote_Second_Major&0x3fff)<<22;
+ if(read_len > EXON_LONG_READ_LENGTH){
+ int kx1;
+ for(kx1=0; kx1<MAX_INDEL_SECTIONS; kx1++)
+ {
+ if(!indel_recorder[kx1*3]) break;
+ major_indels += indel_recorder[kx1*3+2];
+ }
+ }
- if(Vote_Second_Minor)
- ( * score_H) += (1LLU*Vote_Second_Minor&0x1ff)<<9;
+ select_junction_record_t * nanchor = anchor_list + replace_index;
+ memset(nanchor , 0 , sizeof( select_junction_record_t ));
+ nanchor -> is_break_even = i_am_break_even;
+ nanchor -> piece_main_votes = Vote_major;
+ nanchor -> piece_main_coverage_start = coverage_major_start;
+ nanchor -> piece_main_coverage_end = coverage_major_end;
+ nanchor -> piece_main_hamming_match = hamming_major;
+ nanchor -> piece_main_read_quality = quality_major;
+ nanchor -> piece_main_abs_offset = pos_major;
+ nanchor -> piece_main_masks = flags;
+ nanchor -> piece_main_indels = major_indels;
+ nanchor -> piece_main_indel_record = indel_recorder;
+
+ if( * used_anchors < total_anchors) (*used_anchors) ++;
}
+}
- ( * score_H) += 0x1ff & (Span >> 3);
- ( * score_L) += (1LLU * (Span & 0x7)) << 61;
-
- if(HammingMatch)
- ( * score_L) += (1LLU*HammingMatch & 0xfff) << 49;
+int is_PE_distance(global_context_t * global_context, unsigned int pos1, unsigned int pos2, int rlen1, int rlen2, int is_negative_R1, int is_negative_R2)
+{
+ long long int dist = pos2;
+ dist -= pos1;
- if(Quality)
- ( * score_L) += (1LLU*Quality & 0x1ff) << 40;
+ is_negative_R1 = (is_negative_R1>0)?1:0;
+ is_negative_R2 = (is_negative_R2>0)?1:0;
- ( * score_L) += (1LLU * TLEM & 0xfffff) << 20;
+ if(pos1 > pos2) dist -= rlen1;
+ else if(pos1 < pos2) dist += rlen2;
+ else dist += max(rlen2, rlen1);
-
- if(Intron >= 0x80000 && Intron < 0x1000000)
- Intron = ((Intron >> 8) + 0x80000);
- else if(Intron >= 0x1000000)
- Intron = ((Intron >> 16) + 0x90000);
+ if(abs(dist) > global_context->config.maximum_pair_distance || abs(dist)<global_context->config.minimum_pair_distance) return 0;
- ( * score_L) += (0xfffff - Intron);
+ if(is_negative_R1 != is_negative_R2) return 0;
+ if(pos1 > pos2 && !is_negative_R1) return 0;
+ if(pos1 < pos2 && is_negative_R1) return 0;
+ return 1;
}
-int process_voting_junction(global_context_t * global_context, thread_context_t * thread_context, int pair_number, gene_vote_t * vote_1, gene_vote_t * vote_2, char * read_name_1, char * read_name_2, char * read_text_1, char * read_text_2, int read_len_1, int read_len_2, int is_negative_strand, gene_vote_number_t v1_all_subreads, gene_vote_number_t v2_all_subreads)
+
+#define MAX_VOTE_TOLERANCE 1
+//returns 1 if the vote number is not significantly higher than the vote numbers in the vote list.
+int test_small_minor_votes(global_context_t * global_context, int minor_i, int minor_j, int major_i, int major_j , gene_vote_t * votes, int read_len)
{
- int i, j, kx1;
- int vote_tmp_1 = 1;
- if (global_context -> config.max_insertion_at_junctions) vote_tmp_1 = 4;
- if (global_context -> config.do_fusion_detection) vote_tmp_1 = 4;
- int voting_anchor_number = global_context -> input_reads.is_paired_end_reads?max(3,global_context -> config.multi_best_reads):(global_context -> config.is_rna_seq_reads?max(vote_tmp_1,global_context -> config.multi_best_reads):global_context -> config.multi_best_reads);
+ int is_small_margin_minor = 0;
+ long long dist = votes -> pos[minor_i][minor_j];
+ dist -= votes -> pos[major_i][major_j];
+
+ if(abs(dist)> global_context->config.maximum_intron_length)
+ {
+ int iii, jjj;
+ for(iii=0; iii<GENE_VOTE_TABLE_SIZE; iii++)
+ {
+ for(jjj = 0; jjj < votes->items[iii]; jjj++)
+ {
+ if(iii == minor_i && jjj == minor_j) continue;
+ // "1" is the tolerance.
+ if(votes -> votes[minor_i][minor_j] - votes -> votes[iii][jjj] >=1) continue;
+
+ int minor_coverage_start = votes -> coverage_start[minor_i][minor_j] ;
+ int minor_coverage_end = votes -> coverage_end[minor_i][minor_j] ;
+
+ int other_coverage_start = votes -> coverage_start[iii][jjj];
+ int other_coverage_end = votes -> coverage_end[iii][jjj];
+
+ int minor_negative = votes -> masks[minor_i][minor_j] & IS_NEGATIVE_STRAND;
+ int other_negative = votes -> masks[iii][jjj] & IS_NEGATIVE_STRAND;
+
+ if(minor_negative) {
+ int ttt = read_len - minor_coverage_end;
+ minor_coverage_end = read_len - minor_coverage_start;
+ minor_coverage_start = ttt;
+ }
+
+ if(other_negative){
+ int ttt = read_len - other_coverage_end;
+ other_coverage_end = read_len - other_coverage_start;
+ other_coverage_start = ttt;
+ }
- // each read nominates at most five anchors
- // the base combination of the two anchors is selected.
+ if(abs(minor_coverage_end - other_coverage_end) < 7 && abs(minor_coverage_start - other_coverage_start)<7)
+ is_small_margin_minor = 1;
- select_junction_record_t read_1_anchors[voting_anchor_number];
- select_junction_record_t read_2_anchors[voting_anchor_number];
- int used_anchors_1=0, used_anchors_2=0, is_anchor_1_breakeven = 0, is_anchor_2_breakeven = 0;
- memset(read_1_anchors, 0, sizeof(select_junction_record_t)*voting_anchor_number);
- memset(read_2_anchors, 0, sizeof(select_junction_record_t)*voting_anchor_number);
+ if(is_small_margin_minor) break;
+ }
+ if(is_small_margin_minor) break;
+ }
+ }
+ return is_small_margin_minor;
+}
- int is_second_read;
- int all_max_votes = vote_1->max_vote;
- if(global_context -> input_reads.is_paired_end_reads)
- all_max_votes = max(vote_2->max_vote, all_max_votes);
+// function test_junction_minor returns 1 if the current anchor and current_vote[i][j] are not good mates in terms of junction reads:
+// for example, if the distance is too large, if the covered regions overlap, or if the two mapped parts of the read are arranged in reverse order (except in fusion detection)
+int test_junction_minor(global_context_t * global_context, thread_context_t * thread_context, gene_vote_t * votes, int vote_i, int vote_j, int i, int j, long long int dist)
+{
+ if(abs(dist)> global_context->config.maximum_intron_length) return 1;
+ if(votes -> coverage_start[vote_i][vote_j] == votes -> coverage_start[i][j])return 1;
+ if(votes -> coverage_end[vote_i][vote_j] == votes -> coverage_end[i][j])return 1;
- if(all_max_votes>=global_context-> config.minimum_subread_for_first_read)
+ if(votes -> coverage_start[vote_i][vote_j] > votes -> coverage_start[i][j])
{
+ if(votes -> pos[vote_i][vote_j] < votes -> pos[i][j])return 1;
+ }
+ else
+ {
+ if(votes -> pos[vote_i][vote_j] > votes -> pos[i][j])return 1;
+ }
- for(is_second_read = 0; is_second_read < 1+global_context -> input_reads.is_paired_end_reads; is_second_read++)
- {
- gene_vote_t * current_vote = is_second_read?vote_2:vote_1;
- int current_max_votes = current_vote -> max_vote;
- int total_used_anchors;
- select_junction_record_t * current_anchors = is_second_read?read_2_anchors:read_1_anchors;
+ return 0;
+}
+
+void update_top_three(global_context_t * global_context, int * top_buffer_3i, int new_value){
+ if(new_value > top_buffer_3i[global_context -> config.top_scores - 1]){
+ int x1;
+ for(x1 = 0;x1 < global_context -> config.top_scores ; x1++){
+ if(new_value > top_buffer_3i[x1]){
+ int x2;
+ for(x2 = global_context -> config.top_scores - 1 ; x2 > x1 ; x2 --){
+ top_buffer_3i[x2] = top_buffer_3i[x2-1];
+ }
+ top_buffer_3i[x1] = new_value;
+ break;
+ }else if(new_value == top_buffer_3i[x1]) break;
+ }
+ }
+}
- int curr_read_len = is_second_read?read_len_2:read_len_1;
- char * curr_read_text = is_second_read?read_text_2:read_text_1;
- gene_value_index_t * value_index = thread_context?thread_context->current_value_index:global_context->current_value_index ;
- // put main_piece to anchors.
- for (i=0; i<GENE_VOTE_TABLE_SIZE; i++)
- {
- for (j=0; j< current_vote->items[i]; j++)
- {
- if(current_vote -> votes[i][j] ==current_max_votes)
- {
- int target_addr = 0;
- int is_break_even ;
- int hamming_match = 0, quality_score = 0;
+int comb_sort_compare(void * Vcomb_buffer, int i, int j){
+ vote_combination_t * comb_buffer = (vote_combination_t *)Vcomb_buffer;
+ return comb_buffer[i].score_adj - comb_buffer[j].score_adj;
+}
- if(global_context -> config.use_hamming_distance_break_ties)
- hamming_match = match_chro_indel(curr_read_text, value_index , current_vote -> pos[i][j], curr_read_len, 0, global_context -> config.space_type, global_context -> config.max_indel_length, current_vote -> indel_recorder[i][j], global_context -> config.total_subreads);
- if(global_context -> config.use_quality_score_break_ties)
- quality_score = max(0,min(512,current_vote -> quality[i][j] / current_vote -> votes[i][j]-200));
+void comb_sort_exchange(void * Vcomb_buffer, int i, int j){
+ vote_combination_t * comb_buffer = (vote_combination_t *)Vcomb_buffer;
+ vote_combination_t tmpv;
+ memcpy(&tmpv, comb_buffer + i, sizeof(vote_combination_t));
+ memcpy(comb_buffer + i, comb_buffer + j, sizeof(vote_combination_t));
+ memcpy(comb_buffer + j, &tmpv, sizeof(vote_combination_t));
+}
- //printf("Q=%d\n", current_vote -> quality[i][j]);
+void comb_sort_merge(void * Vcomb_buffer, int start, int items, int items2){
+ vote_combination_t * comb_buffer = (vote_combination_t *)Vcomb_buffer;
+ vote_combination_t * merge_target = malloc(sizeof(vote_combination_t) * (items + items2));
- int main_piece_indels = 0;
+ int items1_cursor = start, items2_cursor = start + items, x1;
- if(curr_read_len > EXON_LONG_READ_LENGTH){
- for(kx1=0; kx1<MAX_INDEL_SECTIONS; kx1++)
- {
- if(!current_vote -> indel_recorder[i][j][kx1*3]) break;
- main_piece_indels += (current_vote -> indel_recorder[i][j][kx1*3+2]);
- }
- }
+ for(x1=0; x1 < items+items2; x1++){
+ int select_items_1 = (items1_cursor < items + start && comb_sort_compare(comb_buffer, items1_cursor, items2_cursor) <=0) || (items2_cursor == start + items + items2);
+ if(select_items_1){
+ memcpy(merge_target+x1, comb_buffer+items1_cursor, sizeof(vote_combination_t));
+ items1_cursor++;
+ }else{
+ memcpy(merge_target+x1, comb_buffer+items2_cursor, sizeof(vote_combination_t));
+ items2_cursor++;
+ }
- unsigned long long int test_score_L = 0, test_score_H = 0;
- //int test_score = 20000000* current_vote -> votes[i][j] + this_extra_scores + (current_vote -> coverage_end[i][j] - current_vote -> coverage_start[i][j]) - 100 * (main_piece_indels);
-
- make_128bit_score(&test_score_H, &test_score_L, 0, current_vote -> votes[i][j], 0, 0, 0, (current_vote -> coverage_end[i][j] - current_vote -> coverage_start[i][j]) , hamming_match, quality_score, 0xffffffff, 0xffffffff);
+ }
- for(target_addr =0; target_addr<voting_anchor_number; target_addr++)
- if((current_anchors[target_addr].Score_H < test_score_H || (current_anchors[target_addr].Score_H == test_score_H && current_anchors[target_addr].Score_L < test_score_L ))|| ( current_vote -> pos[i][j] < current_anchors[target_addr].piece_main_abs_offset && current_anchors[target_addr].Score_H == test_score_H && current_anchors[target_addr].Score_L == test_score_L)) break;
+ memcpy(comb_buffer + start, merge_target, (items+items2) * sizeof(vote_combination_t));
+ free(merge_target);
- is_break_even = 0;
- if(current_anchors[0].Score_H == test_score_H && current_anchors[0].Score_L == test_score_L)
- is_break_even = 1;
- else if(current_anchors[0].Score_H < test_score_H || (current_anchors[0].Score_H == test_score_H && current_anchors[0].Score_L < test_score_L))
- {
- if(is_second_read) is_anchor_2_breakeven = 0;
- else is_anchor_1_breakeven = 0;
- }
+}
- if(target_addr<voting_anchor_number-1)
- for(kx1=voting_anchor_number-1; kx1>target_addr; kx1--)
- memcpy(current_anchors+kx1, current_anchors+kx1-1, sizeof(select_junction_record_t));
+int is_better_inner(global_context_t * global_context, thread_context_t * thread_context, subjunc_result_t * junc_res, int old_intron_length, gene_vote_number_t Vote_minor, int coverage_minor_length, int intron)
+{
+ if( Vote_minor > junc_res -> minor_votes ||
+ (Vote_minor ==junc_res -> minor_votes && coverage_minor_length > junc_res -> minor_coverage_end - junc_res -> minor_coverage_start) ||
+ (Vote_minor ==junc_res -> minor_votes && coverage_minor_length ==junc_res -> minor_coverage_end - junc_res -> minor_coverage_start && intron < old_intron_length))
+ return 1;
+ else return 0;
+}
- if(target_addr<voting_anchor_number)
- {
- memset(&current_anchors[target_addr], 0, sizeof(select_junction_record_t));
- current_anchors[target_addr].piece_main_abs_offset = current_vote -> pos[i][j];
- current_anchors[target_addr].piece_main_coverage_start = current_vote -> coverage_start[i][j];
- current_anchors[target_addr].piece_main_coverage_end = current_vote -> coverage_end[i][j];
- current_anchors[target_addr].piece_main_votes = current_vote -> votes[i][j];
- current_anchors[target_addr].piece_main_indel_record = current_vote -> indel_recorder[i][j] ;
- current_anchors[target_addr].piece_main_indels = main_piece_indels;
- current_anchors[target_addr].piece_main_masks = current_vote -> masks[i][j];
- current_anchors[target_addr].piece_main_read_quality = quality_score;
- current_anchors[target_addr].piece_main_hamming_match = hamming_match;
-
- if(global_context -> config.use_hamming_distance_in_exon)
- {
- int found_indels , found_inde_pos;
-
- int matchingness_count = match_indel_chro_to_front(curr_read_text, value_index, current_vote -> pos[i][j] , curr_read_len, &found_indels, &found_inde_pos, global_context -> config.max_indel_length, 0);
- if(matchingness_count*1000 >= curr_read_len*800)
- {
- current_anchors[target_addr].piece_main_coverage_start = 1;
- current_anchors[target_addr].piece_main_coverage_end = curr_read_len-1;
- }
+#define COVERAGE_STAB_NUMBER 100
+int test_fully_covered(global_context_t * global_context, gene_vote_t * vote, int read_length){
+ int i,j,xk1,xk2;
+ char local_strands[COVERAGE_STAB_NUMBER];
+ unsigned int local_locations[COVERAGE_STAB_NUMBER];
+ unsigned long long local_coverage[COVERAGE_STAB_NUMBER];
+ int used_stabs = 0;
- }
- current_anchors[target_addr].Score_H = test_score_H;
- current_anchors[target_addr].Score_L = test_score_L;
- }
- if(is_break_even)
+ for (i=0; i<GENE_VOTE_TABLE_SIZE; i++)
+ {
+ for (j=0; j< vote->items[i]; j++)
+ {
+ if(vote -> votes[i][j]>2 && used_stabs < COVERAGE_STAB_NUMBER)
+ {
+ int is_fresh = 1;
+ int is_negative = (vote -> masks[i][j] & IS_NEGATIVE_STRAND)?1:0;
+ for(xk1=0; xk1<used_stabs; xk1++){
+ if(local_strands[xk1] == is_negative){
+ long long dist = vote -> pos[i][j];
+ dist -= local_locations[xk1];
+ if(abs(dist) < MAX_DELETION_LENGTH)
{
- if(is_second_read) is_anchor_2_breakeven = 1;
- else is_anchor_1_breakeven = 1;
+ is_fresh=0;
+ break;
}
}
- if(current_vote -> votes[i][j] >=current_max_votes-2 && (global_context->config.do_big_margin_filtering_for_junctions || 1 || global_context->config.do_big_margin_filtering_for_reads))
- insert_big_margin_record(global_context, _global_retrieve_big_margin_ptr(global_context,pair_number, is_second_read) ,current_vote -> votes[i][j], current_vote -> coverage_start[i][j], current_vote -> coverage_end[i][j], is_second_read?read_len_2:read_len_1, is_negative_strand);
+ }
+
+ if(is_fresh){
+ local_strands[used_stabs]=is_negative;
+ local_locations[used_stabs]= vote -> pos[i][j];
+ local_coverage[used_stabs] = 0;
+ used_stabs++;
}
}
+ }
+ }
+ if(!used_stabs) return 0;
- for(kx1=0; kx1<voting_anchor_number; kx1++)
+ for (i=0; i<GENE_VOTE_TABLE_SIZE; i++)
+ {
+ for (j=0; j< vote->items[i]; j++)
+ {
+ if(vote -> votes[i][j]>=1)
{
- if(!current_anchors[kx1].piece_main_votes)break;
- //SUBREADprintf("%s : %s READ : POS=%u, V=%d, HAMMAT=%d\n", read_name_1, is_second_read?"2ND":"1ST", current_anchors[kx1].piece_main_abs_offset, current_anchors[kx1].piece_main_votes, current_anchors[kx1].piece_main_hamming_match);
+ int is_negative = (vote -> masks[i][j] & IS_NEGATIVE_STRAND)?1:0;
+ for(xk1=0; xk1<used_stabs; xk1++){
+ if(local_strands[xk1] == is_negative){
+ long long dist = vote -> pos[i][j];
+ dist -= local_locations[xk1];
+ if(abs(dist) < MAX_DELETION_LENGTH)
+ {
+ for(xk2 = vote -> coverage_start[i][j] * 64 / read_length; xk2 <=
+ vote -> coverage_end[i][j] * 64 / read_length; xk2++){
+ local_coverage[xk1] |= 1llu<<xk2;
+ }
+ }
+ }
+ }
}
- total_used_anchors = kx1;
+ }
+ }
- if(is_second_read)
- used_anchors_2 = total_used_anchors;
- else
- used_anchors_1 = total_used_anchors;
+ for(xk1=0; xk1<used_stabs; xk1++){
+ int covered = 0;
+ for(xk2 = 0; xk2<64; xk2++){
+ covered += ( local_coverage[xk1] & (1llu<<xk2) )?1:0;
+ }
+ //SUBREADprintf("COVERAGE LEVEL=%d\n", covered);
+ if(covered > 54){
+ return 1;
+ }
+ }
+ return 0;
+}
- for(kx1=0; kx1<total_used_anchors; kx1++)
- {
- select_junction_record_t * current_anchor = &current_anchors[kx1];
- //if((current_anchors[kx1].piece_main_coverage_end - current_anchors[kx1].piece_main_coverage_start)*10000 > curr_read_len * 8000)continue;
- current_anchor -> is_break_even = is_second_read?is_anchor_2_breakeven:is_anchor_1_breakeven;
+void copy_vote_to_alignment_res(global_context_t * global_context, thread_context_t * thread_context, mapping_result_t * align_res, subjunc_result_t * junc_res, gene_vote_t * current_vote, int vote_i, int vote_j, int curr_read_len, char * read_name, char * curr_read_text, int used_subreads_in_vote, int noninformative_subreads_in_vote, subread_read_number_t pair_number, int is_second_read, int * is_fully_covered)
+{
- if(global_context->config.is_rna_seq_reads || global_context->config.do_fusion_detection)
- {
- unsigned int max_score_L = current_anchor ->Score_L;
- unsigned long long int max_score_H = current_anchor ->Score_H;
+ align_res -> selected_position = current_vote -> pos[vote_i][vote_j];
+ align_res -> selected_votes = current_vote -> votes[vote_i][vote_j];
+ align_res -> indels_in_confident_coverage = indel_recorder_copy(align_res -> selected_indel_record, current_vote -> indel_recorder[vote_i][vote_j]);
+ align_res -> confident_coverage_end = current_vote -> coverage_end[vote_i][vote_j];
+ align_res -> confident_coverage_start = current_vote -> coverage_start[vote_i][vote_j];
+ align_res -> result_flags = (current_vote -> masks[vote_i][vote_j] & IS_NEGATIVE_STRAND)?(CORE_IS_NEGATIVE_STRAND):0;
+ align_res -> used_subreads_in_vote = used_subreads_in_vote;
+ align_res -> noninformative_subreads_in_vote = noninformative_subreads_in_vote;
+ align_res -> is_fully_covered = *is_fully_covered ;
- for (i=0; i<GENE_VOTE_TABLE_SIZE; i++)
- for (j=0; j< current_vote->items[i]; j++)
- {
- // no way: if(current_vote -> votes[i][j] > current_anchor->piece_main_votes) continue;
- if(current_vote -> votes[i][j] < current_anchor->piece_minor_votes) continue;
- //printf("JXK0Y USED=%d %u > %u\n", total_used_anchors, current_vote -> pos[i][j] , current_anchor->piece_main_abs_offset);
- if(current_vote -> votes[i][j] == current_anchor->piece_main_votes && current_vote -> pos[i][j] >= current_anchor->piece_main_abs_offset) continue;
- long long int dist = current_vote -> pos[i][j];
- dist -= current_anchor->piece_main_abs_offset;
-
- if(global_context->config.do_fusion_detection)
- {
- int is_small_margin_minor = 0;
- if(1 && abs(dist)> global_context->config.maximum_intron_length)
- {
- int iii, jjj;
- for(iii=0; iii<GENE_VOTE_TABLE_SIZE; iii++)
- {
- for(jjj = 0; jjj < current_vote->items[iii]; jjj++)
- {
- if(current_anchor->piece_main_abs_offset == current_vote -> pos[iii][jjj]) continue;
- if(current_vote->votes[i][j] - current_vote -> votes[iii][jjj] >=2) continue;
- long long int dist_mate = current_anchor->piece_main_abs_offset;
- dist_mate -= current_vote -> pos[iii][jjj];
- if(dist_mate< global_context->config.maximum_intron_length)
- is_small_margin_minor = 1;
- if(is_small_margin_minor) break;
- }
- if(is_small_margin_minor) break;
- }
- }
- if(is_small_margin_minor) continue;
- }
- else
- { // if it is junction detection, then remove long-distance halves and wrongly ordered halves.
- if(abs(dist)> global_context->config.maximum_intron_length) continue;
- if(current_anchor->piece_main_coverage_start == current_vote -> coverage_start[i][j])continue;
- if(current_anchor->piece_main_coverage_end == current_vote -> coverage_end[i][j])continue;
+ //insert_big_margin_record(global_context , _global_retrieve_big_margin_ptr(global_context,pair_number, is_second_read), align_res -> selected_votes, align_res -> confident_coverage_start, align_res -> confident_coverage_end, curr_read_len, (current_vote -> masks[vote_i][vote_j] & IS_NEGATIVE_STRAND)?1:0);
- if(current_anchor->piece_main_coverage_start > current_vote -> coverage_start[i][j])
- {
- if(current_anchor->piece_main_abs_offset < current_vote -> pos[i][j])continue;
- }
- else
- {
- if(current_anchor->piece_main_abs_offset > current_vote -> pos[i][j])continue;
- }
- }
+ if(global_context -> config.do_breakpoint_detection)
+ {
+ int i,j, current_piece_minor_score = 0;
- int is_strand_jumped = (current_anchor->piece_main_masks & IS_NEGATIVE_STRAND)!=(current_vote -> masks[i][j] & IS_NEGATIVE_STRAND);
- int minor_hamming_match = 0;
- if(global_context -> config.use_hamming_distance_break_ties)
- minor_hamming_match = match_chro_indel(curr_read_text, value_index , current_vote -> pos[i][j], curr_read_len, 0, global_context -> config.space_type, global_context -> config.max_indel_length, current_vote -> indel_recorder[i][j], global_context -> config.total_subreads);
+ // iterate all the anchors we have found in step 1:
+ for (i=0; i<GENE_VOTE_TABLE_SIZE; i++)
+ {
+ for (j=0; j< current_vote->items[i]; j++)
+ {
+ if(i == vote_i && j == vote_j) continue;
+ if(align_res -> selected_votes < current_vote -> votes[i][j]) continue; // major half must be the anchor
+
+ long long int dist = current_vote -> pos[vote_i][vote_j];
+ dist -= current_vote -> pos[i][j];
+
+ int is_strand_jumpped = (current_vote -> masks[vote_i][vote_j] & IS_NEGATIVE_STRAND)!=(current_vote -> masks[i][j] & IS_NEGATIVE_STRAND);
+ if(global_context->config.do_fusion_detection && (*is_fully_covered) && (dist > MAX_DELETION_LENGTH || is_strand_jumpped)) continue;
+
+ if(global_context->config.do_fusion_detection){
+ // function test_small_minor_votes returns 1 if the vote number is not significantly
+ // higher than the vote numbers in the vote list.
+ //#warning "=========== THE TWO LINES SHOULD BE UNCOMMENTED IN RELEASED VERSION ==== WE COMMENT IT FOR A BETTER FUSION SENSITIVITY BUT ONLY FOR TEST ==================="
+ if(1){
+ int small_minor_bigmargin = test_small_minor_votes(global_context , i, j, vote_i, vote_j, current_vote, curr_read_len);
+ if(small_minor_bigmargin) continue;
+ }
+ }else{
+ // function test_junction_minor returns 1 if the current anchor and current_vote[i][j]
+ // are not good mates in terms of junction reads:
+ //
+ // for example, if the distance is too far, if the coverered region overlapped or
+ // if the covered region has a wrong arrangement to their relative positions.
+ int test_minor_res = test_junction_minor(global_context, thread_context, current_vote, vote_i, vote_j, i, j, dist);
+ if(0 && FIXLENstrcmp("R002403247", read_name) == 0) {
+ char posout2[100];
+ char posout1[100];
+ absoffset_to_posstr(global_context, current_vote -> pos[vote_i][vote_j], posout1);
+ absoffset_to_posstr(global_context, current_vote -> pos[i][j], posout2);
+ SUBREADprintf("SMALL_MARGIN=%d at %s ~ %s\n", test_minor_res, posout1, posout2);
+ }
+ // SUBREADprintf("TMR=%d (V=%d)\n", test_minor_res, current_vote -> votes[i][j]);
+ if(test_minor_res)continue;
+ }
- int minor_read_quality = 0;
- if(global_context -> config.use_quality_score_break_ties)
- minor_read_quality = min(1000, current_vote -> quality[i][j] / current_vote -> votes[i][j]);
+ int is_better = is_better_inner(global_context, thread_context,
+ junc_res, abs32uint(current_vote -> pos[vote_i][vote_j] - junc_res -> minor_position), current_vote -> votes[i][j], current_vote -> coverage_end[i][j] - current_vote -> coverage_start[i][j],
+ abs32uint(current_vote -> pos[vote_i][vote_j] - current_vote -> pos[i][j]));
- unsigned long long int new_score_H = 0, new_score_L = 0 ;
+ int replace_minor = 0, minor_indel_offset = 0, inserted_bases = 0, is_GT_AG_donors = 0, is_donor_found = 0, final_split_point = 0, major_indels = 0, small_side_increasing_coordinate = 0, large_side_increasing_coordinate = 0;
- make_128bit_score(&new_score_H, &new_score_L, 0, current_anchor->piece_main_votes , current_vote -> votes[i][j], 0, 0, (current_anchor->piece_main_coverage_end -current_anchor->piece_main_coverage_start) + (current_vote -> coverage_end[i][j] - current_vote -> coverage_start[i][j]) , current_anchors[kx1].piece_main_hamming_match + minor_hamming_match, current_anchors[kx1].piece_main_read_quality + minor_read_quality , 0xffffffff , global_context->config.report_multi_mapping_reads? [...]
+ if(0 && FIXLENstrcmp("R002403247", read_name) == 0)
+ {
+ char posout[100];
+ absoffset_to_posstr(global_context, current_vote -> pos[i][j], posout);
+ SUBREADprintf("IBT=%d (V=%d , OV=%d) at %s\n", is_better, current_vote -> votes[i][j], junc_res -> minor_votes, posout);
+ SUBREADprintf("IBT OLD_INTRON=%d, INTRON=%d\n", abs32uint(current_vote -> pos[vote_i][vote_j] - junc_res -> minor_position),
+ abs32uint(current_vote -> pos[vote_i][vote_j] - current_vote -> pos[i][j])
+ );
+ }
- //new_score = current_anchors[kx1].piece_main_extra_scores + max(500000-abs(dist),0) + current_anchor -> piece_main_votes * 20000000 + current_vote -> votes[i][j] * 20000000 + (current_anchor->piece_main_coverage_end - current_anchor->piece_main_coverage_start) - 100 * (current_anchors[kx1].piece_main_indels);
+ if(is_better){
+ // Determine the splicing point of the fusion or the junction
+ // If the splicing point is determined, then set replace_minor = 1
+ if(is_strand_jumpped){
+ int minor_cover_end_as_reversed = (current_vote -> masks[i][j] & IS_NEGATIVE_STRAND)? current_vote -> coverage_end[i][j]:(curr_read_len - current_vote -> coverage_start[i][j]);
+ int minor_cover_start_as_reversed = (current_vote -> masks[i][j] & IS_NEGATIVE_STRAND)? current_vote -> coverage_start[i][j]:(curr_read_len - current_vote -> coverage_end[i][j]);
+ int main_cover_end_as_reversed = (current_vote -> masks[vote_i][vote_j] & IS_NEGATIVE_STRAND)?current_vote -> coverage_end[vote_i][vote_j]:(curr_read_len - current_vote -> coverage_start[vote_i][vote_j]);
+ int main_cover_start_as_reversed = (current_vote -> masks[vote_i][vote_j] & IS_NEGATIVE_STRAND)?current_vote -> coverage_start[vote_i][vote_j]:(curr_read_len - current_vote -> coverage_end[vote_i][vote_j]);
- if(new_score_H > max_score_H||(new_score_H == max_score_H && new_score_L> max_score_L))
- {
- int final_split_point, is_GT_AG_donors, is_donor_found, inserted_bases = 0;
- int donors_found_score;
- int minor_indel_offset=0;
- if(is_strand_jumped)
- {
+ int overlapped ;
+ if(main_cover_start_as_reversed > minor_cover_start_as_reversed)
+ overlapped = minor_cover_end_as_reversed - main_cover_start_as_reversed;
+ else
+ overlapped = main_cover_end_as_reversed - minor_cover_start_as_reversed;
- // both guess_start and guess_end have to be translated to "reversed" read manner.
- //if(strcmp(read_name_1,"a4")==0)printf("JXK01 : %d\n", is_second_read);
+ if(overlapped > 14) continue;
- int minor_cover_end_as_reversed = (current_vote -> masks[i][j] & IS_NEGATIVE_STRAND)? current_vote -> coverage_end[i][j]:(curr_read_len - current_vote -> coverage_start[i][j]);
- int minor_cover_start_as_reversed = (current_vote -> masks[i][j] & IS_NEGATIVE_STRAND)? current_vote -> coverage_start[i][j]:(curr_read_len - current_vote -> coverage_end[i][j]);
- int main_cover_end_as_reversed = (current_anchors[kx1].piece_main_masks & IS_NEGATIVE_STRAND)?current_anchors[kx1].piece_main_coverage_end:(curr_read_len - current_anchors[kx1].piece_main_coverage_start);
- int main_cover_start_as_reversed = (current_anchors[kx1].piece_main_masks & IS_NEGATIVE_STRAND)?current_anchors[kx1].piece_main_coverage_start:(curr_read_len - current_anchors[kx1].piece_main_coverage_end);
- // no long overlap
- int overlapped ;
- if(main_cover_start_as_reversed > minor_cover_start_as_reversed)
- overlapped = minor_cover_end_as_reversed - main_cover_start_as_reversed;
- else
- overlapped = main_cover_end_as_reversed - minor_cover_start_as_reversed;
+ int guess_start_as_reversed = (main_cover_start_as_reversed > minor_cover_start_as_reversed)?
+ (minor_cover_end_as_reversed - 15): (main_cover_end_as_reversed - 15);
- if(overlapped > 14) continue;
- //if(strcmp(read_name_1,"a4")==0)printf("JXK02\n");
+ int guess_end_as_reversed = (main_cover_start_as_reversed > minor_cover_start_as_reversed)?
+ (main_cover_start_as_reversed + 15): (minor_cover_start_as_reversed + 15);
- int guess_start_as_reversed = (main_cover_start_as_reversed > minor_cover_start_as_reversed)?
- (minor_cover_end_as_reversed - 15): (main_cover_end_as_reversed - 15);
+ int is_small_half_negative = 0 != ((current_vote -> pos[vote_i][vote_j]>current_vote -> pos[i][j]?current_vote -> masks[i][j]:current_vote -> masks[vote_i][vote_j])&IS_NEGATIVE_STRAND);
+ int is_large_half_negative = !is_small_half_negative;
- int guess_end_as_reversed = (main_cover_start_as_reversed > minor_cover_start_as_reversed)?
- (main_cover_start_as_reversed + 15): (minor_cover_start_as_reversed + 15);
+ int is_small_half_on_left_as_reversed = (main_cover_start_as_reversed > minor_cover_start_as_reversed) + (current_vote -> pos[vote_i][vote_j]> current_vote -> pos[i][j]) !=1;
+ // small half on left(as reversed) === small half on right (as 'forward' form of the read, i.e., the raw FASTQ form for read_A and reversed FASTQ form for read_B)
- int is_left_half_negative = 0 != ((current_anchor->piece_main_abs_offset>current_vote -> pos[i][j]?current_vote -> masks[i][j]:current_anchors[kx1].piece_main_masks)&IS_NEGATIVE_STRAND);
- int is_right_half_negative = !is_left_half_negative;
+ unsigned int small_half_abs_offset = min(current_vote -> pos[i][j], current_vote -> pos[vote_i][vote_j]);
+ unsigned int large_half_abs_offset = max(current_vote -> pos[i][j], current_vote -> pos[vote_i][vote_j]);
- int is_left_on_left_as_reversed = (main_cover_start_as_reversed > minor_cover_start_as_reversed) + (current_anchor->piece_main_abs_offset > current_vote -> pos[i][j]) !=1;
+ // curr_read_text is the 'reversed' form of the read. I.e., the reversed FASTQ form for read_A and the raw FASTQ form for read_B.
+ replace_minor = donor_jumped_score(global_context, thread_context, small_half_abs_offset, large_half_abs_offset,
+ max(0, guess_start_as_reversed) , min( guess_end_as_reversed, curr_read_len), curr_read_text,
+ curr_read_len, is_small_half_negative, is_large_half_negative, is_small_half_on_left_as_reversed,
+ & final_split_point, & is_GT_AG_donors, & is_donor_found, &small_side_increasing_coordinate, &large_side_increasing_coordinate);
- unsigned int left_half_abs_offset = min(current_vote -> pos[i][j],current_anchor->piece_main_abs_offset);
- unsigned int right_half_abs_offset = max(current_vote -> pos[i][j],current_anchor->piece_main_abs_offset);
+ if( 0 && 1018082 == pair_number)
+ {
+ print_votes(current_vote, global_context -> config.index_prefix);
+ SUBREADprintf("JUMP_001018082 NORMAL=%d SMALL_NEG=%d LARGE_NEG=%d, SMALL_ABS=%u LARGE_ABS=%u, REPLACE=%d, INCS=%d %d\n" , is_small_half_on_left_as_reversed, is_small_half_negative, is_large_half_negative, small_half_abs_offset, large_half_abs_offset, replace_minor, small_side_increasing_coordinate, large_side_increasing_coordinate);
+ }
- donors_found_score = donor_jumped_score(global_context, thread_context, left_half_abs_offset, right_half_abs_offset , max(0, guess_start_as_reversed) , min( guess_end_as_reversed, curr_read_len), curr_read_text, curr_read_len, is_left_half_negative, is_right_half_negative, is_left_on_left_as_reversed , is_second_read, & final_split_point, & is_GT_AG_donors, & is_donor_found);
- //printf("JXK03 : FOUND=%d : %u - %u at %d\n", donors_found_score , left_half_abs_offset, right_half_abs_offset , final_split_point );
- }
- else
- {
- // no long overlap
- int overlapped ;
- if(current_anchor->piece_main_coverage_start > current_vote -> coverage_start[i][j])
- overlapped = current_vote -> coverage_end[i][j] - current_anchor->piece_main_coverage_start;
- else
- overlapped = current_anchor->piece_main_coverage_end - current_vote -> coverage_start[i][j];
+ // Now "final_split_point" is the read offset on the 'reversed' form of the read. It needs to be changed to (read_len - final_split_point) if the major half is on negative strand.
- //printf("PL=%u, PR=%u, OVLP=%d\n", current_anchor->piece_main_coverage_start, current_vote -> coverage_start[i][j], overlapped);
- if(overlapped > 14) continue;
- if(abs(dist)<6) continue;
-
- int guess_start = (current_anchor->piece_main_coverage_start > current_vote -> coverage_start[i][j])?
- (current_vote -> coverage_end[i][j] - 8): (current_anchor->piece_main_coverage_end - 8);
-
- int guess_end = (current_anchor->piece_main_coverage_start < current_vote -> coverage_start[i][j])?
- (current_vote -> coverage_start[i][j] + 8): (current_anchor->piece_main_coverage_start + 8);
-
- if(global_context -> config.do_fusion_detection && !(current_anchor->piece_main_masks & IS_NEGATIVE_STRAND))
- // if for fusion, the current read must have been reversed.
- // hence, it is now changed to "main half" view.
- reverse_read(curr_read_text, curr_read_len, global_context -> config.space_type);
-
- int normally_arranged = 1!=(current_anchor->piece_main_coverage_start > current_vote -> coverage_start[i][j]) + (current_anchor->piece_main_abs_offset > current_vote -> pos[i][j]);
- if((! global_context -> config.do_fusion_detection )&& !normally_arranged ) continue;
- int left_indel_offset=0, right_indel_offset=0;
-
- int kx2;
- if(curr_read_len > EXON_LONG_READ_LENGTH){
- for(kx2=0; kx2<MAX_INDEL_SECTIONS; kx2++)
- {
- if(!current_vote -> indel_recorder[i][j][kx2*3]) break;
- minor_indel_offset += (current_vote -> indel_recorder[i][j][kx2*3+2]);
- }
- if(current_anchor->piece_main_abs_offset< current_vote -> pos[i][j])
- {
- left_indel_offset=current_anchor->piece_main_indels;
- right_indel_offset=minor_indel_offset;
- }
- else
- {
- right_indel_offset=current_anchor->piece_main_indels;
- left_indel_offset=minor_indel_offset;
+ if(replace_minor>0) replace_minor += current_vote -> votes[i][j] * 100000;
- }
+ }
+ else
+ {
+ int overlapped ;
+ if(current_vote -> coverage_start[vote_i][vote_j] > current_vote -> coverage_start[i][j])
+ overlapped = current_vote -> coverage_end[i][j] - current_vote -> coverage_start[vote_i][vote_j];
+ else
+ overlapped = current_vote -> coverage_end[vote_i][vote_j] - current_vote -> coverage_start[i][j];
- // the section having a smaller coordinate will have indel_offset !=0
- // the section having a larger coordiname MUST HAVE indel_offset == 0
- right_indel_offset=0;
- }
+ if(0 && FIXLENstrcmp("R000002444", read_name) == 0)
+ {
+ SUBREADprintf("OVL=%d, DIST=%llu\n", overlapped, abs(dist));
+ }
- donors_found_score = donor_score(global_context, thread_context, min(current_anchor->piece_main_abs_offset, current_vote -> pos[i][j]),max(current_anchor->piece_main_abs_offset, current_vote -> pos[i][j]), left_indel_offset, right_indel_offset, normally_arranged , max(0, guess_start) , min( guess_end, curr_read_len), curr_read_text, curr_read_len, is_second_read, & final_split_point, & is_GT_AG_donors, & is_donor_found, & inserted_bases);
- //printf("DONOR SCORE=%d AT %u,%u\n", donors_found_score, min(current_anchor->piece_main_abs_offset, current_vote -> pos[i][j]),max(current_anchor->piece_main_abs_offset, current_vote -> pos[i][j]));
+ if(overlapped > 14) continue;
+ if(abs(dist)<6) continue;
- if(global_context -> config.do_fusion_detection && !(current_anchors[kx1].piece_main_masks & IS_NEGATIVE_STRAND))
- // changed back.
- reverse_read(curr_read_text, curr_read_len, global_context -> config.space_type);
+ int guess_start = (current_vote -> coverage_start[vote_i][vote_j] > current_vote -> coverage_start[i][j])?
+ (current_vote -> coverage_end[i][j] - 8): (current_vote -> coverage_end[vote_i][vote_j] - 8);
- }
+ int guess_end = (current_vote -> coverage_start[vote_i][vote_j] < current_vote -> coverage_start[i][j])?
+ (current_vote -> coverage_start[i][j] + 8): (current_vote -> coverage_start[vote_i][vote_j] + 8);
+ if(global_context -> config.do_fusion_detection && !(current_vote -> masks[vote_i][vote_j] & IS_NEGATIVE_STRAND))
+ // if for fusion, the current read must have been reversed.
+ // hence, it is now changed to "main half" view.
+ reverse_read(curr_read_text, curr_read_len, global_context -> config.space_type);
- //printf("MINORV=%d\tDONOR_FOUND=%d\n", current_vote -> votes[i][j], donors_found_score);
+ int left_indel_offset=0, right_indel_offset=0;
+ int kx2;
+ int normally_arranged = 1!=(current_vote -> coverage_start[vote_i][vote_j] > current_vote -> coverage_start[i][j]) + (current_vote -> pos[vote_i][vote_j] > current_vote -> pos[i][j]);
- if(donors_found_score)
- {
- if(0&&global_context -> config.do_fusion_detection && (!(current_anchor->piece_main_masks & IS_NEGATIVE_STRAND)) && !is_strand_jumped)
- final_split_point = curr_read_len - final_split_point;
-
- current_anchor->piece_minor_abs_offset = current_vote -> pos[i][j];
- current_anchor->piece_minor_votes = current_vote -> votes[i][j];
- current_anchor->piece_minor_coverage_start = current_vote -> coverage_start[i][j];
- current_anchor->piece_minor_coverage_end = current_vote -> coverage_end[i][j];
- current_anchor->piece_minor_hamming_match = minor_hamming_match;
- current_anchor->piece_minor_read_quality = minor_read_quality;
- current_anchor->piece_minor_indel_offset = minor_indel_offset;
- current_anchor->intron_length = abs(dist);
- current_anchor->Score_H = new_score_H;
- current_anchor->Score_L = new_score_L;
- current_anchor->split_point = final_split_point;
- current_anchor->inserted_bases = inserted_bases;
- current_anchor->is_GT_AG_donors = is_GT_AG_donors;
- current_anchor->is_donor_found = is_donor_found;
- current_anchor->is_strand_jumped = is_strand_jumped ;
- max_score_H = new_score_H;
- max_score_L = new_score_L;
- }
- current_anchor -> is_break_even = is_second_read?is_anchor_2_breakeven:is_anchor_1_breakeven;
+ if(curr_read_len > EXON_LONG_READ_LENGTH){
+ int kx1;
+ gene_vote_number_t * indel_recorder = current_vote -> indel_recorder[vote_i][vote_j];
+ for(kx1=0; kx1<MAX_INDEL_SECTIONS; kx1++)
+ {
+ if(!indel_recorder[kx1*3]) break;
+ major_indels += indel_recorder[kx1*3+2];
}
- else
- if(new_score_H == max_score_H && new_score_L == max_score_L)
- current_anchor -> is_break_even = 1;
- }
- }
- if(current_anchors[kx1].is_strand_jumped)
- {
- // If "is_strand_jumped" is true, all coordinates so far are on the best voted strands (must be differnet strands, namely they're very likely to be overlapped).
- current_anchors[kx1].piece_minor_coverage_start = curr_read_len - current_anchors[kx1].piece_minor_coverage_end;
- current_anchors[kx1].piece_minor_coverage_end = curr_read_len - current_anchors[kx1].piece_minor_coverage_start;
- // Split_point is now the "negative strand read" view. It has to be changed to "main piece" view
- current_anchors[kx1].split_point = (current_anchors[kx1].piece_main_masks & IS_NEGATIVE_STRAND)?current_anchors[kx1].split_point:(curr_read_len-current_anchors[kx1].split_point);
- }
- }
- }
-
- int is_paired_end_selected = (global_context -> input_reads.is_paired_end_reads && is_result_in_PE( _global_retrieve_alignment_ptr(global_context, pair_number, 0, 0) ));
- int best_read_id_r1 ;
- int best_read_id_r2 =0;
- for(best_read_id_r1=0; best_read_id_r1<global_context->config.multi_best_reads; best_read_id_r1++)
- if(_global_retrieve_alignment_ptr(global_context, pair_number, 0, best_read_id_r1)->selected_votes<1)break;
+ for(kx2=0; kx2<MAX_INDEL_SECTIONS; kx2++)
+ {
+ if(!current_vote -> indel_recorder[i][j][kx2*3]) break;
+ minor_indel_offset += (current_vote -> indel_recorder[i][j][kx2*3+2]);
+ }
-
- if(global_context -> input_reads.is_paired_end_reads)
- {
+ if(current_vote -> pos[vote_i][vote_j] < current_vote -> pos[i][j])
+ {
+ left_indel_offset=major_indels;
+ right_indel_offset=minor_indel_offset;
+ }
+ else
+ {
+ right_indel_offset=major_indels;
+ left_indel_offset=minor_indel_offset;
- for(best_read_id_r2=0; best_read_id_r2<global_context->config.multi_best_reads; best_read_id_r2++)
- if(_global_retrieve_alignment_ptr(global_context, pair_number, 1, best_read_id_r2)->selected_votes<1)break;
-
- //if(pair_number == 119)
- // printf("RESULT(SCAN) _ 2 # %d\n", best_read_id_r2);
+ }
+ // the section having a smaller coordinate will have indel_offset !=0
+ // the section having a larger coordinate MUST HAVE indel_offset == 0
+ right_indel_offset=0;
+ }
-
- for(i=0; i<used_anchors_1; i++)
- for(j=0; j<used_anchors_2; j++)
- {
- long long int dist;
- //int all_votes = read_1_anchors[i].piece_main_votes + read_1_anchors[i].piece_minor_votes + read_2_anchors[j].piece_main_votes + read_2_anchors[j].piece_minor_votes;
- if(global_context -> config.do_fusion_detection)
- {
- unsigned int read1_tail_pos = read_1_anchors[i].piece_main_abs_offset;
- if(read_1_anchors[i].piece_minor_votes)
- read1_tail_pos = (read_1_anchors[i].piece_main_coverage_end > read_1_anchors[i].piece_minor_coverage_end)?
- read_1_anchors[i].piece_main_abs_offset: read_1_anchors[i].piece_minor_abs_offset ;
- unsigned int read2_head_pos = read_2_anchors[j].piece_main_abs_offset;
+ replace_minor = donor_score(global_context, thread_context, min(current_vote -> pos[vote_i][vote_j],
+ current_vote -> pos[i][j]),max(current_vote -> pos[vote_i][vote_j] ,
+ current_vote -> pos[i][j]), left_indel_offset, right_indel_offset, normally_arranged,
+ max(0, guess_start), min( guess_end, curr_read_len), curr_read_text, curr_read_len,
+ & final_split_point, & is_GT_AG_donors, & is_donor_found, & inserted_bases, &small_side_increasing_coordinate, &large_side_increasing_coordinate, read_name);
- if(read_2_anchors[j].piece_minor_votes)
- read2_head_pos = (read_2_anchors[j].piece_main_coverage_end < read_2_anchors[j].piece_minor_coverage_end)?
- read_2_anchors[j].piece_main_abs_offset: read_2_anchors[j].piece_minor_abs_offset ;
+ // Now "final_split_point" is the read offset on the 'reversed' form of the read (I.e., the reversed FASTQ form for read_A and the raw FASTQ form for read_B.) if do_fusion_detection AND if the main half is on negative strand.
+ // However, because the final_split_point is ALWAYS on the form where the major half can be mapped, final_split_point will never be changed.
- dist = read1_tail_pos;
- dist -= read2_head_pos;
+ if(replace_minor>0) replace_minor += current_vote -> votes[i][j] * 100000;
+ //SUBREADprintf("NOJUMP_DONORs=%d LOC=%u\n", replace_minor , current_vote -> pos[i][j]);
+ if(global_context -> config.do_fusion_detection && !(current_vote -> masks[vote_i][vote_j] & IS_NEGATIVE_STRAND))
+ // changed back.
+ reverse_read(curr_read_text, curr_read_len, global_context -> config.space_type);
}
- else
- {
- dist = read_1_anchors[i].piece_main_abs_offset;
- dist -= read_2_anchors[j].piece_main_abs_offset;
+ }
- if(read_1_anchors[i].piece_main_abs_offset > read_2_anchors[j].piece_main_abs_offset) dist += read_len_1;
- else if(read_1_anchors[i].piece_main_abs_offset < read_2_anchors[j].piece_main_abs_offset) dist -= read_len_2;
- else dist = abs(dist) + max(read_len_1, read_len_2);
+ if(0 && FIXLENstrcmp("R000002444", read_name) == 0)
+ {
+ char posout[100];
+ absoffset_to_posstr(global_context, current_vote -> pos[i][j], posout);
+ SUBREADprintf("TEST MINOR: POS=%s, REPLACE=%d\n", posout, replace_minor);
+ }
+ if(replace_minor){// && (replace_minor > current_piece_minor_score)){
+ current_piece_minor_score = replace_minor;
- }
+ junc_res -> minor_position = current_vote -> pos[i][j];
+ junc_res -> minor_votes = current_vote -> votes[i][j];
- // the two ends of a segment must conform to the order.
+ junc_res -> minor_coverage_start = current_vote -> coverage_start[i][j];
+ junc_res -> minor_coverage_end = current_vote -> coverage_end [i][j];
- unsigned long long int new_score_H = 0, new_score_L = 0;
+ junc_res -> double_indel_offset = (minor_indel_offset & 0xf)|((major_indels & 0xf)<<4);
+ junc_res -> split_point = final_split_point;
- int SUM_COVERAGE = read_1_anchors[i].piece_minor_coverage_end - read_1_anchors[i].piece_minor_coverage_start +
- read_2_anchors[j].piece_minor_coverage_end - read_2_anchors[j].piece_minor_coverage_start +
- read_1_anchors[i].piece_main_coverage_end - read_1_anchors[i].piece_main_coverage_start +
- read_2_anchors[j].piece_main_coverage_end - read_2_anchors[j].piece_main_coverage_start ;
+
+ if(0 && 1018082 == pair_number)
+ {
+ SUBREADprintf("REPLACED: LOC %u, INCS=%d %d\n", junc_res -> minor_position, small_side_increasing_coordinate, large_side_increasing_coordinate);
+ }
- int SUM_HAMMING = 0;
-
- if(global_context -> config.use_hamming_distance_break_ties)
- SUM_HAMMING = read_1_anchors[i].piece_main_hamming_match +
- read_1_anchors[i].piece_main_hamming_match +
- read_2_anchors[j].piece_minor_hamming_match +
- read_2_anchors[j].piece_minor_hamming_match ;
+ junc_res -> small_side_increasing_coordinate = small_side_increasing_coordinate;
+ junc_res -> large_side_increasing_coordinate = large_side_increasing_coordinate;
+ junc_res -> indel_at_junction = inserted_bases;
- int SUM_QUAL = 0;
-
- if(global_context -> config.use_quality_score_break_ties)
- SUM_QUAL = read_1_anchors[i].piece_main_read_quality +
- read_1_anchors[i].piece_main_read_quality +
- read_2_anchors[j].piece_minor_read_quality +
- read_2_anchors[j].piece_minor_read_quality ;
+ align_res -> result_flags &=~0x3;
+ if( (!is_donor_found) || is_GT_AG_donors > 2) align_res -> result_flags |= 3;
+ else align_res -> result_flags = is_GT_AG_donors? (align_res -> result_flags|CORE_IS_GT_AG_DONORS):(align_res -> result_flags &~CORE_IS_GT_AG_DONORS);
+
+ align_res -> result_flags = is_strand_jumpped? (align_res -> result_flags|CORE_IS_STRAND_JUMPED):(align_res -> result_flags &~CORE_IS_STRAND_JUMPED);
+ }
+ }
+ }
+
+ if(0 && memcmp("V0112_0155:7:1101:1173:2204", read_name, 26) == 0)
+ {
+ char leftpos[100], rightpos[100];
+ absoffset_to_posstr(global_context, current_vote -> pos[vote_i][vote_j] , leftpos);
+ absoffset_to_posstr(global_context, junc_res -> minor_position, rightpos);
+ SUBREADprintf("READ=%s, MAJOR=%s, MINOR=%s\n", read_name, leftpos, rightpos);
+ }
+
+
+ // This block runs after the minor half of this anchor is fully determined.
+ // If the minor half is a fusion and there is a strand jump, move the minor half coverage to the major half strand.
+ if(align_res -> result_flags & CORE_IS_STRAND_JUMPED)
+ {
+ // If "is_strand_jumped" is true, all coordinates so far are on the best voted strands (which must be different strands, so they are very likely to overlap).
+ int tmpv = junc_res -> minor_coverage_start;
+ junc_res -> minor_coverage_start = curr_read_len - junc_res -> minor_coverage_end;
+ junc_res -> minor_coverage_end = curr_read_len - tmpv;
+
+ // Split_point is now the "negative strand read" view. It has to be changed to "main piece" view
+ junc_res -> split_point = (align_res -> result_flags & CORE_IS_NEGATIVE_STRAND)?
+ junc_res -> split_point :
+ (curr_read_len - junc_res -> split_point);
+ }
+ }
+}
+
+
+void simple_PE_and_same_chro(global_context_t * global_context , simple_mapping_t * r1, simple_mapping_t * r2 , int * is_PE_distance, int * is_same_chromosome , int rlen1, int rlen2){ // Wrapper: takes two simple_mapping_t records and delegates to test_PE_and_same_chro() using their mapping_position fields.
+ test_PE_and_same_chro(global_context, r1 -> mapping_position, r2 -> mapping_position, is_PE_distance, is_same_chromosome, rlen1, rlen2); // All other arguments are forwarded unchanged. is_PE_distance / is_same_chromosome are presumably output flags set by the callee (the caller zero-initialises them and reads them after this call) -- TODO confirm against test_PE_and_same_chro.
+}
+
+int process_voting_junction_PE_topK(global_context_t * global_context, thread_context_t * thread_context, subread_read_number_t pair_number, gene_vote_t * vote_1, gene_vote_t * vote_2, char * read_name_1, char * read_name_2, char * read_text_1, char * read_text_2, int read_len_1, int read_len_2, int is_negative_strand, gene_vote_number_t v1_all_subreads, gene_vote_number_t v2_all_subreads)
+{
+ vote_combination_t * comb_buffer = malloc(global_context -> config.max_vote_combinations * sizeof(vote_combination_t));
+ simple_mapping_t * vote_simple_1_buffer, * vote_simple_2_buffer;
+ vote_simple_1_buffer = malloc(global_context -> config.max_vote_simples * sizeof(simple_mapping_t));
+ vote_simple_2_buffer = malloc(global_context -> config.max_vote_simples * sizeof(simple_mapping_t));
+ memset(comb_buffer, 0 , sizeof(vote_combination_t) * global_context -> config.max_vote_combinations);
+
+ int is_second_read,i,j;
+ int third_highest_votes[2][9];
+ int is_fully_covered_1 = 0;
+ int is_fully_covered_2 = 0;
+
+ for(is_second_read = 0 ; is_second_read < 1 + global_context -> input_reads.is_paired_end_reads; is_second_read ++)
+ {
+ gene_vote_t * current_vote = is_second_read?vote_2:vote_1;
+ int *top_three_buff = third_highest_votes[is_second_read], i , j;
+ int * is_fully_covered = is_second_read?&is_fully_covered_2:&is_fully_covered_1;
+ int current_read_len = is_second_read?read_len_2:read_len_1;
- int SUM_OF_INTRONS = read_1_anchors[i].intron_length + read_2_anchors[j].intron_length;
+ memset(top_three_buff, 0 , global_context -> config.top_scores * sizeof(int));
+
+ if(global_context->config.do_fusion_detection){
+ *is_fully_covered = test_fully_covered(global_context , current_vote, current_read_len);
+ }
- int anchor_major_votes = (read_1_anchors[i].piece_main_votes > read_2_anchors[j].piece_main_votes)? read_1_anchors[i].piece_main_votes :read_2_anchors[j].piece_main_votes;
- int anchor_minor_votes = (read_1_anchors[i].piece_main_votes > read_2_anchors[j].piece_main_votes)? read_1_anchors[i].piece_minor_votes :read_2_anchors[j].piece_minor_votes;
- int second_major_votes = (read_1_anchors[i].piece_main_votes > read_2_anchors[j].piece_main_votes)? read_2_anchors[j].piece_main_votes :read_1_anchors[i].piece_main_votes;
- int second_minor_votes = (read_1_anchors[i].piece_main_votes > read_2_anchors[j].piece_main_votes)? read_2_anchors[j].piece_minor_votes :read_1_anchors[i].piece_minor_votes;
- make_128bit_score(&new_score_H, &new_score_L,1, anchor_major_votes, anchor_minor_votes, second_major_votes, second_minor_votes, SUM_COVERAGE , SUM_HAMMING , SUM_QUAL, abs(dist) , global_context->config.report_multi_mapping_reads?SUM_OF_INTRONS:0);
+ for (i=0; i<GENE_VOTE_TABLE_SIZE; i++)
+ {
+ for (j=0; j< current_vote->items[i]; j++)
+ update_top_three(global_context, top_three_buff, current_vote -> votes[i][j]);
+ }
- alignment_result_t * alignment_1_best = _global_retrieve_alignment_ptr(global_context, pair_number, 0, 0);
- alignment_result_t * alignment_2_best = _global_retrieve_alignment_ptr(global_context, pair_number, 1, 0);
+ //SUBREADprintf("3N [R %d] =%d,%d,%d\n", 1+is_second_read, top_three_buff[0], top_three_buff[1], top_three_buff[2]);
+ for(i = 0; i < global_context -> config.multi_best_reads; i++)
+ {
+ mapping_result_t * old_result = _global_retrieve_alignment_ptr(global_context, pair_number, is_second_read, i);
+ if(old_result -> selected_votes>0)
+ {
+ update_top_three(global_context, top_three_buff, old_result -> selected_votes);
+ }
+ }
+ //SUBREADprintf("3Q [R %d] =%d,%d,%d\n", 1+is_second_read, top_three_buff[0], top_three_buff[1], top_three_buff[2]);
+ }
+
- //SUBREADprintf("PREV_POS_PE=%u at %d ;; %u at %d ; DIST=%lld\n", read_1_anchors[i].piece_main_abs_offset, read_1_anchors[i].piece_main_votes , read_2_anchors[j].piece_main_abs_offset, read_2_anchors[j].piece_main_votes, dist);
+ int simple_record_numbers[2], third_k;
+
+ for(is_second_read = 0 ; is_second_read < 1 + global_context -> input_reads.is_paired_end_reads; is_second_read ++)
+ {
+ int current_simple_number = 0;
+ int current_read_len = is_second_read?read_len_2:read_len_1;
+ // populate the two simple read lists
+ for(third_k = 0 ; third_k < global_context -> config.top_scores; third_k ++)
+ {
+ if(current_simple_number >= global_context -> config.max_vote_simples)break;
+ int this_vote_N = third_highest_votes [is_second_read][third_k];
+ // only consider max_votes and max_votes - 1
+ if(this_vote_N<1 || (third_highest_votes[is_second_read][0] - this_vote_N > global_context -> config.max_vote_number_cutoff )) break;
- if(!global_context -> config.do_fusion_detection)
+ simple_mapping_t * current_simple = is_second_read ? vote_simple_2_buffer: vote_simple_1_buffer;
+ gene_vote_t * current_vote = is_second_read?vote_2:vote_1;
+ for (i=0; i<GENE_VOTE_TABLE_SIZE; i++)
+ {
+ if(current_simple_number >= global_context -> config.max_vote_simples)break;
+ for (j=0; j< current_vote->items[i]; j++)
+ {
+ if(current_simple_number >= global_context -> config.max_vote_simples)break;
+ if(third_k == 0 && current_vote->votes[i][j] >= third_highest_votes [is_second_read][global_context -> config.top_scores - 1])
{
- // a junction read
- if(global_context->config.is_rna_seq_reads && (read_1_anchors[i].piece_minor_votes || read_2_anchors[j].piece_minor_votes))
+
+ if(0 && memcmp("V0112_0155:7:1101:2293:2015", read_name_1, 26) == 0)
{
- if((dist < 0 && is_negative_strand) || (dist > 0 && !is_negative_strand))
- continue;
- if(abs(dist) > global_context->config.maximum_pair_distance + 100000)
- continue;
+ char posout[100];
+ absoffset_to_posstr(global_context, current_vote -> pos[i][j], posout);
+
+ SUBREADprintf("[%s] INSERT BIG_MARGIN AT %s: COV=%d ~ %d ; V = %d\n", read_name_1, posout, current_vote -> coverage_start[i][j], current_vote -> coverage_end[i][j] , current_vote -> votes[i][j]);
}
- else
- {
- // an exonic read
-
- if(global_context -> config.is_first_read_reversed && !global_context -> config.is_second_read_reversed)
- // if "--rf" : second read must on the positive strand
- if(is_negative_strand) continue;
- if(global_context->config.restrected_read_order)
- {
- if(read_1_anchors[i].piece_main_abs_offset != read_2_anchors[j].piece_main_abs_offset)
- {
- if((dist < 0 && is_negative_strand) || (dist > 0 && !is_negative_strand))
- continue;
- }
- }
- else
- {
-
- if((dist < 0 && is_negative_strand) || (dist > 0 && !is_negative_strand))
- {
- if(abs(dist) > abs(global_context->config.minimum_pair_distance ))
- continue;
- }
- }
+ insert_big_margin_record(global_context , _global_retrieve_big_margin_ptr(global_context,pair_number, is_second_read), current_vote -> votes[i][j], current_vote -> coverage_start[i][j], current_vote -> coverage_end[i][j] , current_read_len, (current_vote -> masks[i][j] & IS_NEGATIVE_STRAND)?1:0);
- if(abs(dist) > global_context->config.maximum_pair_distance || abs(dist) < global_context->config.minimum_pair_distance)
- continue;
-
- }
}
-
- //SUBREADprintf("TEST_POS_VOTE , PE=%u at %d ;; %u at %d ; RES=%u\n", read_1_anchors[i].piece_main_abs_offset, read_1_anchors[i].piece_main_votes , read_2_anchors[j].piece_main_abs_offset, read_2_anchors[j].piece_main_votes, pair_number);
- //SUBREADprintf("HL SCORES: NEW=%016llX %016llX OLD=%016llX %016llX\n", new_score_H, new_score_L, alignment_1_best-> Score_H , alignment_1_best-> Score_L);
- if(new_score_H > alignment_1_best -> Score_H || (new_score_H == alignment_1_best-> Score_H && new_score_L >= alignment_1_best-> Score_L))
+ if(current_vote->votes[i][j] == this_vote_N && current_vote->votes[i][j] >= global_context->config.minimum_subread_for_second_read)
{
- if(new_score_H > alignment_1_best-> Score_H || new_score_L > alignment_1_best-> Score_L)
- {
- best_read_id_r1 = 0;
- best_read_id_r2 = 0;
+ current_simple[current_simple_number].is_vote_t_item = 1;
+ current_simple[current_simple_number].item_index_i = i;
+ current_simple[current_simple_number].item_index_j = j;
+ current_simple[current_simple_number].mapping_position = current_vote -> pos[i][j];
+ current_simple[current_simple_number].major_half_votes = current_vote -> votes[i][j];
+ current_simple_number ++;
+
+ }
+ }
+ }
- alignment_1_best -> result_flags &= ~CORE_IS_BREAKEVEN;
- alignment_2_best -> result_flags &= ~CORE_IS_BREAKEVEN;
- }
- else
- {
- //printf("SET_BE: %d ; S=%16llx+%16llX\n", pair_number, new_score_H, new_score_L);
- if(alignment_1_best -> selected_position != read_1_anchors[i].piece_main_abs_offset &&
- alignment_2_best -> selected_position != read_2_anchors[i].piece_main_abs_offset )
- {
- alignment_1_best -> result_flags |= CORE_IS_BREAKEVEN;
- alignment_2_best -> result_flags |= CORE_IS_BREAKEVEN;
- }
- }
+ for(i = 0; i < global_context -> config.multi_best_reads; i++)
+ {
+ mapping_result_t * old_result = _global_retrieve_alignment_ptr(global_context, pair_number, is_second_read, i);
+ if(0 && FIXLENstrcmp("V0112_0155:7:1101:2293:2015", read_name_1)==0)
+ SUBREADprintf("OLD_VOTE_N[%d]; VOTE = %d ( %d == X ) ; SIMP_NO %d > %d POS=%u\n", i, old_result -> selected_votes, this_vote_N, current_simple_number, global_context -> config.max_vote_simples, old_result -> selected_position);
+ if(current_simple_number >= global_context -> config.max_vote_simples)break;
+ if(old_result -> selected_votes == this_vote_N)
+ {
+ current_simple[current_simple_number].is_vote_t_item = 0;
+ current_simple[current_simple_number].item_index_i = i;
+ current_simple[current_simple_number].mapping_position = old_result -> selected_position;
+ current_simple[current_simple_number].major_half_votes = old_result -> selected_votes;
+
+ current_simple_number ++;
+ }
+ }
+
+ if(0 && strcmp(read_name_1, "V0112_0155:7:1101:2293:2015")==0)
+ SUBREADprintf("Read %d : Anchors = %d\n", is_second_read + 1, current_simple_number);
+ }
+ simple_record_numbers[is_second_read] = current_simple_number;
+ }
- int r1_used_subreads = max(v1_all_subreads, alignment_1_best->used_subreads_in_vote );
- int r2_used_subreads = max(v2_all_subreads, alignment_2_best->used_subreads_in_vote );
+ int used_comb_buffer = 0;
+ //calculate all combinations
+ if(global_context -> input_reads.is_paired_end_reads){
+ for(i = 0; i < simple_record_numbers[0]; i++){
+ for(j = 0; j < simple_record_numbers[1]; j++){
+ int target_index;
+ int is_PE_distance = 0, is_same_chromosome = 0;
- //if(pair_number==25)
- //#warning "COMMENT THIS PRINTF "
- //SUBREADprintf("BEST_POS_PE=%u at %d ;; %u at %d ; RES=%u\n", read_1_anchors[i].piece_main_abs_offset, read_1_anchors[i].piece_main_votes , read_2_anchors[j].piece_main_abs_offset, read_2_anchors[j].piece_main_votes, pair_number);
+ if(0 && FIXLENstrcmp("R006633992", read_name_1)==0)
+ SUBREADprintf("TOPK #%d-%d : %d, %d < %d, PE=%d %u ~ %u\n", i,j, vote_simple_1_buffer[i].major_half_votes, vote_simple_2_buffer[j].major_half_votes, global_context->config.minimum_subread_for_first_read, is_PE_distance, ( vote_simple_1_buffer+i )->mapping_position , (vote_simple_2_buffer+j) ->mapping_position);
- set_alignment_result(global_context, pair_number, 0, best_read_id_r1, read_1_anchors[i].piece_main_abs_offset, read_1_anchors[i].piece_main_votes , read_1_anchors[i].piece_main_indel_record, read_1_anchors[i].piece_main_coverage_start, read_1_anchors[i].piece_main_coverage_end, 0!=(read_1_anchors[i].piece_main_masks & IS_NEGATIVE_STRAND), read_1_anchors[i].piece_minor_abs_offset, read_1_anchors[i].piece_minor_votes, read_1_anchors[i].piece_minor_coverage_start, read_1_anchors[i].pi [...]
- set_alignment_result(global_context, pair_number, 1, best_read_id_r2, read_2_anchors[j].piece_main_abs_offset, read_2_anchors[j].piece_main_votes , read_2_anchors[j].piece_main_indel_record, read_2_anchors[j].piece_main_coverage_start, read_2_anchors[j].piece_main_coverage_end, 0!=(read_2_anchors[j].piece_main_masks & IS_NEGATIVE_STRAND), read_2_anchors[j].piece_minor_abs_offset, read_2_anchors[j].piece_minor_votes, read_2_anchors[j].piece_minor_coverage_start, read_2_anchors[j].pi [...]
+ if(max(vote_simple_1_buffer[i].major_half_votes, vote_simple_2_buffer[j].major_half_votes) < global_context->config.minimum_subread_for_first_read)continue;
+
+ simple_PE_and_same_chro(global_context , vote_simple_1_buffer+i, vote_simple_2_buffer+j , &is_PE_distance, &is_same_chromosome , read_len_1, read_len_2);
+ if((!is_PE_distance) && min(vote_simple_1_buffer[i].major_half_votes, vote_simple_2_buffer[j].major_half_votes) < global_context->config.minimum_subread_for_first_read)continue;
+
+ //#warning " ============== USE THE FIRST WEIGHT FORMULA IN RELEASE ================ "
+ //#warning " ============== USE THE SECOND WEIGHT FORMULA FOR SVs GRANT APP ======== "
+ int adjusted_weight = is_PE_distance?1300:(is_same_chromosome?1000:800);
+ if(global_context -> config.PE_predominant_weight) adjusted_weight = is_PE_distance?13000:(is_same_chromosome?100:80);
+ //int adjusted_weight = is_PE_distance?1600:(is_same_chromosome?1000:500);
+ int adjusted_votes = (vote_simple_1_buffer[i].major_half_votes + vote_simple_2_buffer[j].major_half_votes) * adjusted_weight;
+
+ for(target_index=0; target_index<used_comb_buffer; target_index++){
+ if(comb_buffer[target_index].score_adj < adjusted_votes) break;
+ }
- alignment_1_best -> Score_H = new_score_H;
- alignment_1_best -> Score_L = new_score_L;
- alignment_2_best -> Score_H = new_score_H;
- alignment_2_best -> Score_L = new_score_L;
-
- is_paired_end_selected = 1;
+ if(target_index < global_context -> config.max_vote_combinations){
+ int move_i;
+ for(move_i = min(used_comb_buffer, global_context -> config.max_vote_combinations - 1) ; move_i > target_index ; move_i --)
+ memcpy(comb_buffer + move_i, comb_buffer + move_i - 1 , sizeof(vote_combination_t) );
- assert(best_read_id_r1==best_read_id_r2);
+ comb_buffer[target_index].r1_loc = vote_simple_1_buffer+i;
+ comb_buffer[target_index].r2_loc = vote_simple_2_buffer+j;
+ comb_buffer[target_index].score_adj = adjusted_votes;
- best_read_id_r1 += 1;
- best_read_id_r2 += 1;
+ if(used_comb_buffer < global_context -> config.max_vote_combinations)
+ used_comb_buffer ++;
- int set0_x1;
- for(set0_x1 = best_read_id_r1;set0_x1 < global_context->config.multi_best_reads; set0_x1++)
- {
- set_zero_votes(global_context, pair_number,0 , set0_x1);
- set_zero_votes(global_context, pair_number,1 , set0_x1);
- }
+ if(0 && FIXLENstrcmp("V0112_0155:7:1101:19612:13380", read_name_1)==0)
+ SUBREADprintf("Vadj [%d][%d] = %d (raw = %d + %d), PE=%d, Target=%d/%d\n", i,j , adjusted_votes, vote_simple_1_buffer[i].major_half_votes, vote_simple_2_buffer[j].major_half_votes, is_PE_distance, target_index, used_comb_buffer);
- }
}
+
+ }
}
+ }
- if(!is_paired_end_selected)
- {
- alignment_result_t * alignment_1_best = _global_retrieve_alignment_ptr(global_context, pair_number, 0, 0);
- for(i=0; i<used_anchors_1; i++)
- {
- if((read_1_anchors[i].Score_H > alignment_1_best -> Score_H) || (read_1_anchors[i].Score_H == alignment_1_best -> Score_H && read_1_anchors[i].Score_L >= alignment_1_best -> Score_L))
- {
- //if(global_context->input_reads.is_paired_end_reads)
- // best_read_id_r1 = 0;
- //else
- {
- if(read_1_anchors[i].Score_H > alignment_1_best -> Score_H || read_1_anchors[i].Score_L > alignment_1_best -> Score_L )
- {
- best_read_id_r1 = 0;
+ mapping_result_t * alignment_tmp_r1, * alignment_tmp_r2;
+ alignment_tmp_r1 = malloc(sizeof(mapping_result_t) * global_context->config.multi_best_reads);
+ alignment_tmp_r2 = malloc(sizeof(mapping_result_t) * global_context->config.multi_best_reads);
- if(read_1_anchors[i].is_break_even)
- alignment_1_best -> result_flags |= CORE_IS_BREAKEVEN;
- else
- alignment_1_best -> result_flags &= ~CORE_IS_BREAKEVEN;
- }
- else
- {
- if(read_1_anchors[i].piece_main_abs_offset > _global_retrieve_alignment_ptr(global_context, pair_number, 0, 0)->selected_position && global_context->config.multi_best_reads == 1)
- best_read_id_r1 = 0;
+ subjunc_result_t * junction_tmp_r2 , * junction_tmp_r1;
+ junction_tmp_r1 = malloc(sizeof(subjunc_result_t) * global_context->config.multi_best_reads);
+ junction_tmp_r2 = malloc(sizeof(subjunc_result_t) * global_context->config.multi_best_reads);
- alignment_1_best -> result_flags |= CORE_IS_BREAKEVEN;
- }
- }
+ memset(junction_tmp_r1, 0, sizeof(subjunc_result_t) * global_context->config.multi_best_reads);
+ memset(junction_tmp_r2, 0, sizeof(subjunc_result_t) * global_context->config.multi_best_reads);
- if(best_read_id_r1<global_context->config.multi_best_reads)
- {
- alignment_1_best -> Score_H = read_1_anchors[i].Score_H;
- alignment_1_best -> Score_L = read_1_anchors[i].Score_L;
+ memset(alignment_tmp_r1, 0, sizeof(mapping_result_t) * global_context->config.multi_best_reads);
+ memset(alignment_tmp_r2, 0, sizeof(mapping_result_t) * global_context->config.multi_best_reads);
+
+ int alignment_res_r1_cursor = 0, alignment_res_r2_cursor = 0;
- alignment_result_t * r1_result = _global_retrieve_alignment_ptr(global_context, pair_number, 0, best_read_id_r1);
- int r1_used_subreads = max(v1_all_subreads, r1_result->used_subreads_in_vote );
- set_alignment_result(global_context, pair_number, 0, best_read_id_r1, read_1_anchors[i].piece_main_abs_offset, read_1_anchors[i].piece_main_votes , read_1_anchors[i].piece_main_indel_record, read_1_anchors[i].piece_main_coverage_start, read_1_anchors[i].piece_main_coverage_end, 0!=(read_1_anchors[i].piece_main_masks & IS_NEGATIVE_STRAND), read_1_anchors[i].piece_minor_abs_offset, read_1_anchors[i].piece_minor_votes, read_1_anchors[i].piece_minor_coverage_start, read_1_anchors[i].pi [...]
+ if(used_comb_buffer > 0){
+ //sort the comb buffers.
- best_read_id_r1 += 1;
+ //quick_sort(comb_buffer, used_comb_buffer, comb_sort_compare, comb_sort_exchange);
+ merge_sort(comb_buffer, used_comb_buffer, comb_sort_compare, comb_sort_exchange, comb_sort_merge);
- int set0_x1;
- for(set0_x1 = best_read_id_r1;set0_x1 < global_context->config.multi_best_reads; set0_x1++)
- set_zero_votes(global_context, pair_number,0 , set0_x1);
+ if(0 && FIXLENstrcmp("V0112_0155:7:1101:19612:13380", read_name_1)==0)
+ for(i = 0; i < used_comb_buffer; i++)
+ {
+ SUBREADprintf("C[%d], SCORE = %llu ; VOTES = %d + %d\n", i, comb_buffer[i].score_adj, comb_buffer[i].r1_loc -> major_half_votes, comb_buffer[i].r2_loc -> major_half_votes);
+ }
+ for(is_second_read = 0; is_second_read < 1 + global_context -> input_reads.is_paired_end_reads; is_second_read++){
+ int current_read_len = is_second_read ? read_len_2:read_len_1;
+ char * current_read_text = is_second_read ? read_text_2:read_text_1;
+ int current_all_subreads = is_second_read ? v2_all_subreads:v1_all_subreads;
+ mapping_result_t * current_alignment_tmp = is_second_read?alignment_tmp_r2:alignment_tmp_r1;
+ int * current_r_cursor = is_second_read ? &alignment_res_r2_cursor:&alignment_res_r1_cursor;
+ int * is_fully_covered = is_second_read?&is_fully_covered_2:&is_fully_covered_1;
+ gene_vote_t * current_vote = is_second_read?vote_2:vote_1;
- //if(pair_number == 119){
- //#warning " ======== COMMENT THIS LINE !! ========"
- // printf("RESULT _ 1 # %d : %llu, %u\n", best_read_id_r1-1, alignment_1_best -> Score_H, alignment_1_best -> Score_L);
- // print_votes(vote_1, global_context -> config.index_prefix);
- //}
+ subjunc_result_t * current_junction_tmp = NULL;
+ if(global_context -> config.do_breakpoint_detection) current_junction_tmp = is_second_read?junction_tmp_r2:junction_tmp_r1;
+
+ for(i = used_comb_buffer - 1; i >=0; i--){
+ if((* current_r_cursor) >= global_context->config.multi_best_reads)break;
+
+ // add the combination of comb_buffer[i] into the two mapping_result_t arrays
+ simple_mapping_t * current_loc = is_second_read?comb_buffer[i].r2_loc:comb_buffer[i].r1_loc;
+ assert(current_loc);
+ unsigned int current_pos = current_loc->mapping_position;
+
+ int is_exist = 0;
+ for(j = 0; j < *current_r_cursor; j++)
+ {
+ if(current_alignment_tmp[j].selected_position == current_pos){
+ is_exist = 1;
+ break;
+ }
+ }
+
+ if(0 && memcmp("HWI-ST212:219:C0C1TACXX:1:1107:20025:113054", read_name_1, 41)==0){
+ SUBREADprintf("%s %s : Read_%d ; BEST=%d / %d, %u\n", is_exist?" ":"NEW", read_name_1 , is_second_read + 1 , *current_r_cursor , global_context->config.multi_best_reads, current_loc->mapping_position);
+ }
+
+ if(!is_exist){
+ //SUBREADprintf("%u\tC_i=%d, C_j=%d, IS_VOTE=%d, Vadj=%llu\n", pair_number, current_loc -> item_index_i, current_loc -> item_index_j, current_loc -> is_vote_t_item, comb_buffer[i].score_adj);
+ if(current_loc -> is_vote_t_item)
+ copy_vote_to_alignment_res(global_context, thread_context, current_alignment_tmp + (*current_r_cursor), current_junction_tmp ? current_junction_tmp + (*current_r_cursor) : NULL, current_vote, current_loc -> item_index_i, current_loc -> item_index_j, current_read_len, read_name_1, current_read_text, current_all_subreads , current_vote -> noninformative_subreads, pair_number, is_second_read, is_fully_covered);
+ else{
+ memcpy(current_alignment_tmp + (*current_r_cursor), _global_retrieve_alignment_ptr(global_context, pair_number, is_second_read, current_loc -> item_index_i), sizeof(mapping_result_t));
+ if(current_junction_tmp)
+ memcpy(current_junction_tmp + (*current_r_cursor), _global_retrieve_subjunc_ptr(global_context, pair_number, is_second_read, current_loc -> item_index_i), sizeof(subjunc_result_t));
}
+ (*current_r_cursor)++;
}
}
+ }
+ }else{// if the one end is not mapped at all
+
+ if(0 == simple_record_numbers[0])
+ _global_retrieve_alignment_ptr(global_context, pair_number, 0, 0) -> noninformative_subreads_in_vote = vote_1 -> noninformative_subreads;
+ if(global_context -> input_reads.is_paired_end_reads && 0 == simple_record_numbers[1])
+ _global_retrieve_alignment_ptr(global_context, pair_number, 1, 0) -> noninformative_subreads_in_vote = vote_2 -> noninformative_subreads;
+ if(simple_record_numbers[0]>0 || simple_record_numbers[1]>0)
+ {
+ // copy all the simple into the mapping_result_t
- if(global_context -> input_reads.is_paired_end_reads)
+ for(is_second_read = 0; is_second_read < 1 + global_context -> input_reads.is_paired_end_reads; is_second_read++)
{
+ int * current_r_cursor = is_second_read ? &alignment_res_r2_cursor:&alignment_res_r1_cursor;
- alignment_result_t * alignment_2_best = _global_retrieve_alignment_ptr(global_context, pair_number, 1, 0);
- for(j=0; j<used_anchors_2; j++)
- {
- if(read_2_anchors[j].Score_H > alignment_2_best -> Score_H || (read_2_anchors[j].Score_H == alignment_2_best -> Score_H && read_2_anchors[j].Score_L >= alignment_2_best -> Score_L))
- {
- if(read_2_anchors[j].Score_H > alignment_2_best -> Score_H || read_2_anchors[j].Score_L > alignment_2_best -> Score_L)
- best_read_id_r2 = 0;
+ int current_read_len = is_second_read ? read_len_2:read_len_1;
+ char * current_read_text = is_second_read ? read_text_2:read_text_1;
+ int current_all_subreads = is_second_read ? v2_all_subreads:v1_all_subreads;
+ mapping_result_t * current_alignment_tmp = is_second_read?alignment_tmp_r2:alignment_tmp_r1;
+ gene_vote_t * current_vote = is_second_read?vote_2:vote_1;
+ int * is_fully_covered = is_second_read?&is_fully_covered_2:&is_fully_covered_1;
- if(read_2_anchors[j].Score_H == alignment_2_best -> Score_H && read_2_anchors[j].Score_L == alignment_2_best -> Score_L)
- {
- if(alignment_2_best -> selected_position != read_2_anchors[i].piece_main_abs_offset)
- alignment_2_best -> result_flags |= CORE_IS_BREAKEVEN;
+ subjunc_result_t * current_junction_tmp = NULL;
+ if(global_context -> config.do_breakpoint_detection) current_junction_tmp = is_second_read?junction_tmp_r2:junction_tmp_r1;
+
+ for(i = 0; i < simple_record_numbers[is_second_read]; i++){
+
+ if((*current_r_cursor) >= global_context->config.multi_best_reads)break;
+
+ simple_mapping_t * current_loc = is_second_read?vote_simple_2_buffer+i:vote_simple_1_buffer+i;
+
+ if(current_loc -> major_half_votes < global_context->config.minimum_subread_for_first_read) continue;
+ unsigned int current_pos = current_loc->mapping_position;
+
+ int is_exist = 0;
+ for(j = 0; j < *current_r_cursor; j++)
+ {
+ if(current_alignment_tmp[j].selected_position == current_pos){
+ is_exist = 1;
+ break;
}
- else
- {
- if(read_2_anchors[j].is_break_even)
- alignment_2_best -> result_flags |= CORE_IS_BREAKEVEN;
- else
- alignment_2_best -> result_flags &= ~CORE_IS_BREAKEVEN;
+ }
+ if(!is_exist){
+ if(current_loc -> is_vote_t_item)
+ copy_vote_to_alignment_res(global_context, thread_context, current_alignment_tmp + (*current_r_cursor), current_junction_tmp ? current_junction_tmp + (*current_r_cursor): NULL, current_vote, current_loc -> item_index_i, current_loc -> item_index_j, current_read_len, read_name_1, current_read_text, current_all_subreads , current_vote -> noninformative_subreads, pair_number, is_second_read, is_fully_covered);
+ else{
+ memcpy(current_alignment_tmp + (*current_r_cursor), _global_retrieve_alignment_ptr(global_context, pair_number, is_second_read, current_loc -> item_index_i), sizeof(mapping_result_t));
+ if(current_junction_tmp)
+ memcpy(current_junction_tmp + (*current_r_cursor), _global_retrieve_subjunc_ptr(global_context, pair_number, is_second_read, current_loc -> item_index_i), sizeof(subjunc_result_t));
}
- if(best_read_id_r2<global_context->config.multi_best_reads)
+ if(0)
{
- alignment_2_best -> Score_H = read_2_anchors[j].Score_H;
- alignment_2_best -> Score_L = read_2_anchors[j].Score_L;
- is_paired_end_selected = 0;
-
- // TODO: add result at best_read_id_r2
- alignment_result_t * r2_result = _global_retrieve_alignment_ptr(global_context, pair_number, 1, 0);
- int r2_used_subreads = max(v2_all_subreads, r2_result->used_subreads_in_vote );
- set_alignment_result(global_context, pair_number, 1, best_read_id_r2, read_2_anchors[j].piece_main_abs_offset, read_2_anchors[j].piece_main_votes , read_2_anchors[j].piece_main_indel_record, read_2_anchors[j].piece_main_coverage_start, read_2_anchors[j].piece_main_coverage_end, 0!=(read_2_anchors[j].piece_main_masks & IS_NEGATIVE_STRAND), read_2_anchors[j].piece_minor_abs_offset, read_2_anchors[j].piece_minor_votes, read_2_anchors[j].piece_minor_coverage_start, read_2_anchors[j].p [...]
- best_read_id_r2 ++;
-
- int set0_x1;
- for(set0_x1 = best_read_id_r2;set0_x1 < global_context->config.multi_best_reads; set0_x1++)
- set_zero_votes(global_context, pair_number,1 , set0_x1);
-
- //if(pair_number == 119)
- //{
- // printf("RESULT _ 2 # %d : %llu, %u\n", best_read_id_r2-1, alignment_2_best -> Score_H, alignment_2_best -> Score_L);
- //}
+ char posout[100];
+ absoffset_to_posstr(global_context, current_alignment_tmp[*current_r_cursor] . selected_position, posout);
+ SUBREADprintf("The %d-th %s is at %s; vote=%d, minor=%d\n", *current_r_cursor, read_name_1, posout, current_alignment_tmp[*current_r_cursor].selected_votes, current_junction_tmp[*current_r_cursor].minor_votes);
}
+ (*current_r_cursor)++;
}
}
}
-
}
-
}
- alignment_result_t * tmp_result = _global_retrieve_alignment_ptr(global_context, pair_number, 0, 0);
- if(tmp_result->selected_votes <1)
+ //SUBREADprintf("TOPK : CANDIDATES = %d , %d\n", alignment_res_r1_cursor, alignment_res_r2_cursor);
+
+ for(is_second_read = 0; is_second_read < 1 + global_context -> input_reads.is_paired_end_reads; is_second_read++)
{
- tmp_result -> used_subreads_in_vote = max(v1_all_subreads, tmp_result -> used_subreads_in_vote );
- tmp_result -> noninformative_subreads_in_vote = max(vote_1 -> noninformative_subreads, tmp_result -> noninformative_subreads_in_vote);
+ int * current_r_cursor = is_second_read ? &alignment_res_r2_cursor:&alignment_res_r1_cursor;
+ mapping_result_t * current_alignment_tmp = is_second_read?alignment_tmp_r2:alignment_tmp_r1;
+ subjunc_result_t * current_junction_tmp = NULL;
+
+ if(global_context -> config.do_breakpoint_detection) current_junction_tmp = is_second_read?junction_tmp_r2:junction_tmp_r1;
+
+ for(i = 0; i < global_context->config.multi_best_reads ; i++){
+ mapping_result_t * cur_res = _global_retrieve_alignment_ptr(global_context, pair_number, is_second_read, i);
+ if( i < (*current_r_cursor))
+ {
+ memcpy(cur_res, current_alignment_tmp + i, sizeof(mapping_result_t));
+ if(0 && FIXLENstrcmp("V0112_0155:7:1101:19612:13380", read_name_1)==0)
+ SUBREADprintf("COPIED READ_%d\t\t%llu [%d] , V=%d, MASK=%d, POS=%u, PTR=%p\n", is_second_read + 1, pair_number, *current_r_cursor, cur_res -> selected_votes, cur_res -> result_flags, current_alignment_tmp[i].selected_position, cur_res);
+ }
+ else cur_res -> selected_votes = 0;
+
+ if(global_context -> config.do_breakpoint_detection) {
+ subjunc_result_t * cur_junc = _global_retrieve_subjunc_ptr(global_context, pair_number, is_second_read, i);
+ if(i < (*current_r_cursor))
+ {
+ memcpy(cur_junc, current_junction_tmp + i , sizeof(subjunc_result_t));
+ if(0 && FIXLENstrcmp("V0112_0155:7:1101:19612:13380", read_name_1)==0)
+ SUBREADprintf("COPIED SUBJUNC: MINOR=%u, MINORVOTES=%d\n", (current_junction_tmp + i) -> minor_position, (current_junction_tmp + i) -> minor_votes);
+ }
+ else cur_junc -> minor_votes = 0;
+
+ }
+ }
}
+ free(junction_tmp_r1);
+ free(junction_tmp_r2);
+ free(alignment_tmp_r1);
+ free(alignment_tmp_r2);
+ free(comb_buffer);
+ free(vote_simple_1_buffer);
+ free(vote_simple_2_buffer);
+
+ return 0;
+}
+
+
+// seq1 and seq2 must be on the same strand!
+// (seq2 is reversed)
+// The second half of seq1 MUST BE the same as the first half of seq2 if the two reads have an overlapping part.
+int is_gapped_as_funky(global_context_t * global_context, char * rname1, char * chr1, unsigned int pos1, int rlen1, int is_1_negative, char * cigar1, char * seq1, char * rname2, char * chr2, unsigned int pos2, int rlen2, int is_2_negative, char * cigar2, char * seq2, int tlen_removed_intron)
+{
+/*
+ if(tlen_removed_intron >= rlen1 + rlen2) return 1; // may be gapped.
+ int try_overlapping;
+
+ int best_matched_bases = 0;
+ int best_overlapping_len = -1;
- tmp_result = _global_retrieve_alignment_ptr(global_context, pair_number, 1, 0);
- if(tmp_result->selected_votes <1 && global_context -> input_reads.is_paired_end_reads)
+ int assumed_overlapping = rlen1+rlen2-tlen_removed_intron;
+ for(try_overlapping = 0; try_overlapping < min(rlen1, rlen2); try_overlapping++)
{
+ int r1_start = rlen1 - try_overlapping;
+ int r2_end = try_overlapping;
+ int xk1;
+ int all_matched = 0, all_mismatched = 0;
+ for(xk1 = 0; xk1 < r2_end; xk1++){
+ char r1ch = seq1[r1_start + xk1];
+ char r2ch = seq2[xk1];
+ if(r1ch==r2ch) all_matched++;
+ else all_mismatched++;
+ }
- tmp_result->used_subreads_in_vote = max(v2_all_subreads, tmp_result->used_subreads_in_vote );
- tmp_result->noninformative_subreads_in_vote = max(vote_2 -> noninformative_subreads, tmp_result->noninformative_subreads_in_vote);
+ if(all_mismatched <= 1 && try_overlapping == assumed_overlapping){
+ // the assumed overlapping length is good enough.
+ return 0;
+ }
+ if(all_mismatched <= 1 && all_matched > best_matched_bases){
+ best_overlapping_len = try_overlapping;
+ best_matched_bases = all_matched;
+ }
}
- return 0;
+
+ if(best_overlapping_len <= 0)return 0;
+ return assumed_overlapping
+*/
+ return tlen_removed_intron > 600;
+}
+
+// the positions are not offset by adding the first soft clipping length. I.e., pos1 and pos2 may be smaller than those in the SAM files.
+// seq1 and seq2 must be on the same strand!
+// (seq2 is reversed)
+int is_funky_fragment(global_context_t * global_context, char * rname1, char * chr1, unsigned int pos1, int rlen1, int is_1_negative, char * cigar1, char * seq1, char * rname2, char * chr2, unsigned int pos2, int rlen2, int is_2_negative, char * cigar2, char * seq2, int tlen_removed_intron)
+{
+ long long llraw_tlen = pos1;
+ llraw_tlen -= pos2;
+ if(llraw_tlen <0)
+ llraw_tlen = -llraw_tlen;
+ unsigned int raw_tlen = llraw_tlen;
+ raw_tlen += max(rlen2, rlen1);
+
+ //SUBREADprintf("CHRS=%p,%p, POS=%u,%u, RTLEN=%u\n", chr1, chr2, pos1, pos2, raw_tlen);
+
+ if(chr1 != chr2) raw_tlen = 0;
+
+ // note: the two pointers can be compared because they should be derived from the offset table.
+ // Each chromosome name should have one and only one distinct char * pointer.
+ if(chr1 == chr2 && raw_tlen <= global_context -> config.maximum_translocation_length && is_2_negative == is_1_negative)
+ {
+ if(is_gapped_as_funky(global_context, rname1, chr1, pos1, rlen1, is_1_negative, cigar1, seq1, rname2, chr2, pos2, rlen2, is_2_negative, cigar2, seq2, tlen_removed_intron))
+ return FUNKY_FRAGMENT_A;
+ else return NOT_FUNKY;
+ }
+ else if( chr1 == chr2 && raw_tlen <= global_context -> config.maximum_translocation_length && is_2_negative != is_1_negative )
+ return FUNKY_FRAGMENT_DE;
+ else if( chr1 != chr2 || raw_tlen > global_context -> config.maximum_translocation_length)
+ return FUNKY_FRAGMENT_BC;
+
+ return NOT_FUNKY;
}
-int explain_read(global_context_t * global_context, thread_context_t * thread_context, int pair_number, int read_len, char * read_name , char *read_text, char *qual_text, int is_second_read, int best_read_id, int is_negative_strand)
+int process_voting_junction(global_context_t * global_context, thread_context_t * thread_context, subread_read_number_t pair_number, gene_vote_t * vote_1, gene_vote_t * vote_2, char * read_name_1, char * read_name_2, char * read_text_1, char * read_text_2, int read_len_1, int read_len_2, int is_negative_strand, gene_vote_number_t v1_all_subreads, gene_vote_number_t v2_all_subreads){
+ //if(global_context -> input_reads.is_paired_end_reads || global_context -> config.do_breakpoint_detection)
+ return process_voting_junction_PE_topK(global_context, thread_context, pair_number, vote_1, vote_2, read_name_1, read_name_2, read_text_1, read_text_2, read_len_1, read_len_2, is_negative_strand, v1_all_subreads, v2_all_subreads);
+ //else
+ // return process_voting_junction_SE(global_context, thread_context, pair_number, vote_1, read_name_1, read_text_1, read_len_1, is_negative_strand, v1_all_subreads);
+
+}
+
+
+unsigned int explain_read(global_context_t * global_context, thread_context_t * thread_context, realignment_result_t * final_realignments, subread_read_number_t pair_number, int read_len, char * read_name , char *read_text, char *qual_text, int is_second_read, int best_read_id, int is_negative_strand)
{
explain_context_t explain_context;
- alignment_result_t *current_result = _global_retrieve_alignment_ptr(global_context, pair_number, is_second_read, best_read_id);
+ mapping_result_t *current_result = _global_retrieve_alignment_ptr(global_context, pair_number, is_second_read, best_read_id);
if(global_context -> config.do_big_margin_filtering_for_reads)
{
@@ -1344,6 +1737,7 @@ int explain_read(global_context_t * global_context, thread_context_t * thread_co
memset(&explain_context,0, sizeof(explain_context_t));
explain_context.full_read_len = read_len;
+ explain_context.is_fully_covered = current_result -> is_fully_covered ;
explain_context.full_read_text = read_text;
explain_context.full_qual_text = qual_text;
explain_context.read_name = read_name;
@@ -1362,6 +1756,7 @@ int explain_read(global_context_t * global_context, thread_context_t * thread_co
explain_context.tmp_search_junctions[0].read_pos_end = back_search_read_tail;
explain_context.tmp_search_junctions[0].abs_offset_for_start = back_search_tail_position;
+ explain_context.all_back_alignments = 0;
explain_context.tmp_search_sections = 0;
explain_context.best_matching_bases = -9999;
explain_context.second_best_matching_bases = -9999;
@@ -1375,12 +1770,33 @@ int explain_read(global_context_t * global_context, thread_context_t * thread_co
explain_context.tmp_min_unsupport = 999999;
explain_context.tmp_is_pure_donor_found_explain = 1;
explain_context.best_is_pure_donor_found_explain = 0;
-
- search_events_to_back(global_context, thread_context, &explain_context, read_text , qual_text, back_search_tail_position , back_search_read_tail, 0, 0);
+ if(1) {
+ front_search_read_start = back_search_read_tail - 8;
+ front_search_start_position = back_search_tail_position - 8;
+ } else {
+ //front_search_read_start = current_result -> confident_coverage_start + 5;
+ front_search_read_start = min(explain_context.full_read_len , current_result -> confident_coverage_end);
+ if(front_search_read_start > 2*global_context -> config.realignment_minimum_variant_distance) front_search_read_start -= 2*global_context -> config.realignment_minimum_variant_distance;
+ else front_search_read_start = 0;
+ front_search_start_position = current_result -> selected_position + front_search_read_start;
+ }
+
+ if(0 && FIXLENstrcmp( explain_context.read_name, "R000002689")==0)
+ {
+ SUBREADprintf("EXPLAIN_READ_%d %s [%d]: POS=%u ;; BACK SEARCH TAILPOS=%u, READTAIL=%d ; INDEL_IN_CONF=%d ; READ_COV=%d~%d\n", 1+is_second_read, explain_context.read_name, best_read_id, current_result -> selected_position, back_search_tail_position, back_search_read_tail, current_result -> indels_in_confident_coverage, front_search_read_start, back_search_read_tail);
+ }
- int is_backsearch_tie = explain_context.is_currently_tie;
+ search_events_to_back(global_context, thread_context, &explain_context, read_text , qual_text, back_search_tail_position , back_search_read_tail, 0, 0, 1);
+ if(0 && FIXLENstrcmp("R_chr901_932716_91M1D9M",explain_context.read_name ) == 0)
+ SUBREADprintf("B_SEARCH has found %d result sets\n", explain_context.all_back_alignments);
+
+ //int is_backsearch_tie = explain_context.is_currently_tie;
int back_search_matches_diff = -9999;
+
+ /*
+
+
if(explain_context.back_search_confirmed_sections>0)
{
@@ -1394,14 +1810,23 @@ int explain_read(global_context_t * global_context, thread_context_t * thread_co
current_result -> selected_position = explain_context.back_search_junctions[last_sec].abs_offset_for_start - explain_context.back_search_junctions[last_sec].read_pos_end + explain_context.back_search_junctions[last_sec].read_pos_start;
back_search_matches_diff = explain_context.best_matching_bases - explain_context.second_best_matching_bases;
+ if(0 && memcmp(explain_context.read_name, TTTSNAME, 26)==0)
+ {
+ int xk1;
+ for(xk1 = 0; xk1 < explain_context.back_search_confirmed_sections; xk1++)
+ {
+ short pr_section_length = explain_context.back_search_junctions[xk1].read_pos_end - explain_context.back_search_junctions[xk1].read_pos_start;
+ if(explain_context.back_search_junctions[xk1].event_after_section)
+ SUBREADprintf("BACK_SECTIONS [%d], START IS %u; RPSS=%d ; RPED=%d ; LEN=%d ; EVENT is %u %u INDEL=%d\n", xk1, explain_context.back_search_junctions[xk1].abs_offset_for_start, explain_context.back_search_junctions[xk1].read_pos_start, explain_context.back_search_junctions[last_sec].read_pos_end, pr_section_length, explain_context.back_search_junctions[xk1].event_after_section->event_small_side, explain_context.back_search_junctions[xk1].event_after_section->event_large_side, explain_ [...]
+ else SUBREADprintf("BACK_SECTIONS [%d], START IS %u; RPSS=%d ; RPED=%d ; LEN=%d\n", xk1, explain_context.back_search_junctions[xk1].abs_offset_for_start, explain_context.back_search_junctions[xk1].read_pos_start, explain_context.back_search_junctions[last_sec].read_pos_end, pr_section_length);
+ }
+ }
+
//SUBREADprintf("DBI:%d - %d;\n", explain_context.best_matching_bases , explain_context.second_best_matching_bases);
}
else
- {
- front_search_read_start = current_result -> confident_coverage_start + 5;
- front_search_start_position = current_result -> selected_position + front_search_read_start;
- }
-
+ */
+ explain_context.all_front_alignments = 0;
explain_context.tmp_search_sections = 0;
explain_context.best_matching_bases = -9999;
explain_context.second_best_matching_bases = -9999;
@@ -1420,45 +1845,91 @@ int explain_read(global_context_t * global_context, thread_context_t * thread_co
explain_context.tmp_search_junctions[0].read_pos_start = front_search_read_start;
explain_context.tmp_search_junctions[0].abs_offset_for_start = front_search_start_position;
- search_events_to_front(global_context, thread_context, &explain_context, read_text + front_search_read_start, qual_text + front_search_read_start, front_search_start_position,read_len - front_search_read_start , 0, 0);
- int is_frontsearch_tie = explain_context.is_currently_tie;
+ if(0 && FIXLENstrcmp("R000002689",explain_context.read_name ) == 0)
+ SUBREADprintf("Enter F_SEARCH: start=%u read_pos=%d\n", front_search_start_position, front_search_read_start);
+
+ search_events_to_front(global_context, thread_context, &explain_context, read_text + front_search_read_start, qual_text + front_search_read_start, front_search_start_position,read_len - front_search_read_start , 0, 0, 1);
+ if(0 && FIXLENstrcmp("R_chr901_932716_91M1D9M",explain_context.read_name ) == 0)
+ SUBREADprintf("F_SEARCH has found %d result sets\n", explain_context.all_front_alignments);
+
+ //int is_frontsearch_tie = explain_context.is_currently_tie;
//SUBREADprintf("DFI:%d - %d;\n", explain_context.best_matching_bases , explain_context.second_best_matching_bases);
int front_search_matches_diff = explain_context.best_matching_bases - explain_context.second_best_matching_bases;
explain_context.best_second_match_diff = front_search_matches_diff + back_search_matches_diff;
+ /*
if((!global_context -> config.report_multi_mapping_reads )&& (is_frontsearch_tie || is_backsearch_tie))
{
current_result -> final_quality = 0;
current_result -> result_flags &= ~CORE_IS_FULLY_EXPLAINED;
- current_result -> Score_H &= 0x7fffffffffffffffllu;
+ current_result -> result_flags &= ~CORE_IS_PAIRED_END;
if(explain_context. best_read_id)
{
- alignment_result_t * result_prime = _global_retrieve_alignment_ptr(global_context, explain_context.pair_number, 0, 0);
- result_prime -> Score_H &= 0x7fffffffffffffffllu;
+ mapping_result_t * result_prime = _global_retrieve_alignment_ptr(global_context, explain_context.pair_number, 0, 0);
+ result_prime -> result_flags &= ~CORE_IS_PAIRED_END;
result_prime = _global_retrieve_alignment_ptr(global_context, explain_context.pair_number, 1, 0);
- result_prime -> Score_H &= 0x7fffffffffffffffllu;
+ result_prime -> result_flags &= ~CORE_IS_PAIRED_END;
}
}
// calc
- else finalise_explain_CIGAR(global_context, thread_context, &explain_context);
+ else*/
+ int realignment_number = finalise_explain_CIGAR(global_context, thread_context, &explain_context, final_realignments);
+
+ return realignment_number;
+}
+
+
+void debug_clipping(global_context_t * global_context, thread_context_t * thread_context, gene_value_index_t * current_value_index, char * read_text, unsigned int mapped_pos, int test_len, int search_to_tail, int search_center, int number_of_clipped, char * read_name){
+
+ //if(test_len>100)return;
+
+ int xk1;
+
+ SUBREADprintf("\n %s CENTER=%d, CLIPPED=%d, TLEN=%d %s\n", read_name, search_center, number_of_clipped, test_len, search_to_tail?">>>>":"<<<<");
+
+ for(xk1 = 0 ; xk1 < test_len ; xk1++)
+ {
+ char reference_base = gvindex_get(current_value_index, xk1 + mapped_pos);
+ SUBREADprintf("%c", reference_base == read_text[xk1] ? '-':'#');
+ }
+
+ SUBREADprintf("\n");
+ for(xk1 = 0 ; xk1 < test_len ; xk1++)
+ {
+ if(xk1 == search_center)
+ SUBREADprintf("%c", search_to_tail?'>':'<');
+ else SUBREADprintf(" ");
+ }
+
+ SUBREADprintf("\n");
+ for(xk1 = 0 ; xk1 < test_len ; xk1++)
+ {
+ if( search_to_tail && xk1 >= test_len - number_of_clipped)
+ SUBREADprintf("R");
+ else if( (!search_to_tail) && xk1 <= number_of_clipped - 1)
+ SUBREADprintf("L");
+ else SUBREADprintf(" ");
+ }
+
+ SUBREADprintf("\n");
- return 0;
}
-#define find_soft_clipping find_soft_clipping_147
#define SOFT_CLIPPING_WINDOW_SIZE 5
#define SOFT_CLIPPING_MAX_ERROR 1
+#define find_soft_clipping_147 find_soft_clipping
-int find_soft_clipping_147(global_context_t * global_context, thread_context_t * thread_context, gene_value_index_t * current_value_index, char * read_text, unsigned int mapped_pos, int test_len, int search_to_tail, int search_center, int * remedy)
+
+// it returns the number of bases to be clipped off.
+int find_soft_clipping_147(global_context_t * global_context, thread_context_t * thread_context, gene_value_index_t * current_value_index, char * read_text, unsigned int mapped_pos, int test_len, int search_to_tail, int search_center)
{
int base_in_window = 0;
int added_base_index = 0, removed_base_index = 0;
int search_start = 0;
int matched_in_window = SOFT_CLIPPING_WINDOW_SIZE;
int last_matched_base_index = -1, delta;
- int ret_clipped = -1;
if(search_to_tail)
{
@@ -1503,44 +1974,26 @@ int find_soft_clipping_147(global_context_t * global_context, thread_context_t
if(matched_in_window < SOFT_CLIPPING_WINDOW_SIZE - SOFT_CLIPPING_MAX_ERROR){
// clip, bondary is the last matched base.
if(search_to_tail){
- if(last_matched_base_index < 0) ret_clipped = test_len - search_start;
- else ret_clipped = test_len - last_matched_base_index - 1;
- }else{
- if(last_matched_base_index >= 0) ret_clipped = last_matched_base_index;
- else ret_clipped = search_start - 1;
- }
- break;
- }
- }
-
- if(ret_clipped<0)
- {
- if(last_matched_base_index < 0) ret_clipped = test_len;
- else{
- if(search_to_tail){
- if(last_matched_base_index < 0) ret_clipped = test_len - search_start;
- else ret_clipped = test_len - last_matched_base_index - 1;
+ if(last_matched_base_index < 0) return test_len - search_start;
+ else return test_len - last_matched_base_index - 1;
}else{
- if(last_matched_base_index >= 0) ret_clipped = last_matched_base_index;
- else ret_clipped = search_start - 1;
+ if(last_matched_base_index >= 0) return last_matched_base_index;
+ else return search_start - 1;
}
}
}
- int xk1;
- int remedy_begin = search_to_tail?test_len - ret_clipped: ret_clipped;
- *remedy = 0;
+ if(last_matched_base_index < 0) return test_len;
- for(xk1 = remedy_begin ; xk1 < test_len && xk1>=0; xk1 += delta)
- {
- char reference_base = gvindex_get(current_value_index, xk1 + mapped_pos);
- (*remedy) += reference_base != read_text[xk1];
+ if(search_to_tail){
+ if(last_matched_base_index < 0) return test_len - search_start;
+ else return test_len - last_matched_base_index - 1;
+ }else{
+ if(last_matched_base_index >= 0) return last_matched_base_index;
+ else return search_start - 1;
}
-
- return ret_clipped;
}
-
-int find_soft_clipping_146(global_context_t * global_context, thread_context_t * thread_context, gene_value_index_t * current_value_index, char * read_text, unsigned int mapped_pos, int test_len, int search_to_tail, int * remedy)
+int find_soft_clipping_146(global_context_t * global_context, thread_context_t * thread_context, gene_value_index_t * current_value_index, char * read_text, unsigned int mapped_pos, int test_len, int search_to_tail, int search_center)
{
char window_matched[SOFT_CLIPPING_WINDOW_SIZE];
@@ -1562,8 +2015,6 @@ int find_soft_clipping_146(global_context_t * global_context, thread_context_t
}
window_matched[0] = (ref_value == read_text[x1]);
sum_matched += window_matched[0];
- if(x0>=SOFT_CLIPPING_WINDOW_SIZE - 1)
- (*remedy) += !(window_matched[SOFT_CLIPPING_WINDOW_SIZE-1]);
/*
for(x2 = 0; x2 < SOFT_CLIPPING_WINDOW_SIZE; x2++){
@@ -1588,21 +2039,22 @@ int find_soft_clipping_146(global_context_t * global_context, thread_context_t
// read_head_abs_offset is the first WANTED base in read.
// If the first section in read is reversed, read_head_abs_offset is the LAST WANTED bases in this section. (the abs offset of the first base in the section is actually larger than read_head_abs_offset)
-int final_CIGAR_quality(global_context_t * global_context, thread_context_t * thread_context, char * read_text, char * qual_text, int read_len, char * cigar_string, unsigned long read_head_abs_offset, int is_read_head_reversed, int * mismatched_bases, int covered_start, int covered_end)
+int final_CIGAR_quality(global_context_t * global_context, thread_context_t * thread_context, char * read_text, char * qual_text, int read_len, char * cigar_string, unsigned long read_head_abs_offset, int is_read_head_reversed, int * mismatched_bases, int covered_start, int covered_end, char * read_name, int * non_clipped_length, int *total_indel_length, int * matched_bases)
{
int cigar_cursor = 0;
int read_cursor = 0;
unsigned int current_perfect_section_abs = read_head_abs_offset;
- int rebuilt_read_len = 0;
+ int rebuilt_read_len = 0, total_insertion_length = 0;
float all_matched_bases = 0;
gene_value_index_t * current_value_index = thread_context?thread_context->current_value_index:global_context->current_value_index;
int current_reversed = is_read_head_reversed;
- int all_perfect_length = 0;
int all_mismatched = 0;
- int is_First_M = 1, remedy_MM_tail = 0, remedy_MM_head = 0;
+ int is_First_M = 1;
int head_soft_clipped = -1, tail_soft_clipped = -1;
unsigned int tmp_int = 0;
+ //SUBREADprintf("Coverage : %d ~ %d\n", covered_start, covered_end);
+
while(1)
{
char nch = cigar_string[cigar_cursor++];
@@ -1618,32 +2070,61 @@ int final_CIGAR_quality(global_context_t * global_context, thread_context_t * th
float section_qual;
- if(global_context -> config.space_type == GENE_SPACE_COLOR)
- section_qual = match_base_quality_cs(current_value_index, read_text+read_cursor, current_perfect_section_abs, qual_text_cur, tmp_int, global_context->config.phred_score_format , mismatched_bases, &all_mismatched, global_context -> config.high_quality_base_threshold);
- else
- section_qual = match_base_quality(current_value_index, read_text+read_cursor, current_perfect_section_abs, qual_text_cur, tmp_int, current_reversed, global_context->config.phred_score_format , mismatched_bases, &all_mismatched, global_context -> config.high_quality_base_threshold);
- all_matched_bases += section_qual;
- rebuilt_read_len += tmp_int;
- all_perfect_length += tmp_int;
-
int is_Last_M = (cigar_string[cigar_cursor]==0);
+ int has_clipping_this_section_head = 0, has_clipping_this_section_tail = 0;
+ char * reversed_first_section_text = NULL;
// find "J" sections if it is the first M
if(is_First_M && global_context -> config.show_soft_cliping)
{
int adj_coverage_start = covered_start - read_cursor;
+ char * debug_ptr = read_text;
+
+ if(current_reversed)
+ {
+ reversed_first_section_text = malloc(MAX_READ_LENGTH);
+ memcpy(reversed_first_section_text, read_text, tmp_int);
+ reverse_read(reversed_first_section_text, tmp_int, global_context->config.space_type);
+ debug_ptr = reversed_first_section_text;
+
+ head_soft_clipped = find_soft_clipping(global_context, thread_context, current_value_index, reversed_first_section_text, current_perfect_section_abs, tmp_int, 1, 0);
+ }
+ else
+ head_soft_clipped = find_soft_clipping(global_context, thread_context, current_value_index, read_text, current_perfect_section_abs, tmp_int, 0, adj_coverage_start);
+ if(0&& memcmp(read_name, TTTSNAME, 26)==0)
+ debug_clipping(global_context, thread_context, current_value_index, debug_ptr, current_perfect_section_abs, tmp_int, 0, adj_coverage_start, head_soft_clipped, read_name);
+
- remedy_MM_head = 0;
- head_soft_clipped = find_soft_clipping(global_context, thread_context, current_value_index, read_text, current_perfect_section_abs, tmp_int, 0, adj_coverage_start, &remedy_MM_head);
if(head_soft_clipped == tmp_int) head_soft_clipped = 0;
+ else has_clipping_this_section_head = 1;
+
+ if(reversed_first_section_text)
+ free(reversed_first_section_text);
+ reversed_first_section_text = NULL;
}
if(is_Last_M && global_context -> config.show_soft_cliping)
{
int adj_coverage_end = covered_end - read_cursor;
+ char * debug_ptr = read_text + read_cursor;
+
+ if(current_reversed)
+ {
+ reversed_first_section_text = malloc(MAX_READ_LENGTH);
+ memcpy(reversed_first_section_text, read_text + read_cursor, tmp_int);
+ reverse_read(reversed_first_section_text, tmp_int, global_context->config.space_type);
+ debug_ptr = reversed_first_section_text;
+ tail_soft_clipped = find_soft_clipping(global_context, thread_context, current_value_index, reversed_first_section_text, current_perfect_section_abs, tmp_int, 0, tmp_int);
+ }
+ else
+ tail_soft_clipped = find_soft_clipping(global_context, thread_context, current_value_index, read_text + read_cursor, current_perfect_section_abs, tmp_int, 1, adj_coverage_end);
+
+ if(0 && memcmp(read_name, TTTSNAME, 26)==0)
+ debug_clipping(global_context, thread_context, current_value_index, debug_ptr, current_perfect_section_abs, tmp_int, !current_reversed, adj_coverage_end , tail_soft_clipped, read_name);
- remedy_MM_tail = 0;
- tail_soft_clipped = find_soft_clipping(global_context, thread_context, current_value_index, read_text + read_cursor, current_perfect_section_abs, tmp_int, 1, adj_coverage_end, &remedy_MM_tail);
if(tail_soft_clipped == tmp_int) tail_soft_clipped = 0;
+ else has_clipping_this_section_tail = 1;
+ if(reversed_first_section_text)
+ free(reversed_first_section_text);
}
if(is_Last_M && is_First_M && tail_soft_clipped+head_soft_clipped >= tmp_int-1)
{
@@ -1651,6 +2132,15 @@ int final_CIGAR_quality(global_context_t * global_context, thread_context_t * th
tail_soft_clipped=0;
}
+ int mismatch_calculation_start = has_clipping_this_section_head?head_soft_clipped:0;
+ int mismatch_calculation_end = has_clipping_this_section_tail?tail_soft_clipped:0;
+
+ if(global_context -> config.space_type == GENE_SPACE_COLOR)
+ section_qual = match_base_quality_cs(current_value_index, read_text+read_cursor, current_perfect_section_abs, qual_text_cur, tmp_int, global_context->config.phred_score_format , mismatched_bases, &all_mismatched, global_context -> config.high_quality_base_threshold, mismatch_calculation_start, mismatch_calculation_end);
+ else
+ section_qual = match_base_quality(current_value_index, read_text+read_cursor, current_perfect_section_abs, qual_text_cur, tmp_int, current_reversed, global_context->config.phred_score_format , mismatched_bases, &all_mismatched, global_context -> config.high_quality_base_threshold, mismatch_calculation_start, mismatch_calculation_end);
+ all_matched_bases += section_qual;
+ rebuilt_read_len += tmp_int;
is_First_M=0;
read_cursor += tmp_int;
@@ -1667,20 +2157,26 @@ int final_CIGAR_quality(global_context_t * global_context, thread_context_t * th
{
rebuilt_read_len += tmp_int;
read_cursor += tmp_int;
+ // total_indel_length is an int* out-parameter: every update below goes through the pointer; arithmetic on the pointer itself would leave the caller's counter untouched.
all_matched_bases += tmp_int;
+ (*total_indel_length) += tmp_int; // adds the operation length to the caller's counter
+ total_insertion_length += tmp_int;
}
else if(nch == 'D')
{
+ (*total_indel_length) ++; // adds one per 'D' operation, keeping the increment the statement was written with
if(!current_reversed)
current_perfect_section_abs += tmp_int;
}
else if(tolower(nch) == 'n')
{
+ (*total_indel_length) ++; // adds one per 'N'/'n' operation
current_perfect_section_abs += tmp_int;
if(nch == 'n') current_reversed = !current_reversed;
}
else if(tolower(nch) == 'b')
{
+ (*total_indel_length) ++; // adds one per 'B'/'b' operation
current_perfect_section_abs -= tmp_int;
if(nch == 'b') current_reversed = !current_reversed;
}
@@ -1689,17 +2185,14 @@ int final_CIGAR_quality(global_context_t * global_context, thread_context_t * th
}
}
- int read_mapped_fraction = read_len;
- read_mapped_fraction -= max(0,tail_soft_clipped);
- read_mapped_fraction -= max(0,head_soft_clipped);
-
- if(head_soft_clipped>0) all_mismatched -= remedy_MM_head;
- if(tail_soft_clipped>0) all_mismatched -= remedy_MM_tail;
+ int my_non_clipped_length = read_len;
+ my_non_clipped_length -= max(0,tail_soft_clipped);
+ my_non_clipped_length -= max(0,head_soft_clipped);
//#warning " ========== COMMENT THIS LINE !! ========="
- //printf("QCR ALL MM=%d, RBLEN=%d, MAPPED_LEN=%d ; CIGAR=%s\n", all_mismatched, rebuilt_read_len , read_mapped_fraction, cigar_string);
+ //printf("QCR ALL MM=%d, RBLEN=%d, MAPPED_LEN=%d ; CIGAR=%s\n", all_mismatched, rebuilt_read_len , my_non_clipped_length, cigar_string);
- if(rebuilt_read_len != read_len || read_mapped_fraction < global_context->config.min_mapped_fraction){
+ if(rebuilt_read_len != read_len || my_non_clipped_length < global_context->config.min_mapped_fraction){
(*mismatched_bases)=99999;
all_matched_bases = 0;
sprintf(cigar_string, "%dM", read_len);
@@ -1759,283 +2252,329 @@ int final_CIGAR_quality(global_context_t * global_context, thread_context_t * th
if((*mismatched_bases) != 99999)
(*mismatched_bases) = all_mismatched;
- return max(0, (int)(all_matched_bases*60/read_len));
+
+ (*non_clipped_length) = my_non_clipped_length;
+ (*matched_bases) = my_non_clipped_length - all_mismatched - total_insertion_length;
+
+ return max(0, (int)(all_matched_bases*60/my_non_clipped_length));
}
// this function also adds final_counting_reads in chromosome_events.
-int finalise_explain_CIGAR(global_context_t * global_context, thread_context_t * thread_context, explain_context_t * explain_context)
+unsigned int finalise_explain_CIGAR(global_context_t * global_context, thread_context_t * thread_context, explain_context_t * explain_context, realignment_result_t * final_realignments)
{
- int xk1;
+ int xk1, front_i, back_i;
char tmp_cigar[120], tmp_cigar_exonic[120];
chromosome_event_t * to_be_supported [20];
short flanking_size_left[20], flanking_size_right[20];
int to_be_supported_count = 0;
int is_junction_read = 0;
int total_perfect_matched_sections = 0;
- alignment_result_t * result = _global_retrieve_alignment_ptr(global_context, explain_context->pair_number, explain_context->is_second_read, explain_context-> best_read_id);
+ mapping_result_t * result = _global_retrieve_alignment_ptr(global_context, explain_context->pair_number, explain_context->is_second_read, explain_context-> best_read_id);
+ result -> result_flags &= ~CORE_IS_FULLY_EXPLAINED;
+ result -> result_flags &= ~CORE_IS_PAIRED_END;
+ //SUBREADprintf("FINAL_CIGAR R1 %d[%d] = %p, FLAGS=%d\n", explain_context -> pair_number , explain_context-> best_read_id , result , result -> result_flags);
tmp_cigar[0]=0;
tmp_cigar_exonic[0]=0;
- // reverse the back_search results
- for(xk1=0; xk1<explain_context -> back_search_confirmed_sections/2; xk1++)
- {
- perfect_section_in_read_t tmp_exp;
- memcpy(&tmp_exp, &explain_context -> back_search_junctions[xk1], sizeof(perfect_section_in_read_t));
- memcpy(&explain_context -> back_search_junctions[xk1], &explain_context -> back_search_junctions[explain_context -> back_search_confirmed_sections - xk1 - 1] , sizeof(perfect_section_in_read_t));
- memcpy(&explain_context -> back_search_junctions[explain_context -> back_search_confirmed_sections - xk1 - 1] , &tmp_exp , sizeof(perfect_section_in_read_t));
- }
-
+ // reverse the back_search result for every equally best alignment
+ //
+ for(back_i = 0; back_i < explain_context -> all_back_alignments; back_i++){
+ for(xk1=0; xk1<explain_context -> result_back_junction_numbers[back_i]/2; xk1++)
+ {
+ perfect_section_in_read_t tmp_exp;
+ memcpy(&tmp_exp, &explain_context -> result_back_junctions[back_i][xk1], sizeof(perfect_section_in_read_t));
+ memcpy(&explain_context -> result_back_junctions[back_i][xk1], &explain_context -> result_back_junctions[back_i][explain_context -> result_back_junction_numbers[back_i] - xk1 - 1] , sizeof(perfect_section_in_read_t));
+ memcpy(&explain_context -> result_back_junctions[back_i][explain_context -> result_back_junction_numbers[back_i] - xk1 - 1] , &tmp_exp , sizeof(perfect_section_in_read_t));
+ }
+ }
+
// adding indel lengths in read lengths and relocate sections
// note that the last section in back results has the same strand of the main piece.
- int is_first_section_negative = (result ->result_flags & CORE_IS_NEGATIVE_STRAND)?1:0;
- for(xk1=0; xk1<explain_context -> back_search_confirmed_sections; xk1++)
- {
- int section_length = explain_context -> back_search_junctions[xk1].read_pos_end - explain_context -> back_search_junctions[xk1].read_pos_start;
- unsigned int new_start_pos;
-
- if(explain_context -> back_search_junctions[xk1].is_strand_jumped)
- // the "strand_jumped" section do not need to move
- // however, the "abs_offset_for_start" is actually for the last base in this section.
- // this does not metter if we compare the reversed read to the chromosome.
- // "abs_offset_for_start" is the first UNWANTED base (smaller than the first WANTED base)
- new_start_pos = explain_context -> back_search_junctions[xk1].abs_offset_for_start +1;
- else
- // "abs_offset_for_start" is the first UNWANTED base. By subtracting the length, it becomes the first WANTED base.
- new_start_pos = explain_context -> back_search_junctions[xk1].abs_offset_for_start - section_length;
-
- explain_context -> back_search_junctions[xk1].abs_offset_for_start = new_start_pos;
- if(explain_context -> back_search_junctions[xk1].event_after_section
- && explain_context -> back_search_junctions[xk1].event_after_section->is_strand_jumped) is_first_section_negative=!is_first_section_negative;
- }
- // build CIGAR
- int is_cigar_overflow = 0;
- for(xk1 = 0; xk1 < explain_context -> back_search_confirmed_sections + explain_context -> front_search_confirmed_sections -1; xk1++)
- {
- char piece_cigar[25];
- int read_pos_start, read_pos_end;
- perfect_section_in_read_t * current_section, *next_section = NULL;
+ int is_cigar_overflow = 0, fusions_in_read = 0, final_alignment_number = 0;
+ for(back_i = 0; back_i < explain_context -> all_back_alignments; back_i++){
+ if(final_alignment_number >= MAX_ALIGNMENT_PER_ANCHOR)break;
- int is_front_search = 0;
- if(xk1 >= explain_context -> back_search_confirmed_sections || xk1 == explain_context -> back_search_confirmed_sections -1)
+ int is_first_section_negative = (result ->result_flags & CORE_IS_NEGATIVE_STRAND)?1:0;
+ for(xk1=0; xk1<explain_context -> result_back_junction_numbers[back_i]; xk1++)
{
- current_section = &explain_context -> front_search_junctions[xk1 - explain_context -> back_search_confirmed_sections +1];
- if(xk1 - explain_context -> back_search_confirmed_sections +2 < explain_context -> front_search_confirmed_sections)
- next_section = &explain_context -> front_search_junctions[xk1 - explain_context -> back_search_confirmed_sections +2];
+ int section_length = explain_context -> result_back_junctions[back_i][xk1].read_pos_end - explain_context -> result_back_junctions[back_i][xk1].read_pos_start;
+ unsigned int new_start_pos;
+
+ if(explain_context -> result_back_junctions[back_i][xk1].is_strand_jumped)
+ // the "strand_jumped" section do not need to move
+ // however, the "abs_offset_for_start" is actually for the last base in this section.
+ // this does not metter if we compare the reversed read to the chromosome.
+ // "abs_offset_for_start" is the first UNWANTED base (smaller than the first WANTED base)
+ new_start_pos = explain_context -> result_back_junctions[back_i][xk1].abs_offset_for_start +1;
+ else
+ // "abs_offset_for_start" is the first UNWANTED base. By subtracting the length, it becomes the first WANTED base.
+ new_start_pos = explain_context -> result_back_junctions[back_i][xk1].abs_offset_for_start - section_length;
- is_front_search = 1;
- }
- else
- {
- current_section = &explain_context -> back_search_junctions[xk1];
- if(xk1+1 < explain_context -> back_search_confirmed_sections)
- next_section = &explain_context -> back_search_junctions[xk1+1];
+ explain_context -> result_back_junctions[back_i][xk1].abs_offset_for_start = new_start_pos;
+ if(explain_context -> result_back_junctions[back_i][xk1].event_after_section
+ && explain_context -> result_back_junctions[back_i][xk1].event_after_section->is_strand_jumped) is_first_section_negative=!is_first_section_negative;
}
+ // build CIGAR
+ for(front_i = 0; front_i < explain_context -> all_front_alignments; front_i++){
+ if(final_alignment_number >= MAX_ALIGNMENT_PER_ANCHOR)break;
- read_pos_start = current_section -> read_pos_start;
- read_pos_end = current_section -> read_pos_end;
- chromosome_event_t *event_after = current_section -> event_after_section;
- sprintf(piece_cigar, "%dM", (read_pos_end - read_pos_start));
- total_perfect_matched_sections += (read_pos_end - read_pos_start);
- flanking_size_left[xk1] = (read_pos_end - read_pos_start);
+ if(0 && FIXLENstrcmp("DB7DT8Q1:236:C2NGTACXX:2:1213:17842:64278",explain_context->read_name ) == 0){
+ SUBREADprintf("For the %d-th front search result set and the %d-th back search result set, there are %d + %d - 1 = %d sections in the read\nmapped location = %u\n", front_i, back_i, explain_context -> result_back_junction_numbers[back_i] , explain_context -> result_front_junction_numbers[front_i] , explain_context -> result_back_junction_numbers[back_i] + explain_context -> result_front_junction_numbers[front_i] -1, result -> selected_position);
+
+ for(xk1 = 0; xk1 < explain_context -> result_back_junction_numbers[back_i] + explain_context -> result_front_junction_numbers[front_i]; xk1++)
+ {
+ perfect_section_in_read_t * current_section;
+ int is_front_search = 0;
+ if(xk1 >= explain_context -> result_back_junction_numbers[back_i]) {
+ current_section = &explain_context -> result_front_junctions[front_i][xk1 - explain_context -> result_back_junction_numbers[back_i]];
+ is_front_search = 1;
+ } else {
+ current_section = &explain_context -> result_back_junctions[back_i][xk1];
+ }
+ SUBREADprintf(" The %d-th section ( %d long ) has next event being %p\n", xk1, current_section -> read_pos_end - current_section -> read_pos_start , current_section -> event_after_section);
+ }
+ }
- if(xk1<explain_context -> back_search_confirmed_sections + explain_context -> front_search_confirmed_sections -2)
- assert(event_after);
+ for(xk1 = 0; xk1 < explain_context -> result_back_junction_numbers[back_i] + explain_context -> result_front_junction_numbers[front_i] -1; xk1++)
+ {
+ char piece_cigar[25];
+ int read_pos_start, read_pos_end;
+ perfect_section_in_read_t * current_section, *next_section = NULL;
+
+ int is_front_search = 0;
+ if(xk1 >= explain_context -> result_back_junction_numbers[back_i] - 1) {
+ current_section = &explain_context -> result_front_junctions[front_i][xk1 - explain_context -> result_back_junction_numbers[back_i] +1];
+ if(xk1 - explain_context -> result_back_junction_numbers[back_i] +2 < explain_context -> result_front_junction_numbers[front_i])
+ next_section = &explain_context -> result_front_junctions[front_i][xk1 - explain_context -> result_back_junction_numbers[back_i] +2];
+ is_front_search = 1;
+ } else {
+ current_section = &explain_context -> result_back_junctions[back_i][xk1];
+ if(xk1+1 < explain_context -> result_back_junction_numbers[back_i])
+ next_section = &explain_context -> result_back_junctions[back_i][xk1+1];
+ }
- if(xk1>0)
- flanking_size_right[xk1-1] = (read_pos_end - read_pos_start);
- if(event_after)
- {
- if(event_after -> event_type == CHRO_EVENT_TYPE_INDEL)
- sprintf(piece_cigar+strlen(piece_cigar), "%d%c", abs(event_after->indel_length), event_after->indel_length>0?'D':'I');
- else if(event_after -> event_type == CHRO_EVENT_TYPE_JUNCTION||event_after -> event_type == CHRO_EVENT_TYPE_FUSION)
- {
- //char jump_mode = current_section -> is_connected_to_large_side?'B':'N';
- //if(event_after -> is_strand_jumped) jump_mode = tolower(jump_mode);
+ if(xk1 == explain_context -> result_back_junction_numbers[back_i] - 1)
+ read_pos_start = explain_context -> result_back_junctions[back_i][xk1].read_pos_start;
+ else read_pos_start = current_section -> read_pos_start;
+
+ read_pos_end = current_section -> read_pos_end;
+ chromosome_event_t *event_after = current_section -> event_after_section;
+
+ sprintf(piece_cigar, "%dM", (read_pos_end - read_pos_start));
+ total_perfect_matched_sections += (read_pos_end - read_pos_start);
+ flanking_size_left[xk1] = (read_pos_end - read_pos_start);
+
+ if(xk1<explain_context -> result_back_junction_numbers[back_i] + explain_context -> result_front_junction_numbers[front_i] -2)
+ assert(event_after);
- // the distance in CIGAR is the NEXT UNWANTED BASE of piece#1 to the FIRST WANTED BASE in piece#2
- int delta_one ;
- if(current_section -> is_strand_jumped + current_section -> is_connected_to_large_side == 1) delta_one = 1;
- else delta_one = -1;
+ if(xk1>0)
+ flanking_size_right[xk1-1] = (read_pos_end - read_pos_start);
- // if it is from front_search, the event side points to the first WANTED base of the next section; it should be moved to the last WANTED base the next section if the next section is jumped.
- if(next_section && (event_after -> is_strand_jumped + current_section -> is_strand_jumped==1))
+ if(event_after)
{
- if(is_front_search)
- {
- if(current_section -> is_connected_to_large_side)
- delta_one += (next_section->read_pos_end - next_section-> read_pos_start - 1);
- else
- delta_one -= (next_section->read_pos_end - next_section-> read_pos_start - 1);
- }
- else
+ if(event_after -> event_type == CHRO_EVENT_TYPE_INDEL)
{
- if(current_section -> is_connected_to_large_side)
- delta_one += (next_section->read_pos_end - next_section-> read_pos_start - 1);
- else
- delta_one -= (next_section->read_pos_end - next_section-> read_pos_start - 1);
- }
- }
+ if(0 && FIXLENstrcmp("R000002444", explain_context -> read_name) ==0){
+ SUBREADprintf("Get INDEL from the %d-th mapped section (back=%d, front=%d) ; event_pntr=%p, section_mapped_len=%d (start=%d, end=%d)\n", xk1, explain_context -> result_back_junction_numbers[back_i] , explain_context -> result_front_junction_numbers[front_i] , event_after, read_pos_end - read_pos_start, read_pos_start, read_pos_end);
+ }
+ sprintf(piece_cigar+strlen(piece_cigar), "%d%c", abs(event_after->indel_length), event_after->indel_length>0?'D':'I');
+ } else if(event_after -> event_type == CHRO_EVENT_TYPE_JUNCTION||event_after -> event_type == CHRO_EVENT_TYPE_FUSION) {
+ // the distance in CIGAR is the NEXT UNWANTED BASE of piece#1 to the FIRST WANTED BASE in piece#2
+ int delta_one ;
+ if(current_section -> is_strand_jumped + current_section -> is_connected_to_large_side == 1) delta_one = 1;
+ else delta_one = -1;
+
+ // if it is from front_search, the event side points to the first WANTED base of the next section; it should be moved to the last WANTED base the next section if the next section is jumped.
+ if(next_section && (event_after -> is_strand_jumped + current_section -> is_strand_jumped==1))
+ {
+ if(is_front_search)
+ {
+ if(current_section -> is_connected_to_large_side)
+ delta_one += (next_section->read_pos_end - next_section-> read_pos_start - 1);
+ else
+ delta_one -= (next_section->read_pos_end - next_section-> read_pos_start - 1);
+ }
+ else
+ {
+ if(current_section -> is_connected_to_large_side)
+ delta_one += (next_section->read_pos_end - next_section-> read_pos_start - 1);
+ else
+ delta_one -= (next_section->read_pos_end - next_section-> read_pos_start - 1);
+ }
+ }
+
+ char jump_mode = current_section -> is_connected_to_large_side?'B':'N';
+ long long int movement = event_after -> event_large_side;
+ movement -= event_after -> event_small_side - delta_one;
+ if(1){
+ if(jump_mode == 'B' && movement < 0){
+ movement = - movement;
+ jump_mode = 'N';
+ }else if(jump_mode == 'N' && movement < 0){
+ movement = - movement;
+ jump_mode = 'B';
+ }
+ }
+
+ if(event_after -> is_strand_jumped) jump_mode = tolower(jump_mode);
+ fusions_in_read += (event_after -> event_type == CHRO_EVENT_TYPE_FUSION);
- char jump_mode = current_section -> is_connected_to_large_side?'B':'N';
- long long int movement = event_after -> event_large_side;
- movement -= event_after -> event_small_side - delta_one;
- if(1){
- if(jump_mode == 'B' && movement < 0){
- movement = - movement;
- jump_mode = 'N';
- }else if(jump_mode == 'N' && movement < 0){
- movement = - movement;
- jump_mode = 'B';
- }
- }
+ //if(event_after -> event_large_side + delta_one < event_after -> event_small_side)
+ // SUBREADprintf("%s CONNECT_TO_LARGE : %d REV ENV: %u ~ %u: %s, DELTA=%d, MOVE_LEN=%d, READ=%s JUMP: CUR=%d, AFT=%d\n", is_front_search?"FRONT_SEARCH":"BACK_SEARCH", current_section -> is_connected_to_large_side, event_after -> event_small_side , event_after -> event_large_side, explain_context -> read_name, delta_one, event_after -> event_large_side - event_after -> event_small_side + delta_one, explain_context -> read_name, current_section -> is_strand_jumped, event_after -> [...]
- if(event_after -> is_strand_jumped) jump_mode = tolower(jump_mode);
+ sprintf(piece_cigar+strlen(piece_cigar), "%u%c", (int)movement, jump_mode);
-
- sprintf(piece_cigar+strlen(piece_cigar), "%u%c", (int)movement, jump_mode);
- if(event_after -> indel_at_junction) sprintf(piece_cigar+strlen(piece_cigar), "%dI", event_after -> indel_at_junction);
- is_junction_read ++;
+ //if(event_after -> event_large_side + delta_one < event_after -> event_small_side)
+ // SUBREADprintf("PART CIGAR=%s\n" , piece_cigar);
+
+ if(event_after -> indel_at_junction) sprintf(piece_cigar+strlen(piece_cigar), "%dI", event_after -> indel_at_junction);
+ is_junction_read ++;
+ }
+ to_be_supported[to_be_supported_count++] = event_after;
+ }
+ strcat(tmp_cigar, piece_cigar);
+ if(strlen(tmp_cigar) > CORE_MAX_CIGAR_STR_LEN - 14){
+ is_cigar_overflow=1;
+ break;
+ }
}
- to_be_supported[to_be_supported_count++] = event_after;
- }
- strcat(tmp_cigar, piece_cigar);
- if(strlen(tmp_cigar)>80){
- is_cigar_overflow=1;
- break;
- }
- }
- int mismatch_bases = 0, isCigarOK = 0;
+ int mismatch_bases = 0, isCigarOK = 0;
- if(is_cigar_overflow) sprintf(tmp_cigar, "%dM", explain_context -> full_read_len);
+ if(is_cigar_overflow) sprintf(tmp_cigar, "%dM", explain_context -> full_read_len);
- unsigned int final_position = explain_context -> back_search_junctions[0].abs_offset_for_start;
+ unsigned int final_position;
- int is_exonic_read_fraction_OK = 1;
+ if( explain_context -> result_back_junction_numbers[back_i] + explain_context -> result_front_junction_numbers[front_i] <= 2) final_position = result -> selected_position;
+ else final_position = explain_context -> result_back_junctions[back_i][0].abs_offset_for_start;
- if( global_context -> config.minimum_exonic_subread_fraction > 0.0000001 && (!is_junction_read) && result -> used_subreads_in_vote>0)
- {
- int min_subreads = global_context -> config.minimum_exonic_subread_fraction * result-> used_subreads_in_vote;
- if( result -> selected_votes < min_subreads )
- is_exonic_read_fraction_OK = 0 ;
- }
+ int is_exonic_read_fraction_OK = 1;
+ if( global_context -> config.minimum_exonic_subread_fraction > 0.0000001 && (!is_junction_read) && result -> used_subreads_in_vote>0)
+ {
+ int min_subreads = global_context -> config.minimum_exonic_subread_fraction * result-> used_subreads_in_vote;
+ if( result -> selected_votes < min_subreads )
+ is_exonic_read_fraction_OK = 0 ;
+ }
- int final_qual = 0, applied_mismatch = 0;
- if(is_exonic_read_fraction_OK)
- {
- final_qual = final_CIGAR_quality(global_context, thread_context, explain_context -> full_read_text, explain_context -> full_qual_text, explain_context -> full_read_len , tmp_cigar, final_position, is_first_section_negative != ((result->result_flags & CORE_IS_NEGATIVE_STRAND)?1:0), &mismatch_bases, result -> confident_coverage_start, result -> confident_coverage_end);
+ int final_qual = 0, applied_mismatch = 0, non_clipped_length = 0, total_indel_length = 0, total_coverage_length = 0, final_MATCH = 0;
+ if(is_exonic_read_fraction_OK)
+ {
+ total_coverage_length = result -> confident_coverage_end - result -> confident_coverage_start;
+ final_qual = final_CIGAR_quality(global_context, thread_context, explain_context -> full_read_text, explain_context -> full_qual_text, explain_context -> full_read_len , tmp_cigar, final_position, is_first_section_negative != ((result->result_flags & CORE_IS_NEGATIVE_STRAND)?1:0), &mismatch_bases, result -> confident_coverage_start, result -> confident_coverage_end, explain_context -> read_name, &non_clipped_length, &total_indel_length, & final_MATCH);
- applied_mismatch = is_junction_read? global_context->config.max_mismatch_junction_reads:global_context->config.max_mismatch_exonic_reads ;
- if(explain_context->full_read_len > EXON_LONG_READ_LENGTH)
- applied_mismatch = ((((explain_context->full_read_len+1)<<16) / 100) * applied_mismatch)>>16;
- if(global_context -> config.space_type == GENE_SPACE_COLOR) applied_mismatch += to_be_supported_count*2;
- }
+ applied_mismatch = is_junction_read? global_context->config.max_mismatch_junction_reads:global_context->config.max_mismatch_exonic_reads ;
+ if(explain_context->full_read_len > EXON_LONG_READ_LENGTH)
+ applied_mismatch = ((((explain_context->full_read_len+1)<<16) / 100) * applied_mismatch)>>16;
+ if(global_context -> config.space_type == GENE_SPACE_COLOR) applied_mismatch += to_be_supported_count*2;
+ }
- //#warning " ========== COMMENT THIS LINE !! ========="
- //if(explain_context -> pair_number == 999999)
- if(0 && memcmp(explain_context -> read_name, "H7TVLADXX140423:2:1112:17883:23072", 32)==0)
- printf("%s : POS=%u\tCIGAR=%s\tMM=%d > %d?\tVOTE=%d > %0.2f x %d ?\tQUAL=%d\tBRNO=%d\n", explain_context -> read_name, final_position , tmp_cigar, mismatch_bases, applied_mismatch, result -> selected_votes, global_context -> config.minimum_exonic_subread_fraction,result-> used_subreads_in_vote, final_qual, explain_context -> best_read_id);
- if(mismatch_bases <= applied_mismatch && is_exonic_read_fraction_OK)
- {
- int compressed_len;
+ //#warning " ========== COMMENT THIS LINE !! ========="
+ //if(explain_context -> pair_number == 999999)
+
+ // ACDB PVDB TTTS
+ if(0 && FIXLENstrcmp("R001135677", explain_context -> read_name) ==0)
+ SUBREADprintf("FINALQUAL %s : FINAL_POS=%u\tCIGAR=%s\tMM=%d > %d?\tVOTE=%d > %0.2f x %d ? MASK=%d\tQUAL=%d\tBRNO=%d\n\n", explain_context -> read_name, final_position , tmp_cigar, mismatch_bases, applied_mismatch, result -> selected_votes, global_context -> config.minimum_exonic_subread_fraction,result-> used_subreads_in_vote, result->result_flags, final_qual, explain_context -> best_read_id);
- if(((result -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0) != is_first_section_negative)
- {
- if(!global_context->config.do_fusion_detection)
- assert(0);
- result -> cigar_string[0]=0xff;
- compressed_len = cigar2bincigar(tmp_cigar, result -> cigar_string + 1, CORE_MAX_CIGAR_LEN - 1);
- if(result -> cigar_string [1]==0) result -> cigar_string[0]=0;
- }
- else
- compressed_len = cigar2bincigar(tmp_cigar, result -> cigar_string, CORE_MAX_CIGAR_LEN);
- // commit the change to the chromosome_events
- if(compressed_len>0)
- {
- int is_RNA_from_positive = -1;
- for(xk1= 0; xk1 < to_be_supported_count; xk1++)
+ if( mismatch_bases <= applied_mismatch && is_exonic_read_fraction_OK && fusions_in_read < 2)
{
- if(to_be_supported [xk1] -> event_type == CHRO_EVENT_TYPE_INDEL || is_junction_read)
+ realignment_result_t * realign_res = final_realignments+final_alignment_number;
+ final_alignment_number ++;
+
+ realign_res -> realign_flags = result->result_flags;
+ realign_res -> first_base_is_jumpped = 0;
+ realign_res -> mapping_result = result;
+
+ if(mismatch_bases > applied_mismatch ) realign_res -> realign_flags |= CORE_TOO_MANY_MISMATCHES;
+ else realign_res -> realign_flags &= ~CORE_TOO_MANY_MISMATCHES;
+
+ if(((result -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0) != is_first_section_negative)
{
- if(thread_context)
- {
- ((indel_thread_context_t *)thread_context -> module_thread_contexts[MODULE_INDEL_ID]) -> final_counted_reads_array [ to_be_supported [xk1] -> global_event_id] ++;
- ((indel_thread_context_t *)thread_context -> module_thread_contexts[MODULE_INDEL_ID]) -> final_reads_mismatches_array [ to_be_supported [xk1] -> global_event_id] += mismatch_bases;
- }
- else
- {
- to_be_supported [xk1] -> final_counted_reads ++;
- to_be_supported [xk1] -> final_reads_mismatches +=mismatch_bases;
- }
+ assert(global_context->config.do_fusion_detection);
+ realign_res -> first_base_is_jumpped = 1;
}
- if(to_be_supported [xk1] -> event_type !=CHRO_EVENT_TYPE_INDEL && is_junction_read)
+ strcpy(realign_res -> cigar_string, tmp_cigar);
+
+ if(1)
{
- short current_event_flanking_left = flanking_size_left[xk1];
- short current_event_flanking_right = flanking_size_right[xk1];
- to_be_supported [xk1] -> junction_flanking_left = max(to_be_supported [xk1] -> junction_flanking_left, current_event_flanking_left);
- to_be_supported [xk1] -> junction_flanking_right = max(to_be_supported [xk1] -> junction_flanking_right, current_event_flanking_right);
+ // commit the change to the chromosome_events
+
+ int is_RNA_from_positive = -1;
- if(to_be_supported [xk1] -> event_type == CHRO_EVENT_TYPE_JUNCTION && to_be_supported [xk1] -> is_donor_found && is_RNA_from_positive == -1)
- is_RNA_from_positive = !(to_be_supported [xk1] -> is_negative_strand);
+ unsigned long long read_id = 2llu * explain_context -> pair_number + explain_context->is_second_read;
- }
- }
+ for(xk1= 0; xk1 < to_be_supported_count; xk1++)
+ {
+ if(xk1 >= MAX_EVENTS_IN_READ) break;
+ if(0 && strcmp( explain_context -> read_name, "ERR161544.68584")==0)
+ SUBREADprintf("%s RELATED_EVENT= EVENT_NO_%d\n", explain_context -> read_name , to_be_supported[xk1] -> global_event_id);
+ if(to_be_supported [xk1] -> event_type !=CHRO_EVENT_TYPE_INDEL && is_junction_read){
+ if(to_be_supported [xk1] -> event_type == CHRO_EVENT_TYPE_JUNCTION && to_be_supported [xk1] -> is_donor_found && is_RNA_from_positive == -1)
+ is_RNA_from_positive = !(to_be_supported [xk1] -> is_negative_strand);
+ }
+ realign_res -> supporting_chromosome_events[xk1] = to_be_supported[xk1];
+ realign_res -> flanking_size_left[xk1] = flanking_size_left[xk1];
+ realign_res -> flanking_size_right[xk1] = flanking_size_right[xk1];
+ realign_res -> crirical_support[xk1] += (read_id == to_be_supported [xk1] -> critical_read_id);
+ //if(flanking_size_left[xk1]>=16 && flanking_size_right[xk1]>=16) realign_res -> crirical_support[xk1]++;
+ //SUBREADprintf("CRITICAL=%llu, THIS=%llu\n", read_id, to_be_supported [xk1] -> critical_read_id);
+ //if(read_id == to_be_supported [xk1] -> critical_read_id) realign_res -> crirical_support[] = // to_be_supported [xk1] -> critical_supporting_reads ++;
+ }
+ if(to_be_supported_count < MAX_EVENTS_IN_READ )
+ realign_res -> supporting_chromosome_events[to_be_supported_count] = NULL;
+
+ result -> result_flags |= CORE_IS_FULLY_EXPLAINED;
+ result -> read_length = explain_context->full_read_len;
- result -> result_flags |= CORE_IS_FULLY_EXPLAINED;
+ //if(explain_context -> pair_number < 20)
+ // SUBREADprintf("RESULT %d at %p : FLAGS=%d\n", explain_context -> pair_number, result, result -> result_flags);
+ if(is_RNA_from_positive == -1)
+ {
+ realign_res -> realign_flags |= CORE_NOTFOUND_DONORS ;
+ realign_res -> realign_flags &= ~(CORE_IS_GT_AG_DONORS);
+ }
+ else
+ {
+ realign_res -> realign_flags &= ~ (CORE_NOTFOUND_DONORS | CORE_IS_GT_AG_DONORS);
- if(is_RNA_from_positive == -1)
- {
- result -> result_flags |= CORE_NOTFOUND_DONORS ;
- result -> result_flags &= ~(CORE_IS_GT_AG_DONORS);
- }
- else
- {
- result -> result_flags &= ~ (CORE_NOTFOUND_DONORS | CORE_IS_GT_AG_DONORS);
+ if(is_RNA_from_positive)
+ realign_res -> realign_flags |= CORE_IS_GT_AG_DONORS;
+ }
- if(is_RNA_from_positive)
- result -> result_flags |= CORE_IS_GT_AG_DONORS;
- }
+ isCigarOK=1;
+ }
- isCigarOK=1;
- }
- //else printf("CIGAR COMPRESSION ERROR : %s\n", tmp_cigar);
- }
+ //final_MATCH = non_clipped_length - mismatch_bases;
+ //if(final_MATCH > 0);
+ //else printf("CIGAR COMPRESSION ERROR : %s by %s\n", tmp_cigar, explain_context -> read_name);
- if(!isCigarOK)
- {
- result -> final_quality = final_qual;
- result -> result_flags &= ~CORE_IS_FULLY_EXPLAINED;
- result -> Score_H &= 0x7fffffffffffffffllu;
- if(explain_context-> best_read_id)
- {
- alignment_result_t * result_prime = _global_retrieve_alignment_ptr(global_context, explain_context->pair_number, 0, 0);
- result_prime -> Score_H &= 0x7fffffffffffffffllu;
- result_prime = _global_retrieve_alignment_ptr(global_context, explain_context->pair_number, 1, 0);
- result_prime -> Score_H &= 0x7fffffffffffffffllu;
+ realign_res -> first_base_position = final_position;
+ realign_res -> final_quality = final_qual;
+ realign_res -> final_mismatched_bases = mismatch_bases;
+ realign_res -> final_matched_bases = (unsigned short)final_MATCH;
+ realign_res -> best_second_diff_bases = (9<explain_context -> best_second_match_diff)?-1:explain_context -> best_second_match_diff;
+
+ }
}
}
- result -> selected_position = final_position;
- result -> final_quality = final_qual;
- result -> final_mismatched_bases = mismatch_bases;
- result -> best_second_diff_bases = (9<explain_context -> best_second_match_diff)?-1:explain_context -> best_second_match_diff;
-
- return 0;
+ //SUBREADprintf("L2MM = %d\n", final_MATCH);
+ //return final_MATCH * 10000 - total_indel_length;
+ return final_alignment_number;
}
@@ -2061,20 +2600,16 @@ int paired_chars_full_core(char * ch1, char * ch2, int is_reverse)
int paired_chars_part_core(char * ch1, char * ch2, int is_reverse)
{
- if (c2eq(ch1, ch2, "GT", "AG") || c2eq(ch1, ch2, "CT", "AC"))
- {
- if (is_reverse)
- {
+ if (c2eq(ch1, ch2, "GT", "AG") || c2eq(ch1, ch2, "CT", "AC")) {
+ if (is_reverse){
if (ceq(ch1, "AG") || ceq(ch1, "AC")) return 1;
- }else
+ } else {
if (ceq(ch1, "CT") || ceq(ch1, "GT")) return 1;
+ }
}
return 0;
}
-#define paired_chars paired_chars_part_core
-
-
#define is_donor_chars_full(cc) (((cc)[0]=='G' && (cc)[1]=='T') || \
((cc)[0]=='A' && (cc)[1]=='G') || \
((cc)[0]=='A' && (cc)[1]=='C') || \
@@ -2089,13 +2624,33 @@ int paired_chars_part_core(char * ch1, char * ch2, int is_reverse)
((cc)[0]=='A' && (cc)[1]=='C') || \
((cc)[0]=='C' && (cc)[1]=='T'))
+//#warning "=============== NO DONOR-RECEPTOR NEEDED =============="
+//#define is_donor_chars(x) 1
+//#define paired_chars(x,y,z) 1
+
#define is_donor_chars is_donor_chars_part
+#define paired_chars paired_chars_part_core
+
+void print_big_margin(global_context_t * global_context, subread_read_number_t pair_number, int is_second_read){
+ unsigned short * big_margin_record = _global_retrieve_big_margin_ptr(global_context,pair_number, is_second_read);
+ int x1;
-int is_ambiguous_voting(global_context_t * global_context, int pair_number, int is_second_read, int max_vote, int max_start,int max_end, int read_len, int is_negative)
+ SUBREADprintf("\n >>> READ_NO=%llu, SECOND=%d, MEM=%p <<< \n", pair_number, is_second_read, big_margin_record);
+ for(x1 = 0; x1 < global_context->config.big_margin_record_size/3 ; x1++)
+ {
+ SUBREADprintf("%d %d~%d ", big_margin_record[x1*3] , big_margin_record[x1*3+1] , big_margin_record[x1*3+2]);
+ }
+ SUBREADputs("");
+}
+
+#define ABGIGUOUS_TOLERANCE 3
+
+int is_ambiguous_voting(global_context_t * global_context, subread_read_number_t pair_number, int is_second_read, int selected_vote, int max_start,int max_end, int read_len, int is_negative)
{
+ if( global_context->config.big_margin_record_size<3) return 0;
int xk1;
int encounter = 0;
@@ -2106,23 +2661,34 @@ int is_ambiguous_voting(global_context_t * global_context, int pair_number, int
max_end = read_len - tmp;
}
- if(read_len > 255)
- {
- max_start = max_start>>2;
- max_end = max_end>>2;
- }
-
- unsigned char * big_margin_record = _global_retrieve_big_margin_ptr(global_context,pair_number, is_second_read);
+ unsigned short * big_margin_record = _global_retrieve_big_margin_ptr(global_context,pair_number, is_second_read);
for(xk1 = 0; xk1 < global_context->config.big_margin_record_size/3 ; xk1++)
{
if(!big_margin_record[xk1*3])break;
- if((big_margin_record[xk1*3]) >= max_vote -1) // actually, max-1
- if(big_margin_record[xk1*3+1] >= max_start - 2 && big_margin_record[xk1*3+2] <= max_end + 1)
- encounter++;
+ if(big_margin_record[xk1*3] >= selected_vote - 1) // actually, max-1
+ {
+ if(0) {
+ if ( max_start >= big_margin_record[xk1*3+1] - ABGIGUOUS_TOLERANCE && max_end <= big_margin_record[xk1*3+2] + ABGIGUOUS_TOLERANCE )
+ encounter++;
+ else if ( big_margin_record[xk1*3+1] >= max_start - ABGIGUOUS_TOLERANCE && big_margin_record[xk1*3+2] <= max_end + ABGIGUOUS_TOLERANCE )
+ encounter++;
+
+ } else {
+ // 4 and 4 (start and end tolerances) are the best settings for indel and fusion simulation.
+ if(selected_vote >= big_margin_record[xk1*3]) {
+ if(big_margin_record[xk1*3+1] >= max_start - 4 && big_margin_record[xk1*3+2] <= max_end + 4)
+ encounter++;
+ } else {
+ if(big_margin_record[xk1*3+1] <= max_start + 4 && big_margin_record[xk1*3+2] >= max_end - 4)
+ encounter++;
+ }
+ }
+ }
}
+
if(encounter>1) return encounter;
return 0;
}
@@ -2134,7 +2700,7 @@ int is_ambiguous_voting(global_context_t * global_context, int pair_number, int
// Note that the read_text is on reversed mode. The guess points are on reversed mode too.
// "Left" and "Right" means the left/right half in the "reversed" read.
-int donor_jumped_score(global_context_t * global_context, thread_context_t * thread_context, unsigned int left_virtualHead_abs_offset, unsigned int right_virtualHead_abs_offset, int guess_start, int guess_end, char * read_text, int read_len, int is_left_half_negative, int is_right_half_negative, int normally_arranged, int is_second_read, int * final_split_point, int * is_GT_AG_strand, int * is_donor_found)
+int donor_jumped_score(global_context_t * global_context, thread_context_t * thread_context, unsigned int small_virtualHead_abs_offset, unsigned int large_virtualHead_abs_offset, int guess_start, int guess_end, char * read_text, int read_len, int is_small_half_negative, int is_large_half_negative, int small_half_on_left_reversed, int * final_split_point, int * is_GT_AG_strand, int * is_donor_found, int * small_side_increasing_coordinate, int * large_side_increasing_coordinate)
{
gene_value_index_t * value_index = thread_context?thread_context->current_value_index:global_context->current_value_index ;
// guess_end is the index of the first UNWANTED BASE.
@@ -2152,8 +2718,13 @@ int donor_jumped_score(global_context_t * global_context, thread_context_t * thr
strcpy(positive_read, read_text) ;
reverse_read(positive_read, read_len, global_context->config.space_type);
- //printf("TEST_JUMPED: %u - %u\n", left_virtualHead_abs_offset, right_virtualHead_abs_offset);
+ //printf("TEST_JUMPED: %u - %u\n", small_virtualHead_abs_offset, large_virtualHead_abs_offset);
+
+ (*small_side_increasing_coordinate) = (small_half_on_left_reversed != is_small_half_negative);
+ (*large_side_increasing_coordinate) = (small_half_on_left_reversed == is_large_half_negative);
+
+
for(real_split_point_i = 0 ; real_split_point_i < real_split_point_numbers; real_split_point_i++)
{
int left_should_match, right_should_match;
@@ -2166,33 +2737,33 @@ int donor_jumped_score(global_context_t * global_context, thread_context_t * thr
int is_donor_test_ok=0;
- if(normally_arranged)
+ if(small_half_on_left_reversed)
{
- unsigned int small_pos_test_begin = left_virtualHead_abs_offset + (is_left_half_negative?real_split_point_as_reversed - JUNCTION_CONFIRM_WINDOW:(read_len - real_split_point_as_reversed));
- char * small_pos_read_begin = (is_left_half_negative?read_text:positive_read) + (is_left_half_negative?
+ unsigned int small_pos_test_begin = small_virtualHead_abs_offset + (is_small_half_negative?real_split_point_as_reversed - JUNCTION_CONFIRM_WINDOW:(read_len - real_split_point_as_reversed));
+ char * small_pos_read_begin = (is_small_half_negative?read_text:positive_read) + (is_small_half_negative?
(real_split_point_as_reversed - JUNCTION_CONFIRM_WINDOW) :
(read_len - real_split_point_as_reversed)
);
- unsigned int large_pos_test_begin = right_virtualHead_abs_offset + (is_right_half_negative?real_split_point_as_reversed:(read_len - real_split_point_as_reversed - JUNCTION_CONFIRM_WINDOW));
- char * large_pos_read_begin = (is_right_half_negative?read_text:positive_read) + (is_right_half_negative?
+ unsigned int large_pos_test_begin = large_virtualHead_abs_offset + (is_large_half_negative?real_split_point_as_reversed:(read_len - real_split_point_as_reversed - JUNCTION_CONFIRM_WINDOW));
+ char * large_pos_read_begin = (is_large_half_negative?read_text:positive_read) + (is_large_half_negative?
(real_split_point_as_reversed) :
(read_len - real_split_point_as_reversed - JUNCTION_CONFIRM_WINDOW));
left_should_match = match_chro(small_pos_read_begin , value_index , small_pos_test_begin , JUNCTION_CONFIRM_WINDOW , 0, global_context -> config.space_type);
right_should_match = match_chro(large_pos_read_begin , value_index , large_pos_test_begin , JUNCTION_CONFIRM_WINDOW , 0, global_context -> config.space_type);
left_should_not_match = right_should_not_match = 0;
- //match_chro(read_text + real_split_point - JUNCTION_CONFIRM_WINDOW, value_index, left_virtualHead_abs_offset + real_split_point - JUNCTION_CONFIRM_WINDOW , JUNCTION_CONFIRM_WINDOW , 0, global_context -> config.space_type);
+ //match_chro(read_text + real_split_point - JUNCTION_CONFIRM_WINDOW, value_index, small_virtualHead_abs_offset + real_split_point - JUNCTION_CONFIRM_WINDOW , JUNCTION_CONFIRM_WINDOW , 0, global_context -> config.space_type);
}
else
{
- unsigned int small_pos_test_begin = left_virtualHead_abs_offset + (is_left_half_negative?real_split_point_as_reversed:(read_len - real_split_point_as_reversed - JUNCTION_CONFIRM_WINDOW));
- char * small_pos_read_begin = (is_left_half_negative?read_text:positive_read) + (is_left_half_negative?
+ unsigned int small_pos_test_begin = small_virtualHead_abs_offset + (is_small_half_negative?real_split_point_as_reversed:(read_len - real_split_point_as_reversed - JUNCTION_CONFIRM_WINDOW));
+ char * small_pos_read_begin = (is_small_half_negative?read_text:positive_read) + (is_small_half_negative?
(real_split_point_as_reversed):(read_len - real_split_point_as_reversed - JUNCTION_CONFIRM_WINDOW));
- unsigned int large_pos_test_begin = right_virtualHead_abs_offset + (is_right_half_negative?(real_split_point_as_reversed - JUNCTION_CONFIRM_WINDOW):(read_len - real_split_point_as_reversed));
- char * large_pos_read_begin = (is_right_half_negative?read_text:positive_read) + (is_right_half_negative?
+ unsigned int large_pos_test_begin = large_virtualHead_abs_offset + (is_large_half_negative?(real_split_point_as_reversed - JUNCTION_CONFIRM_WINDOW):(read_len - real_split_point_as_reversed));
+ char * large_pos_read_begin = (is_large_half_negative?read_text:positive_read) + (is_large_half_negative?
(real_split_point_as_reversed - JUNCTION_CONFIRM_WINDOW):(read_len - real_split_point_as_reversed));
left_should_match = match_chro(small_pos_read_begin , value_index , small_pos_test_begin , JUNCTION_CONFIRM_WINDOW , 0, global_context -> config.space_type);
@@ -2201,7 +2772,9 @@ int donor_jumped_score(global_context_t * global_context, thread_context_t * thr
}
- int mismatch_in_between_allowd = (global_context -> config.more_accurate_fusions)?0:1;
+ //#warning "============ REMOVE THE TWO '+ 1' FROM THE NEXT LINE ================="
+ //#warning "============ ADD THE TWO '+ 1's IN THE BRACKETS FOR SVs GRANT APP ================="
+ int mismatch_in_between_allowd = (global_context -> config.more_accurate_fusions)?(0):(1);
if(left_should_match + right_should_match >= JUNCTION_CONFIRM_WINDOW*2 - mismatch_in_between_allowd &&
left_should_not_match <= JUNCTION_CONFIRM_WINDOW -3 && right_should_not_match <= JUNCTION_CONFIRM_WINDOW -3)
{
@@ -2226,12 +2799,15 @@ int donor_jumped_score(global_context_t * global_context, thread_context_t * thr
}
-int donor_score(global_context_t * global_context, thread_context_t * thread_context, unsigned int left_virtualHead_abs_offset, unsigned int right_virtualHead_abs_offset, int left_indel_offset, int right_indel_offset, int normally_arranged, int guess_start, int guess_end, char * read_text, int read_len, int is_second_read, int * final_split_point, int * is_GT_AG_strand, int * is_donor_found, int * final_inserted_bases)
+int donor_score(global_context_t * global_context, thread_context_t * thread_context, unsigned int left_virtualHead_abs_offset, unsigned int right_virtualHead_abs_offset, int left_indel_offset, int right_indel_offset, int normally_arranged, int guess_start, int guess_end, char * read_text, int read_len, int * final_split_point, int * is_GT_AG_strand, int * is_donor_found, int * final_inserted_bases, int * small_side_increasing_coordinate, int * large_side_increasing_coordinate, char * r [...]
{
gene_value_index_t * value_index = thread_context?thread_context->current_value_index:global_context->current_value_index;
- int need_donor_test = global_context->config.is_rna_seq_reads && global_context -> config.check_donor_at_junctions && (! global_context->config.do_fusion_detection);
+ int need_donor_test = global_context->config.do_breakpoint_detection && global_context -> config.check_donor_at_junctions && (! global_context->config.do_fusion_detection);
+
+ (*small_side_increasing_coordinate)=!normally_arranged;
+ (*large_side_increasing_coordinate)= normally_arranged;
// guess_end is the index of the first UNWANTED BASE.
int most_likely_point = (guess_start+guess_end)/2;
@@ -2242,11 +2818,13 @@ int donor_score(global_context_t * global_context, thread_context_t * thread_con
int best_score = -111111;
+ int non_insertion_preferred = 0;
int real_split_point_i;
int real_split_point_numbers = guess_end - guess_start;
- //printf("TESTDON: LR=%d; RR=%d\n", left_indel_offset, right_indel_offset);
+ if(0 && FIXLENstrcmp("R006633992", read_name) == 0)
+ SUBREADprintf("TESTDON: LR=%d; RR=%d\n", left_indel_offset, right_indel_offset);
for(real_split_point_i = 0 ; real_split_point_i < real_split_point_numbers; real_split_point_i++)
{
@@ -2282,9 +2860,17 @@ int donor_score(global_context_t * global_context, thread_context_t * thread_con
}
// donor_left[2]=0; donor_right[2]=0;
- //printf("TESTDON: %s %s; OFFSET=%d; DON_OK=%d; NORMAL=%d; LEFT_OFF=%d; RIGHT_OFF=%d\n", donor_left, donor_right, real_split_point_i, is_donor_test_ok, normally_arranged, left_indel_offset, right_indel_offset);
- int mismatch_in_between_allowd = (global_context -> config.more_accurate_fusions)?0:1;
+ if(0 && FIXLENstrcmp("R002403247", read_name) == 0)
+ {
+ donor_left[2]=0;
+ donor_right[2]=0;
+ SUBREADprintf("TESTDON: %s %s; OFFSET=%d; DON_OK=%d; NORMAL=%d; LEFT_OFF=%d; RIGHT_OFF=%d\n", donor_left, donor_right, real_split_point_i, is_donor_test_ok, normally_arranged, left_indel_offset, right_indel_offset);
+ }
+
+ //#warning "============ REMOVE THE TWO '+ 1' FROM THE NEXT LINE ================="
+ //#warning "============ ADD TWO '+ 1' IN THE BRACKETS FOR SVs GRANT APP ================="
+ int mismatch_in_between_allowd = (global_context -> config.more_accurate_fusions)?(0) : (1);
if(is_donor_test_ok || !need_donor_test)
{
if(normally_arranged)
@@ -2293,13 +2879,13 @@ int donor_score(global_context_t * global_context, thread_context_t * thread_con
left_should_match = match_chro(read_text + real_split_point - JUNCTION_CONFIRM_WINDOW, value_index, left_virtualHead_abs_offset + real_split_point - JUNCTION_CONFIRM_WINDOW + left_indel_offset , JUNCTION_CONFIRM_WINDOW , 0, global_context -> config.space_type);
//printf("INS=%d; LM=%d\t\tLOL=%u, LOR=%u, SP=%d\n", inserted_bases, left_should_match, left_virtualHead_abs_offset, right_virtualHead_abs_offset, real_split_point);
- if(left_should_match > JUNCTION_CONFIRM_WINDOW-2)
+ if(left_should_match > JUNCTION_CONFIRM_WINDOW- (global_context->config.max_insertion_at_junctions?5:2))
{
for(inserted_bases = 0; inserted_bases <= global_context->config.max_insertion_at_junctions; inserted_bases++)
{
right_should_match = match_chro(read_text + real_split_point + inserted_bases, value_index, right_virtualHead_abs_offset + real_split_point + right_indel_offset + inserted_bases, JUNCTION_CONFIRM_WINDOW , 0, global_context -> config.space_type);
- // printf("INS=%d; LM=%d; RM=%d\t\tLOL=%u, LOR=%u, SP=%d\n", inserted_bases, left_should_match, right_should_match, left_virtualHead_abs_offset, right_virtualHead_abs_offset, real_split_point);
+ //printf("INS=%d; LM=%d; RM=%d\t\tLOL=%u, LOR=%u, SP=%d\n", inserted_bases, left_should_match, right_should_match, left_virtualHead_abs_offset, right_virtualHead_abs_offset, real_split_point);
if(right_should_match >= 2*JUNCTION_CONFIRM_WINDOW - left_should_match - mismatch_in_between_allowd)
{
left_should_not_match = match_chro(read_text + real_split_point + inserted_bases, value_index, left_virtualHead_abs_offset + real_split_point + left_indel_offset, JUNCTION_CONFIRM_WINDOW , 0, global_context -> config.space_type);
@@ -2316,8 +2902,8 @@ int donor_score(global_context_t * global_context, thread_context_t * thread_con
if(test_score > best_score)
{
- //if(left_virtualHead_abs_offset >2663426025 && left_virtualHead_abs_offset< 2663436025 && inserted_bases)
- // printf("INS=%d; BSS=%d; TSC=%d\n%s\n\n", inserted_bases , best_score, test_score, read_text);
+ //if(left_virtualHead_abs_offset > 2729745284 - 200 && left_virtualHead_abs_offset< 2729745284 + 200)
+ // SUBREADprintf("INS=%d; BSS=%d; TSC=%d\n%s\n\n", inserted_bases , best_score, test_score, read_text);
selected_junction_strand = (donor_left[0]=='G' || donor_right[1]=='G');
selected_inserted_bases = inserted_bases;
selected_real_split_point = real_split_point;
@@ -2326,6 +2912,8 @@ int donor_score(global_context_t * global_context, thread_context_t * thread_con
}
}
+ if(global_context->config.max_insertion_at_junctions && 0 == inserted_bases && right_should_match >= 2*JUNCTION_CONFIRM_WINDOW - left_should_match - 5)
+ non_insertion_preferred = 1;
}
}
@@ -2338,6 +2926,7 @@ int donor_score(global_context_t * global_context, thread_context_t * thread_con
right_should_not_match = match_chro(read_text + real_split_point, value_index, right_virtualHead_abs_offset + real_split_point + right_indel_offset, JUNCTION_CONFIRM_WINDOW , 0, global_context -> config.space_type);
left_should_not_match = match_chro(read_text + real_split_point - JUNCTION_CONFIRM_WINDOW, value_index, left_virtualHead_abs_offset + left_indel_offset + real_split_point - JUNCTION_CONFIRM_WINDOW, JUNCTION_CONFIRM_WINDOW , 0, global_context -> config.space_type);
+ //printf("LEFT:MA=%d UMA=%d RIGHT:MA=%d UMA=%d\n", left_should_match, left_should_not_match, right_should_match, right_should_not_match);
if(left_should_match +right_should_match >= 2*JUNCTION_CONFIRM_WINDOW - mismatch_in_between_allowd &&
left_should_not_match <= JUNCTION_CONFIRM_WINDOW -5 && right_should_not_match <= JUNCTION_CONFIRM_WINDOW -5)
@@ -2356,25 +2945,65 @@ int donor_score(global_context_t * global_context, thread_context_t * thread_con
}
}
}
- if(best_score>0)
+ if(best_score>0 && (0==non_insertion_preferred || 0==selected_inserted_bases))
{
*final_split_point = selected_real_split_point;
*is_donor_found = best_score>=290000;
*is_GT_AG_strand = selected_junction_strand;
*final_inserted_bases = selected_inserted_bases;
+
+ if(0 && FIXLENstrcmp("R000000029", read_name)==0)
+ SUBREADprintf("FINAL_INS_LEN=%d; BEST_SCORE=%d %s\n", selected_inserted_bases, best_score, read_name);
return (1+best_score)/100;
}
return 0;
}
+#define NEW_EXTEND_SCAN_INTRON_LONGEST 5000
+#define NEW_EXTEND_SCAN_EXON_SHORTEST 12
+
+typedef struct {
+ unsigned int small_exon_last_base;
+ unsigned int large_exon_first_base;
+ int canonical_donor_receptor_found;
+} newcore_extend_result_t;
+
+void newcore_extend_search_go(global_context_t * global_context, thread_context_t * thread_context, char * read_name, char * read_text, int search_to_tail, int candidate_last_base_in_exon_in_read, int candidate_last_base_in_exon_on_chro, newcore_extend_result_t * results, int * found_events) {
+
+}
+
+void newcore_extend_new_junctions( global_context_t * global_context, thread_context_t * thread_context, subread_read_number_t pair_number, char * read_name, char * read_text, char * qual_text, int read_len, int is_second_read, int best_read_id, mapping_result_t * result, subjunc_result_t * subjunc_result){
+ int scan_to_tail;
+ void * results;
+ for(scan_to_tail = 0; scan_to_tail < 2 ; scan_to_tail++) {
+ // (1) test whether this read is worth scanning toward the head and/or the tail
+ int unexplained_head ;
+ if(scan_to_tail) unexplained_head = read_len - result -> confident_coverage_end;
+ else unexplained_head = result -> confident_coverage_start;
+
+ if(unexplained_head < NEW_EXTEND_SCAN_EXON_SHORTEST) continue;
+
+ // (2) scan to head or to tail
+
+ unexplained_head += (scan_to_tail?-3:3);
+ int candidate_last_base_in_exon_in_read = unexplained_head, found_events = 0;
+ unsigned int candidate_last_base_in_exon_on_chro = result -> selected_position + unexplained_head;
+
+ newcore_extend_search_go(global_context, thread_context, read_name, read_text, scan_to_tail, candidate_last_base_in_exon_in_read, candidate_last_base_in_exon_on_chro, results, &found_events);
+ }
+}
+
-void find_new_junctions(global_context_t * global_context, thread_context_t * thread_context, int pair_number, char * read_text, char * qual_text, int read_len, int is_second_read, int best_read_id)
+void find_new_junctions(global_context_t * global_context, thread_context_t * thread_context, subread_read_number_t pair_number, char * read_name, char * read_text, char * qual_text, int read_len, int is_second_read, int best_read_id)
{
- alignment_result_t * result =_global_retrieve_alignment_ptr(global_context, pair_number, is_second_read, best_read_id);
+ mapping_result_t * result =_global_retrieve_alignment_ptr(global_context, pair_number, is_second_read, best_read_id);
subjunc_result_t * subjunc_result =_global_retrieve_subjunc_ptr(global_context, pair_number, is_second_read, best_read_id);
+ if(0)
+ newcore_extend_new_junctions(global_context, thread_context, pair_number, read_name, read_text, qual_text, read_len, is_second_read, best_read_id, result, subjunc_result);
+
if(read_len > EXON_LONG_READ_LENGTH)
{
assert(result -> selected_position <= 0xffff0000);
@@ -2383,23 +3012,54 @@ void find_new_junctions(global_context_t * global_context, thread_context_t * th
int selected_real_split_point = subjunc_result->split_point;
- //if(pair_number == 27842025)
- // printf("L1 MAIN_POS=%u; MINOR_POS=%u ; LEN=%d ; SPL=%d\nMNVT=%d ; RSSV=%d\n", result -> selected_position, subjunc_result -> minor_position, read_len, selected_real_split_point, subjunc_result -> minor_votes , result -> selected_votes );
+ //#warning " =============== remove "+ 2" FROM THE NEXT LINE (FOR A HIGHER ACCURACY FROM SubFusion on 19 JAN 2015) =================="
+ if(global_context -> config.do_fusion_detection && subjunc_result -> minor_votes < 1)return;
+ if((!global_context -> config.do_fusion_detection) && subjunc_result -> minor_votes < 1)return;
- if(subjunc_result -> minor_votes < 1)return;
- if(result -> selected_votes < global_context->config.minimum_subread_for_first_read)return;
+ //if(result -> selected_votes < global_context->config.minimum_subread_for_first_read)return;
if(global_context->config.do_big_margin_filtering_for_junctions)
{
+
+
+ if(0 && FIXLENstrcmp("R000000052", read_name) == 0 )
+ {
+ char posout[100];
+ int xk1;
+ absoffset_to_posstr(global_context, result -> selected_position, posout);
+
+
+ unsigned short * big_margin_record = _global_retrieve_big_margin_ptr(global_context,pair_number, is_second_read);
+ for(xk1 = 0; xk1 < global_context->config.big_margin_record_size ; xk1+=3)
+ {
+ SUBREADprintf("[%d] %d:%d:%d\t", xk1, big_margin_record[xk1], big_margin_record[xk1+1], big_margin_record[xk1+2]);
+ }
+
+ SUBREADprintf("\nSIZE=%d, [%s] ENCOUNTER=%d at %s (PROBE: v=%d coverage=%d - %d)\n", global_context->config.big_margin_record_size, read_name, is_ambiguous_voting(global_context, pair_number, is_second_read, result->selected_votes, result -> confident_coverage_start, result -> confident_coverage_end, read_len, (result->result_flags & CORE_IS_NEGATIVE_STRAND)?1:0), posout, result->selected_votes, result -> confident_coverage_start, result -> confident_coverage_end);
+
+ SUBREADprintf("NEWJUNC: %s , L1 MAIN_POS=%u; MINOR_POS=%u ; LEN=%d ; SPL=%d\nMNVT=%d ; RSSV=%d\n", read_name, result -> selected_position, subjunc_result -> minor_position, read_len, selected_real_split_point, subjunc_result -> minor_votes , result -> selected_votes );
+ }
+
+
+ //print_big_margin(global_context, pair_number, is_second_read);
if(is_ambiguous_voting(global_context, pair_number, is_second_read, result->selected_votes, result -> confident_coverage_start, result -> confident_coverage_end, read_len, (result->result_flags & CORE_IS_NEGATIVE_STRAND)?1:0))return;
}
- //if(pair_number == 27842025)
- //{
- // printf("SPLIT=%d\n", subjunc_result->split_point);
- //}
+ if(0){
+ #define TEST_SUBJUNC_POS0 100182270
+ if((result -> selected_position > TEST_SUBJUNC_POS0 - 100 && result -> selected_position < TEST_SUBJUNC_POS0 + 100)||
+ (subjunc_result -> minor_position > TEST_SUBJUNC_POS0 - 100 && subjunc_result -> minor_position < TEST_SUBJUNC_POS0 + 100))
+ //if(FIXLENstrcmp("V0112_0155:7:1101:14157:2012", read_name)==0)
+ SUBREADprintf("NEWJUNC: %s , L1 MAIN_POS=%u; MINOR_POS=%u ; LEN=%d ; SPL=%d\nMNVT=%d ; RSSV=%d\n", read_name, result -> selected_position, subjunc_result -> minor_position, read_len, selected_real_split_point, subjunc_result -> minor_votes , result -> selected_votes );
+ }
+
+
+
+
+ //if(strcmp(read_name, "dd1")==0)
+ // SUBREADprintf("SPLIT=%d\n", subjunc_result->split_point);
- //printf("SPLIT=%d\n", subjunc_result->split_point);
+ //SUBREADprintf("L1 SPLIT=%d\n", subjunc_result->split_point);
unsigned int left_virtualHead_abs_offset = min(result -> selected_position, subjunc_result -> minor_position);
unsigned int right_virtualHead_abs_offset = max(result -> selected_position, subjunc_result -> minor_position);
@@ -2408,43 +3068,62 @@ void find_new_junctions(global_context_t * global_context, thread_context_t * th
int is_donor_found = is_GT_AG_donors<3;
int is_strand_jumped = (result->result_flags & CORE_IS_STRAND_JUMPED)?1:0;
- if(1 && (!is_donor_found) && (selected_real_split_point < read_len * 0.2 || selected_real_split_point >= read_len *0.8000) )
- return;
-
if(selected_real_split_point>0)
{
unsigned int left_edge_wanted, right_edge_wanted;
if(is_strand_jumped)
{
+ if(0){
- // recover the "negative view" splicing point location
- int S = (result->result_flags & CORE_IS_NEGATIVE_STRAND) ? selected_real_split_point : (read_len - selected_real_split_point);
- int Sbar = read_len - S;
+ // note that splicing point and the coverage coordinates are "major negative" view.
+ // recover the "negative view" splicing point location
+ int S = (result->result_flags & CORE_IS_NEGATIVE_STRAND) ? selected_real_split_point : (read_len - selected_real_split_point);
+ int Sbar = read_len - S;
- int is_abnormal_as_reversed = (subjunc_result->minor_coverage_start > result->confident_coverage_start) + (subjunc_result -> minor_position > result -> selected_position) == 1;
- if(!(result->result_flags & CORE_IS_NEGATIVE_STRAND)) is_abnormal_as_reversed = !is_abnormal_as_reversed;
- int is_small_half_negative = ((result->result_flags & CORE_IS_NEGATIVE_STRAND)?1:0) + (subjunc_result->minor_position < result->selected_position) ==1;
+ int is_abnormal_as_reversed = (subjunc_result->minor_coverage_start > result->confident_coverage_start) + (subjunc_result -> minor_position > result -> selected_position) == 1;
+ if(!(result->result_flags & CORE_IS_NEGATIVE_STRAND)) is_abnormal_as_reversed = !is_abnormal_as_reversed;
+ int is_small_half_negative = ((result->result_flags & CORE_IS_NEGATIVE_STRAND)?1:0) + (subjunc_result->minor_position < result->selected_position) ==1;
- if(is_abnormal_as_reversed && is_small_half_negative)
- {
- left_edge_wanted = left_virtualHead_abs_offset + S;
- right_edge_wanted = right_virtualHead_abs_offset + Sbar;
- }
- else if(is_abnormal_as_reversed && !is_small_half_negative)
- {
- left_edge_wanted = left_virtualHead_abs_offset + Sbar - 1;
- right_edge_wanted = right_virtualHead_abs_offset + S - 1;
- }
- else if(!is_abnormal_as_reversed && is_small_half_negative)
- {
- left_edge_wanted = left_virtualHead_abs_offset + S - 1;
- right_edge_wanted = right_virtualHead_abs_offset + Sbar - 1;
- }
- else // if(!is_abnormal_as_reversed && !is_small_half_negative)
- {
- left_edge_wanted = left_virtualHead_abs_offset + Sbar;
- right_edge_wanted = right_virtualHead_abs_offset + S;
+ if(is_abnormal_as_reversed && is_small_half_negative)
+ {
+ left_edge_wanted = left_virtualHead_abs_offset + S;
+ right_edge_wanted = right_virtualHead_abs_offset + Sbar;
+ }
+ else if(is_abnormal_as_reversed && !is_small_half_negative)
+ {
+ left_edge_wanted = left_virtualHead_abs_offset + Sbar - 1;
+ right_edge_wanted = right_virtualHead_abs_offset + S - 1;
+ }
+ else if(!is_abnormal_as_reversed && is_small_half_negative)
+ {
+ left_edge_wanted = left_virtualHead_abs_offset + S - 1;
+ right_edge_wanted = right_virtualHead_abs_offset + Sbar - 1;
+ }
+ else // if(!is_abnormal_as_reversed && !is_small_half_negative)
+ {
+ left_edge_wanted = left_virtualHead_abs_offset + Sbar;
+ right_edge_wanted = right_virtualHead_abs_offset + S;
+ }
+
+ if(left_edge_wanted >= right_edge_wanted){
+ SUBREADprintf("REVERSED NEW JUNC: %u ~ %u : ABN_REV=%d , SMALL_NEG=%d, LEFT_VH=%u, RIGHT_VH=%u, S/~S=%d/%d\n", left_edge_wanted, right_edge_wanted, is_abnormal_as_reversed, is_small_half_negative, left_virtualHead_abs_offset, right_virtualHead_abs_offset, S, Sbar);
+ }
+
+ }else{
+ unsigned int major_half_smallest_coordinate, minor_half_smallest_coordinate;
+ major_half_smallest_coordinate = result -> selected_position + selected_real_split_point;
+ minor_half_smallest_coordinate = subjunc_result->minor_position + read_len - selected_real_split_point;
+ left_edge_wanted = min(major_half_smallest_coordinate, minor_half_smallest_coordinate);
+ right_edge_wanted = max(major_half_smallest_coordinate, minor_half_smallest_coordinate);
+ int is_abnormal_as_reversed = (subjunc_result->minor_coverage_start > result->confident_coverage_start) + (minor_half_smallest_coordinate > major_half_smallest_coordinate) == 1;
+ int is_small_half_negative = ((result->result_flags & CORE_IS_NEGATIVE_STRAND)?1:0) + (minor_half_smallest_coordinate < major_half_smallest_coordinate) ==1;
+ if(!(result->result_flags & CORE_IS_NEGATIVE_STRAND)) is_abnormal_as_reversed = !is_abnormal_as_reversed;
+ if(is_small_half_negative != is_abnormal_as_reversed)
+ {
+ left_edge_wanted -=1;
+ right_edge_wanted -=1;
+ }
}
}
else
@@ -2490,10 +3169,19 @@ void find_new_junctions(global_context_t * global_context, thread_context_t * th
}
// note that selected_real_split_point is the first UNWANTED base after left half.
+
+ //if(abs(left_edge_wanted-27286396) < 250 || abs(right_edge_wanted - 27286396)<250)
+ if(0 && FIXLENstrcmp("V0112_0155:7:1101:19612:13380", read_name) == 0)
+ {
+ char leftpos[100], rightpos[100];
+ absoffset_to_posstr(global_context, left_edge_wanted, leftpos);
+ absoffset_to_posstr(global_context, right_edge_wanted, rightpos);
+ SUBREADprintf("READ=%s, LEFT=%s, RIGHT=%s\n", read_name, leftpos, rightpos);
+ }
chromosome_event_t * found = NULL;
chromosome_event_t * search_return [MAX_EVENT_ENTRIES_PER_SITE];
- int found_events = search_event(global_context, event_table, event_space, left_edge_wanted , EVENT_SEARCH_BY_SMALL_SIDE, CHRO_EVENT_TYPE_JUNCTION|CHRO_EVENT_TYPE_FUSION, search_return);
+ int found_events = search_event(global_context, event_table, event_space, left_edge_wanted , EVENT_SEARCH_BY_SMALL_SIDE, CHRO_EVENT_TYPE_INDEL | CHRO_EVENT_TYPE_JUNCTION | CHRO_EVENT_TYPE_FUSION, search_return);
mark_gapped_read(result);
if(found_events)
@@ -2509,6 +3197,15 @@ void find_new_junctions(global_context_t * global_context, thread_context_t * th
}
}
+ //if( 1018082 == pair_number)
+ // SUBREADprintf("NEW_CHIMERISM_HERE [%u:%d: R_%d] : %s , %s , %u , %u, %c ; INC=%d %d\n", pair_number, best_read_id, is_second_read+1, chro_name_left, chro_name_right, chro_pos_left, chro_pos_right, is_strand_jumped?'X':'=', subjunc_result -> small_side_increasing_coordinate, subjunc_result -> large_side_increasing_coordinate);
+
+ //if(
+ // (74814303 + 52 - 8 <= left_edge_wanted && 74814303 + 52 + 8 >= left_edge_wanted) ||
+ // (74814303 + 52 - 8 <= right_edge_wanted && 74814303 + 52 + 8 >= right_edge_wanted)
+ //)
+ // SUBREADprintf("PAIR NO = %09u, FOUND = %p , %s:%u , %s:%u, INCs= %d, %d, JUMP=%d\n", pair_number, found, chro_name_left, chro_pos_left, chro_name_right, chro_pos_right, subjunc_result -> small_side_increasing_coordinate, subjunc_result -> large_side_increasing_coordinate, is_strand_jumped);
+
if(found) found -> supporting_reads ++;
else
{
@@ -2527,8 +3224,9 @@ void find_new_junctions(global_context_t * global_context, thread_context_t * th
memset(new_event,0,sizeof(chromosome_event_t));
new_event -> event_small_side = left_edge_wanted;
new_event -> event_large_side = right_edge_wanted + subjunc_result->indel_at_junction;
+ new_event -> critical_read_id = 2llu * pair_number + is_second_read;
- int new_event_type = CHRO_EVENT_TYPE_JUNCTION;
+ int new_event_type = (global_context -> config.entry_program_name == CORE_PROGRAM_SUBJUNC && global_context -> config.do_fusion_detection && !global_context -> config.prefer_donor_receptor_junctions)?CHRO_EVENT_TYPE_FUSION:CHRO_EVENT_TYPE_JUNCTION;
if(is_strand_jumped) new_event_type = CHRO_EVENT_TYPE_FUSION;
if((subjunc_result->minor_coverage_start > result->confident_coverage_start) + (subjunc_result -> minor_position > result -> selected_position) ==1)
@@ -2539,6 +3237,53 @@ void find_new_junctions(global_context_t * global_context, thread_context_t * th
if(!global_context -> config.do_fusion_detection)
new_event_type = CHRO_EVENT_TYPE_REMOVED;
+
+ if(1)
+ {
+ unsigned int dist = new_event -> event_large_side - new_event -> event_small_side;
+ int origin_type = new_event_type;
+ int fusion_cover_len = -1;
+
+ if(dist > MAX_INSERTION_LENGTH && new_event_type == CHRO_EVENT_TYPE_FUSION)
+ {
+ int cov_end, cover_start, major_cov;
+ cov_end = max(subjunc_result->minor_coverage_end, result->confident_coverage_end );
+ cover_start = min(subjunc_result->minor_coverage_start, result->confident_coverage_start);
+
+ major_cov = result->confident_coverage_end - result->confident_coverage_start;
+
+ fusion_cover_len = cov_end - cover_start ;
+
+ if(fusion_cover_len < read_len - 15 || major_cov > read_len - 15)
+ new_event_type = CHRO_EVENT_TYPE_REMOVED;
+ }
+
+ if(dist > MAX_INSERTION_LENGTH && new_event_type == CHRO_EVENT_TYPE_FUSION && subjunc_result -> minor_votes < 2)
+ new_event_type = CHRO_EVENT_TYPE_REMOVED;
+ else if(new_event_type == CHRO_EVENT_TYPE_FUSION && subjunc_result -> minor_votes < 1)
+ new_event_type = CHRO_EVENT_TYPE_REMOVED;
+
+
+ if(dist > MAX_INSERTION_LENGTH && new_event_type == CHRO_EVENT_TYPE_FUSION && result -> selected_votes < 2)
+ new_event_type = CHRO_EVENT_TYPE_REMOVED;
+ else if(new_event_type == CHRO_EVENT_TYPE_FUSION && result -> selected_votes < 1)
+ new_event_type = CHRO_EVENT_TYPE_REMOVED;
+
+ if(0 && origin_type == CHRO_EVENT_TYPE_FUSION)
+ {
+ char leftpos[100], rightpos[100];
+ absoffset_to_posstr(global_context, new_event -> event_small_side, leftpos);
+ absoffset_to_posstr(global_context, new_event -> event_large_side, rightpos);
+
+ if(new_event_type == CHRO_EVENT_TYPE_REMOVED)
+ SUBREADprintf("NEW_FUSION REMOVED %s SUGGEST %s ~ %s MAJOR COV=%d ~ %d, MINOR COV=%d ~ %d, RLEN=%d, COVED=%d, VOTES=%d, %d, %s, SPLIT=%d\n", read_name, leftpos, rightpos, result->confident_coverage_start, result->confident_coverage_end, subjunc_result->minor_coverage_start, subjunc_result->minor_coverage_end, read_len, fusion_cover_len, result -> selected_votes, subjunc_result -> minor_votes, is_strand_jumped?"JUMPED":"======", selected_real_split_point);
+ else
+ SUBREADprintf("NEW_FUSION WANTED %s SUGGEST %s ~ %s MAJOR COV=%d ~ %d, MINOR COV=%d ~ %d, RLEN=%d, COVED=%d, VOTES=%d, %d, %s, SPLIT=%d\n", read_name, leftpos, rightpos, result->confident_coverage_start, result->confident_coverage_end, subjunc_result->minor_coverage_start, subjunc_result->minor_coverage_end, read_len, fusion_cover_len, result -> selected_votes, subjunc_result -> minor_votes, is_strand_jumped?"JUMPED":"======", selected_real_split_point);
+ }
+
+ if(dist > MAX_INSERTION_LENGTH && new_event_type == CHRO_EVENT_TYPE_FUSION && (selected_real_split_point < read_len * 0.2 || selected_real_split_point >= read_len *0.8000) )
+ new_event_type = CHRO_EVENT_TYPE_REMOVED;
+ }
//if(pair_number == 13)
//printf("MMMMX %d %u -- %u : TYPE %d\n" , event_no, left_edge_wanted, right_edge_wanted, new_event_type);
@@ -2555,10 +3300,14 @@ void find_new_junctions(global_context_t * global_context, thread_context_t * th
new_event -> indel_length = 0;
new_event -> indel_at_junction = subjunc_result->indel_at_junction;
new_event -> is_donor_found = is_donor_found;
+
+ new_event -> small_side_increasing_coordinate = subjunc_result -> small_side_increasing_coordinate;
+ new_event -> large_side_increasing_coordinate = subjunc_result -> large_side_increasing_coordinate;
put_new_event(event_table, new_event , event_no);
- //printf("NEW_JUNCTION_HERE : %s , %u , %u\n", chro_name_right, chro_pos_left, chro_pos_right);
+ if(0 && FIXLENstrcmp("R000000052", read_name) == 0)
+ SUBREADprintf("NEW_JUNCTION_HERE : %s , %u , %u (%u, %u)\n", chro_name_right, chro_pos_left, chro_pos_right, new_event -> event_small_side, new_event -> event_large_side);
}
else if(new_event_type == CHRO_EVENT_TYPE_FUSION)
{
@@ -2567,26 +3316,34 @@ void find_new_junctions(global_context_t * global_context, thread_context_t * th
new_event -> event_type = CHRO_EVENT_TYPE_FUSION;
new_event -> is_strand_jumped = is_strand_jumped;
+
new_event -> supporting_reads = 1;
new_event -> indel_length = 0;
+
+ new_event -> small_side_increasing_coordinate = subjunc_result -> small_side_increasing_coordinate;
+ new_event -> large_side_increasing_coordinate = subjunc_result -> large_side_increasing_coordinate;
put_new_event(event_table, new_event , event_no);
- //if(pair_number == 13)
- //printf("NEW_CHIMERISM_HERE : %s , %s , %u , %u, %c , read: %d - %d \n", chro_name_left, chro_name_right, chro_pos_left, chro_pos_right, is_strand_jumped?'X':'=');
+ //if( 1018082 == pair_number)
+ // SUBREADprintf("NEW_CHIMERISM_HERE_FULL [%u:%d: R_%d] : %s , %s , %u , %u, %c ; INC=%d %d\n", pair_number, best_read_id, is_second_read+1, chro_name_left, chro_name_right, chro_pos_left, chro_pos_right, is_strand_jumped?'X':'=', new_event -> small_side_increasing_coordinate, new_event -> large_side_increasing_coordinate);
}
}
}
}
}
+void write_translocation_results_final(void * buckv, HashTable * tab);
+void write_inversion_results_final(void * buckv, HashTable * tab);
+
int write_fusion_final_results(global_context_t * global_context)
{
indel_context_t * indel_context = (indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID];
char fn2 [MAX_FILE_NAME_LENGTH];
- snprintf(fn2, MAX_FILE_NAME_LENGTH, "%s.fusion.txt", global_context->config.output_prefix);
+ snprintf(fn2, MAX_FILE_NAME_LENGTH, "%s.breakpoints.txt", global_context->config.output_prefix);
FILE * ofp = f_subr_open(fn2, "wb");
fprintf(ofp,"#Chr Location Chr Location SameStrand nSupport\n");
+ //fprintf(ofp,"#Chr Location Chr Location SameStrand nSupport BreakPoint1_GoUp BreakPoint2_GoUp\n");
int xk1;
unsigned int all_junctions = 0;
@@ -2603,7 +3360,8 @@ int write_fusion_final_results(global_context_t * global_context)
all_juncs++;
- if(event_body->final_counted_reads<1 )
+ //#warning "================== REMOVE '- 1' IN THE NEXT LINE ========================"
+ if(event_body->final_counted_reads<1|| event_body->critical_supporting_reads < 1 - 1)
{
no_sup_juncs++;
continue;
@@ -2614,13 +3372,82 @@ int write_fusion_final_results(global_context_t * global_context)
chro_pos_left++;
all_junctions ++;
- fprintf(ofp, "%s\t%u\t%s\t%u\t%s\t%d\n", chro_name_left, chro_pos_left, chro_name_right, chro_pos_right, event_body -> is_strand_jumped?"No":"Yes", event_body -> final_counted_reads);
+ fprintf(ofp, "%s\t%u\t%s\t%u\t%s\t%d\n", chro_name_left, chro_pos_left, chro_name_right, chro_pos_right+1, event_body -> is_strand_jumped?"No":"Yes", event_body -> final_counted_reads);
+ //fprintf(ofp, "%s\t%u\t%s\t%u\t%s\t%d\t%s\t%s\n", chro_name_left, chro_pos_left, chro_name_right, chro_pos_right+1, event_body -> is_strand_jumped?"No":"Yes", event_body -> final_counted_reads, event_body -> small_side_increasing_coordinate?"Yes":"No", event_body -> large_side_increasing_coordinate?"Yes":"No");
}
global_context -> all_fusions = all_junctions;
+
+ if(global_context->config.do_structural_variance_detection){
+ global_context -> translocation_result_table.entry_table -> appendix1 = ofp;
+ global_context -> translocation_result_table.entry_table -> appendix2 = global_context;
+ HashTableIteration(global_context -> translocation_result_table.entry_table, write_translocation_results_final);
+ global_context -> inversion_result_table.entry_table -> appendix1 = ofp;
+ global_context -> inversion_result_table.entry_table -> appendix2 = global_context;
+ HashTableIteration(global_context -> inversion_result_table.entry_table, write_inversion_results_final);
+ }
+
fclose(ofp);
return 0;
}
+ // HashTableIteration callback (see write_fusion_final_results): writes every inversion held in one bucket (buckv) of the inversion result table to the breakpoint output file.
+void write_inversion_results_final(void * buckv, HashTable * tab){
+ int x1;
+ bucketed_table_bucket_t * buck = buckv;
+ // The output FILE* travels in tab -> appendix1 and the global context in tab -> appendix2; both are read back here.
+ FILE * ofp = (FILE *)tab -> appendix1;
+ global_context_t * global_context = (global_context_t * )tab -> appendix2;
+ for(x1 = 0; x1 < buck -> items; x1++)
+ {
+ if(buck->positions[x1] - buck->positions[x1] % buck -> maximum_interval_length == buck -> keyed_bucket) // keep only items whose position, rounded down to a multiple of maximum_interval_length, equals this bucket's key; presumably this avoids reporting an item twice when it is also stored in a neighbouring bucket -- TODO confirm
+ {
+ inversion_result_t * inv_res = buck -> details[x1];
+ // src_chr / src_pos are filled by locate_gene_position() from the absolute offset inv_res -> small_side.
+ char * src_chr;
+ unsigned int src_pos;
+ // Two "INV" rows are written per inversion: (src_pos+1 , src_pos+1+length) and (src_pos+2 , src_pos+length). NOTE(review): looks like the two breakpoint pairs of the inverted segment, 1-based -- confirm.
+ locate_gene_position(inv_res -> small_side, &global_context -> chromosome_table, &src_chr , &src_pos);
+ fprintf(ofp, "INV\t%s\t%u\t%s\t%u\t%s\n", src_chr, src_pos + 1, src_chr, src_pos + 1 + inv_res -> length, inv_res -> is_precisely_called ? "PRECISE":"IMPRECISE");
+ fprintf(ofp, "INV\t%s\t%u\t%s\t%u\t%s\n", src_chr, src_pos + 2, src_chr, src_pos + inv_res -> length, inv_res -> is_precisely_called ? "PRECISE":"IMPRECISE");
+
+ //fprintf(ofp, "INVERSION\t%s\t%u\t%u\t%u\t%u\n", src_chr, src_pos, inv_res -> length, inv_res -> all_sup_D , inv_res -> max_sup_E);
+ }
+ }
+
+}
+ // HashTableIteration callback (see write_fusion_final_results): writes every translocation held in one bucket (buckv) of the translocation result table to the breakpoint output file.
+void write_translocation_results_final(void * buckv, HashTable * tab){
+ int x1;
+ bucketed_table_bucket_t * buck = buckv;
+ // The output FILE* travels in tab -> appendix1 and the global context in tab -> appendix2; both are read back here.
+ FILE * ofp = (FILE *)tab -> appendix1;
+ global_context_t * global_context = (global_context_t * )tab -> appendix2;
+ for(x1 = 0; x1 < buck -> items; x1++)
+ {
+ if(buck->positions[x1] - buck->positions[x1] % buck -> maximum_interval_length == buck -> keyed_bucket) // keep only items whose position, rounded down to a multiple of maximum_interval_length, equals this bucket's key; presumably this avoids reporting an item twice when it is also stored in a neighbouring bucket -- TODO confirm
+ {
+ char * src_chr, *targ_chr;
+ unsigned int src_pos, targ_pos;
+ // One table entry describes a segment of trans_res -> length bases starting at source_left_side and a target location target_left_side (both passed to locate_gene_position as absolute offsets).
+ translocation_result_t * trans_res = buck -> details[x1];
+ // Resolve both absolute offsets into chromosome name + in-chromosome position.
+ locate_gene_position(trans_res -> source_left_side, &global_context -> chromosome_table, &src_chr , &src_pos);
+ locate_gene_position(trans_res -> target_left_side, &global_context -> chromosome_table, &targ_chr , &targ_pos);
+ // Output: two ITX/CTX rows (source start and source start+length, each paired with the target position) followed by one DEL row for the source segment. NOTE(review): "src_chr == targ_chr" compares pointers, not strings; it is only correct if locate_gene_position returns the same pointer for the same chromosome -- confirm.
+ //fprintf(ofp, "TRANSLOCATION\t%s\t%u\t%u\t%s\t%u\t%s\t%u\t%u\n", src_chr, src_pos, trans_res -> length, targ_chr, targ_pos, trans_res -> is_inv?"INV":"STR", trans_res -> all_sup_P , trans_res -> max_sup_QR);
+ /*
+ SUBREADprintf("ABS=%u, %u, PRECISE=%d\n", trans_res -> source_left_side, trans_res -> target_left_side, trans_res -> is_precisely_called);
+ SUBREADprintf("%u, %u\n", src_pos, targ_pos);
+ SUBREADprintf("%s, %s\n", src_chr, targ_chr);
+ */
+ fprintf(ofp, "%s\t%s\t%u\t%s\t%u\t%s\t%s\n", src_chr == targ_chr?"ITX":"CTX", src_chr, src_pos + 1, targ_chr, targ_pos + 1, trans_res -> is_inv?"X":"=", trans_res -> is_precisely_called ? "PRECISE":"IMPRECISE");
+ fprintf(ofp, "%s\t%s\t%u\t%s\t%u\t%s\t%s\n", src_chr == targ_chr?"ITX":"CTX", src_chr, src_pos + trans_res -> length + 1, targ_chr, targ_pos + 1, trans_res -> is_inv?"X":"=", trans_res -> is_precisely_called ? "PRECISE":"IMPRECISE");
+ fprintf(ofp, "DEL\t%s\t%u\t%u\t%s\n", src_chr, src_pos + 1, trans_res -> length , trans_res -> is_precisely_called ? "PRECISE":"IMPRECISE");
+ }
+ }
+
+}
+
int write_junction_final_results(global_context_t * global_context)
{
@@ -2646,7 +3473,7 @@ int write_junction_final_results(global_context_t * global_context)
continue;
//#warning " ================================== remove '- 1' from the next line!!! ================================="
- if(event_body->final_counted_reads<1)
+ if(event_body->final_counted_reads < 1 || ( event_body->critical_supporting_reads < 1&& event_body->indel_at_junction))
{
no_sup_juncs++;
continue;
@@ -2657,16 +3484,24 @@ int write_junction_final_results(global_context_t * global_context)
chro_pos_left++;
- unsigned int feature_start = max(0, chro_pos_left - event_body -> junction_flanking_left );
+
+ unsigned int feature_start = chro_pos_left - event_body -> junction_flanking_left;
+ if(chro_pos_left <= event_body -> junction_flanking_left){
+ feature_start = 1;
+ event_body -> junction_flanking_left = chro_pos_left - 1;
+ }
+
unsigned int feature_end = chro_pos_right + event_body -> junction_flanking_right;
all_junctions ++;
if(event_body->indel_at_junction)
sprintf(indel_sect,"INS%d", event_body->indel_at_junction);
+ //else if(event_body->critical_supporting_reads < 1)
+ // strcpy(indel_sect, "NOCRT");
else indel_sect[0]=0;
- fprintf(ofp,"%s\t%u\t%u\tJUNC%08u%s\t%d\t%c\t%u\t%u\t%d,%d,%d\t2\t%d,%d\t0,%u\t\n", chro_name_left, feature_start, feature_end,
+ fprintf(ofp,"%s\t%u\t%u\tJUNC%08u%s\t%d\t%c\t%u\t%u\t%d,%d,%d\t2\t%d,%d\t0,%u\n", chro_name_left, feature_start, feature_end,
all_junctions, indel_sect, event_body -> final_counted_reads, event_body->is_negative_strand?'-':'+',
feature_start, feature_end, event_body->is_negative_strand?0:255, /*event_body -> anti_supporting_reads*/ event_body->is_negative_strand?255:0, event_body->is_negative_strand?255:0,
event_body -> junction_flanking_left, event_body -> junction_flanking_right, feature_end-feature_start-event_body -> junction_flanking_right);
@@ -2709,7 +3544,7 @@ int paired_chars_part(char * ch1, char * ch2, int is_reverse)
void core_search_short_exons(global_context_t * global_context, thread_context_t * thread_context, char * read_text, char * qualityb0, int rl, unsigned int P1_Pos, unsigned int P2_Pos, short read_coverage_start, short read_coverage_end)
{
- char inb[1201], qualityb[1201];
+ char inb[MAX_READ_LENGTH], qualityb[MAX_READ_LENGTH];
if ( (rl <= EXON_LONG_READ_LENGTH ) && (!EXON_EXTENDING_SCAN)) return;
//return;
gene_value_index_t * base_index = thread_context?thread_context->current_value_index:global_context->current_value_index ;
@@ -2794,12 +3629,12 @@ void core_search_short_exons(global_context_t * global_context, thread_context_t
max_score = -999;
int max_is_GTAG = 0;
- if(need_to_test && pos_small >= 16)
+ if(need_to_test && pos_small >= SHORT_EXON_MIN_LENGTH)
{
unsigned int test_end = pos_small - SHORT_EXON_EXTEND;
if(SHORT_EXON_EXTEND > pos_small) test_end = 0;
- unsigned int new_pos = pos_small-16;
+ unsigned int new_pos = pos_small-SHORT_EXON_MIN_LENGTH;
while(1)
{
new_pos = match_chro_range(inb, base_index, new_pos, 7 , new_pos - test_end , SEARCH_BACK);
@@ -2827,7 +3662,7 @@ void core_search_short_exons(global_context_t * global_context, thread_context_t
test_score = 1000000+ (matched_in_exon_new )*10000 + matched_in_exon_old * 1000 + new_pos - test_end;
if(test_score <= max_score) continue;
- max_score = test_score + 39999 ;
+ max_score = test_score;
if(matched_in_exon_new < splice_point || matched_in_exon_old < SHORT_EXON_WINDOW )
continue;
@@ -2988,7 +3823,7 @@ void core_search_short_exons(global_context_t * global_context, thread_context_t
test_score = 1000000+ (matched_in_exon_new)*10000 + matched_in_exon_old * 1000 + test_end - new_pos;
if(test_score <= max_score) continue;
- max_score = test_score + 39999;
+ max_score = test_score;
if(matched_in_exon_new < (rl - splice_point) || matched_in_exon_old < SHORT_EXON_WINDOW)
continue;
@@ -3472,7 +4307,7 @@ int core13_test_donor(char *read, int read_len, unsigned int pos1, unsigned int
#define EXON_LARGE_WINDOW 60
#define ACCEPTED_SUPPORT_RATE 0.3
-void core_fragile_junction_voting(global_context_t * global_context, thread_context_t * thread_context, char * read, char * qual, unsigned int full_rl, int negative_strand, int color_space, unsigned int low_border, unsigned int high_border, gene_vote_t *vote_p1)
+void core_fragile_junction_voting(global_context_t * global_context, thread_context_t * thread_context, char * rname, char * read, char * qual, unsigned int full_rl, int negative_strand, int color_space, unsigned int low_border, unsigned int high_border, gene_vote_t *vote_p1)
{
int windows = full_rl / EXON_LARGE_WINDOW +1;
float overlap = (1.0*windows * EXON_LARGE_WINDOW - full_rl) / (windows-1);
@@ -3532,26 +4367,156 @@ void core_fragile_junction_voting(global_context_t * global_context, thread_cont
break;
}
+ int ii, jj, kk;
+ for(ii = 0; ii < GENE_VOTE_TABLE_SIZE; ii++) {
+ for(jj = 0; jj < vote_p1 -> items[ii] ; jj++) {
+ if(vote_p1 -> votes[ii][jj] < vote_p1 -> max_vote) continue;
- if(1)
- {
- finalise_vote(vote_p1);
- select_best_vote(vote_p1);
- //print_votes(vote_p1, global_context -> config.index_prefix);
- unsigned int best_pos1=0;
- unsigned int best_pos2=0;
- int best_vote1=0;
- int best_vote2=0;
- char is_abnormal=0;
- short half_marks=0;
- int is_reversed_halves=0, max_cover_start=0, max_cover_end=0;
+ gene_vote_number_t * indel_recorder = vote_p1 -> indel_recorder[ii][jj];
+ unsigned int voting_position = vote_p1 -> pos[ii][jj];
+ int last_indel = 0, last_correct_subread=0;
+
+ for(kk =0; indel_recorder[kk] && (kk < MAX_INDEL_SECTIONS); kk+=3){
+ char movement_buffer[MAX_READ_LENGTH * 10 / 7];
+ //chromosome_event_t * last_event = NULL;
+ int last_event_id = -1;
+
+ int indels = indel_recorder[kk+2] - last_indel;
+ if(indels==0) continue;
+
+ int next_correct_subread = indel_recorder[kk] -1;
+
+ int last_correct_base = find_subread_end(read_len, global_context->config.total_subreads , last_correct_subread) - 9;
+ int first_correct_base = find_subread_end(read_len, global_context->config.total_subreads , next_correct_subread) - 16 + 9;
+ first_correct_base = min(first_correct_base+10, read_len);
+ last_correct_base = max(0, last_correct_base);
+ last_correct_base = min(read_len-1, last_correct_base);
+
+ int x1, dyna_steps;
+
+ dyna_steps = core_dynamic_align(global_context, thread_context, InBuff + last_correct_base, first_correct_base - last_correct_base, voting_position + last_correct_base + last_indel, movement_buffer, indels, rname);
+
+ movement_buffer[dyna_steps]=0;
+
+ if(0 && strcmp("MISEQ:13:000000000-A1H1M:1:1112:12194:5511", rname) == 0)
+ {
+ SUBREADprintf("IR= %d %d~%d\n", dyna_steps, last_correct_base, first_correct_base);
+
+ for(x1=0; x1<dyna_steps;x1++)
+ {
+ int mc, mv=movement_buffer[x1];
+ if(mv==0)mc='=';
+ else if(mv==1)mc='D';
+ else if(mv==2)mc='I';
+ else mc='X';
+ SUBREADprintf("%c",mc);
+ }
+ SUBREADputs("");
+ }
+ unsigned int cursor_on_chromosome = voting_position + last_correct_base + last_indel, cursor_on_read = last_correct_base;
+ int last_mv = 0;
+ unsigned int indel_left_boundary = 0;
+ int is_in_indel = 0, current_indel_len = 0, total_mismatch = 0;
+
+ for(x1=0; x1<dyna_steps;x1++)
+ {
+ int mv=movement_buffer[x1];
+ if(mv==3) total_mismatch++;
+ }
+
+ if(total_mismatch<2 || (global_context->config.maximise_sensitivity_indel && total_mismatch <= 2 ))
+ for(x1=0; x1<dyna_steps;x1++)
+ {
+ int mv=movement_buffer[x1];
+
+ if(last_mv != mv)
+ {
+ if( ( mv==1 || mv==2 ) && ! is_in_indel)
+ {
+ indel_left_boundary = cursor_on_chromosome;
+ is_in_indel = 1;
+ current_indel_len = 0;
+ }
+ else if ( is_in_indel && (mv == 0 || mv == 3) )
+ {
+ gene_value_index_t * current_value_index = thread_context?thread_context->current_value_index:global_context->current_value_index;
+ int ambiguous_i, ambiguous_count=0;
+ int best_matched_bases = match_chro(InBuff + cursor_on_read - 6, current_value_index, indel_left_boundary - 6, 6, 0, global_context->config.space_type) +
+ match_chro(InBuff + cursor_on_read - min(current_indel_len,0), current_value_index, indel_left_boundary + max(0, current_indel_len), 6, 0, global_context->config.space_type);
+ for(ambiguous_i=-5; ambiguous_i<=5; ambiguous_i++)
+ {
+ int left_match = match_chro(InBuff + cursor_on_read - 6, current_value_index, indel_left_boundary - 6, 6+ambiguous_i, 0, global_context->config.space_type);
+ int right_match = match_chro(InBuff + cursor_on_read + ambiguous_i - min(current_indel_len,0), current_value_index, indel_left_boundary + ambiguous_i + max(0, current_indel_len), 6-ambiguous_i, 0,global_context->config.space_type);
+ if(left_match+right_match == best_matched_bases) ambiguous_count ++;
+ }
+
+ if(0 && strcmp("MISEQ:13:000000000-A1H1M:1:1112:12194:5511", rname) == 0)
+ SUBREADprintf("INDEL_DDADD: abs(I=%d); INDELS=%d; LOC=%u\n",i, current_indel_len, indel_left_boundary-1);
+ if(abs(current_indel_len)<=global_context -> config.max_indel_length)
+ {
+ chromosome_event_t * new_event = local_add_indel_event(global_context, thread_context, event_table, InBuff + cursor_on_read + min(0,current_indel_len), indel_left_boundary - 1, current_indel_len, 1, ambiguous_count, 0);
+ if(last_event_id >=0 && new_event){
+ // the event space can be changed when the new event is added. the location is updated everytime.
+ chromosome_event_t * event_space = NULL;
+ if(thread_context)
+ event_space = ((indel_thread_context_t *)thread_context -> module_thread_contexts[MODULE_INDEL_ID]) -> event_space_dynamic;
+ else
+ event_space = ((indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID]) -> event_space_dynamic;
+ chromosome_event_t * last_event = event_space + last_event_id;
+
+ int dist = new_event -> event_small_side - last_event -> event_large_side +1;
+ new_event -> connected_previous_event_distance = dist;
+ last_event -> connected_next_event_distance = dist;
+ }
+
+ if (new_event)
+ last_event_id = new_event -> global_event_id;
+ else last_event_id = -1;
+ }
+ }
+
+
+ if(mv == 0 || mv == 3)
+ is_in_indel = 0;
+ }
+
+ if(is_in_indel && mv == 1)
+ current_indel_len += 1;
+ if(is_in_indel && mv == 2)
+ current_indel_len -= 1;
+
+ if(mv == 1 || mv == 3 || mv == 0) cursor_on_chromosome++;
+ if(mv == 2 || mv == 3 || mv == 0) cursor_on_read++;
+
+ last_mv = mv;
+ }
+ last_correct_subread = indel_recorder[i+1]-1;
+ }
+
+ }
+ }
+
+
+
+ if(1)
+ {
+ finalise_vote(vote_p1);
+ select_best_vote(vote_p1);
+ //print_votes(vote_p1, global_context -> config.index_prefix);
+ unsigned int best_pos1=0;
+ unsigned int best_pos2=0;
+ int best_vote1=0;
+ int best_vote2=0;
+ char is_abnormal=0;
+ short half_marks=0;
+ int is_reversed_halves=0, max_cover_start=0, max_cover_end=0;
char indel_in_p1=0, indel_in_p2=0;
short read_coverage_start =0, read_coverage_end=0;
gene_value_index_t * base_index = thread_context?thread_context->current_value_index:global_context->current_value_index ;
int splice_point = core_select_best_matching_halves(global_context, vote_p1, &best_pos1, &best_pos2, &best_vote1, &best_vote2, &is_abnormal ,&half_marks, &is_reversed_halves, ACCEPTED_SUPPORT_RATE, read_len, -1, 0, &read_coverage_start, &read_coverage_end, &indel_in_p1, &indel_in_p2, &max_cover_start, &max_cover_end, read_len, -1 , 0, NULL , 0xffffffff);
- //printf("SP=%d; BV=%d; BV2=%d\n", splice_point, best_vote1, best_vote2);
+ //SUBREADprintf("RN=%s , WINDOW = %d ~ %d , SP=%d; BV=%d; BV2=%d\n", rname , window_cursor , window_cursor + read_len , splice_point, best_vote1, best_vote2);
if (splice_point>0 && best_vote1 >= 1 && best_vote2>=1)
{
int test_real_break_point = -1, test_donor_score=-1;
@@ -3603,10 +4568,9 @@ void core_fragile_junction_voting(global_context_t * global_context, thread_cont
new_event -> indel_length = 0;
put_new_event(event_table, new_event , event_no);
- //printf("ADD JUNCTION BY FRAGILE, %d-%d\n", pos_small, pos_big);
+ // SUBREADprintf("ADD JUNCTION BY FRAGILE, %d-%d\n", pos_small, pos_big);
}
-
}
}
@@ -3616,3 +4580,1073 @@ void core_fragile_junction_voting(global_context_t * global_context, thread_cont
}
+/*
+ * Debug helper: walks every entry of a fragment list and derives, for the
+ * listed read and for its mate, a printable position string and a strand
+ * indicator. The SUBREADprintf that would emit them is commented out, so the
+ * derived values are currently discarded.
+ *
+ * Each entry of fls->fragment_numbers is decoded as fragment_no * 2 + is_read_B.
+ */
+void print_frags(global_context_t * global_context, fragment_list_t * fls){
+    int item = 0;
+
+    while(item < fls -> fragments){
+        subread_read_number_t fno = fls -> fragment_numbers[item] / 2;
+        int f_is_B = fls -> fragment_numbers[item] % 2;
+
+        mapping_result_t * f_res = _global_retrieve_alignment_ptr(global_context, fno, f_is_B, 0);
+        mapping_result_t * mate_res = _global_retrieve_alignment_ptr(global_context, fno, !f_is_B, 0);
+
+        char outpos[100];
+        char outposm[100];
+        absoffset_to_posstr(global_context, f_res -> selected_position, outpos);
+        absoffset_to_posstr(global_context, mate_res -> selected_position, outposm);
+
+        int f_negative = 0, mate_negative = 0;
+        if(f_res -> result_flags & CORE_IS_NEGATIVE_STRAND) f_negative = 1;
+        if(mate_res -> result_flags & CORE_IS_NEGATIVE_STRAND) mate_negative = 1;
+
+        // The strand flag of whichever of the two is read B is flipped.
+        if(f_is_B){
+            f_negative = !f_negative;
+        }else{
+            mate_negative = !mate_negative;
+        }
+
+        //SUBREADprintf("TRALOG: READ %09u %c AT %s (%c) ; MATE: %s (%c)\n", fno, f_is_B?'B':'A' , outpos, f_negative?'N':'P' , outposm, mate_negative?'N':'P');
+
+        item++;
+    }
+}
+
+// fragnos_paired_B = B_fragment_no * 2 + is_mate_b (is_mate_b points the mate that has the location in locations_mate_B)
+// fragnos_paired_C = C_fragment_no * 2 + is_mate_c (is_mate_c points the mate that has the location in locations_mate_C)
+//
+// locations_mate_B and locations_mate_C are the locations where the sequence is moved to. I.e., locations_mate_B and locations_mate_C are far far away from fragment A.
+//
+/*
+ * Pairs every fragment of listB with the closest still-unused fragment of
+ * listC, judged by the positions of their mates (the reads NOT named by the
+ * list entry), and accumulates breakpoint guesses over all pairs found.
+ *
+ * A C fragment is accepted for a B fragment when
+ *   - the two mates have opposite (read-B-corrected) strands,
+ *   - the B mate lies before the C mate, and
+ *   - their distance is below config.maximum_translocation_length;
+ * among the accepted ones the nearest wins and is marked as used.
+ *
+ * Outputs, one slot per pair (index 0 .. return value - 1):
+ *   fragnos_paired_B / fragnos_paired_C : fragment_no * 2 + read selector
+ *   locations_mate_B / locations_mate_C : selected_position of the two mates
+ * guessed_brkP_small_sum, guessed_moved_length_sum and guessed_brkQ_small_sum
+ * receive the per-pair AVERAGES of the guesses; they are written only when at
+ * least one pair was found.
+ *
+ * Returns the number of pairs; 0 when none was found or when the bookkeeping
+ * array could not be allocated.
+ *
+ * NOTE(review): fragnos_paired_B stores !B_read_is_b whereas fragnos_paired_C
+ * stores C_mate_is_b unchanged; this asymmetry is inherited -- confirm it is
+ * intended.
+ */
+int find_translocation_BC_mates(global_context_t * global_context, mapping_result_t * res_A1, mapping_result_t * res_A2, fragment_list_t * listB, fragment_list_t * listC, int is_INV, unsigned long long * fragnos_paired_B, unsigned long long * fragnos_paired_C, unsigned int * locations_mate_B, unsigned int * locations_mate_C,unsigned int * guessed_brkP_small_sum, unsigned int * guessed_moved_length_sum , unsigned int * guessed_brkQ_small_sum){
+
+    int ret = 0, xk1, xk2;
+    long long tmp_guessed_brkP_small_sum = 0, tmp_guessed_moved_length_sum = 0, tmp_guessed_brkQ_small_sum = 0;
+
+    // One flag per C fragment: set once it has been given to a B fragment.
+    char * is_C_used = calloc(listC->fragments, sizeof(char));
+    if(!is_C_used) return 0;
+
+    for(xk1 = 0; xk1 < listB->fragments; xk1++)
+    {
+        long long minimum_mate_distance = 0x7fffffff;
+        int minimum_xk2 = -1;
+        unsigned int mate_C_pos = 0;
+
+        mapping_result_t meta_B_res_body, res_Ba_body, meta_C_res_body, res_Ca_body;
+        mapping_result_t * res_Bb = &meta_B_res_body, * res_Ba = &res_Ba_body;
+        mapping_result_t * res_Cc = &meta_C_res_body, * res_Ca = &res_Ca_body;
+
+        subread_read_number_t B_read_no = listB->fragment_numbers[xk1]/2;
+        int B_read_is_b = listB->fragment_numbers[xk1]%2;
+
+        // res_Bb: the mate of the listed B read; res_Ba: the listed B read itself.
+        bigtable_readonly_result(global_context, NULL, B_read_no, 0, !B_read_is_b, res_Bb, NULL);
+        bigtable_readonly_result(global_context, NULL, B_read_no, 0, B_read_is_b, res_Ba, NULL);
+
+        // Does not depend on the C candidate, so it is computed once per B fragment.
+        int is_meta_B_negative = (res_Bb -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0;
+        if(!B_read_is_b) is_meta_B_negative = !is_meta_B_negative;
+
+        for(xk2 = 0; xk2 < listC->fragments; xk2++)
+        {
+            if(is_C_used[xk2]) continue;
+
+            subread_read_number_t C_read_no = listC->fragment_numbers[xk2]/2;
+            int C_read_is_b = listC->fragment_numbers[xk2]%2;
+
+            bigtable_readonly_result(global_context, NULL, C_read_no, 0, !C_read_is_b, res_Cc, NULL);
+
+            int is_meta_C_negative = (res_Cc -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0;
+            if(!C_read_is_b) is_meta_C_negative = !is_meta_C_negative;
+
+            //SUBREADprintf("TRALOG: MATES : B[%d] = %u (%c); C[%d] = %u (%c)\n", xk1, meta_B_res -> selected_position, is_meta_B_negative?'N':'P' , xk2, meta_C_res -> selected_position, is_meta_C_negative?'N':'P');
+
+            // The distance is C - B. The previous code subtracted the C position
+            // from itself, which made the maximum-length filter always pass.
+            if(is_meta_B_negative != is_meta_C_negative &&
+               res_Bb -> selected_position < res_Cc -> selected_position &&
+               res_Cc -> selected_position - res_Bb -> selected_position < global_context -> config.maximum_translocation_length &&
+               res_Cc -> selected_position - res_Bb -> selected_position < minimum_mate_distance)
+            {
+                minimum_mate_distance = res_Cc -> selected_position - res_Bb -> selected_position;
+                minimum_xk2 = xk2;
+                mate_C_pos = res_Cc -> selected_position;
+            }
+        }
+
+        // read B has a mate of C[minimum xk2] if there is one.
+        if(minimum_xk2>=0)
+        {
+            subread_read_number_t C_mate_fno = listC -> fragment_numbers[minimum_xk2] / 2;
+            int C_mate_is_b = listC -> fragment_numbers[minimum_xk2] % 2;
+
+            // The scan above leaves the LAST candidate in the result buffers, so
+            // both reads of the SELECTED C fragment are fetched again before
+            // they are used for the gap estimates.
+            bigtable_readonly_result(global_context, NULL, C_mate_fno, 0, !C_mate_is_b, res_Cc, NULL);
+            bigtable_readonly_result(global_context, NULL, C_mate_fno, 0, C_mate_is_b, res_Ca, NULL);
+
+            fragnos_paired_B[ret] = (B_read_no*2)+(!B_read_is_b);
+            locations_mate_B[ret] = res_Bb -> selected_position;
+
+            fragnos_paired_C[ret] = (C_mate_fno*2)+(C_mate_is_b);
+            locations_mate_C[ret] = mate_C_pos;
+
+            is_C_used[minimum_xk2] = 1;
+
+            int gapA, gapB, gapC;
+
+            if(is_INV){
+                gapA = res_Ca -> selected_position - res_A1 -> selected_position - res_A1 -> read_length;
+                gapB = res_A2 -> selected_position - res_Ba -> selected_position - res_Ba -> read_length;
+                gapC = res_Cc -> selected_position - res_Bb -> selected_position - res_Bb -> read_length;
+            }else{
+                gapA = res_Ba -> selected_position - res_A1 -> selected_position - res_A1 -> read_length;
+                gapB = res_A2 -> selected_position - res_Ca -> selected_position - res_Ca -> read_length;
+                gapC = res_Cc -> selected_position - res_Bb -> selected_position - res_Bb -> read_length;
+            }
+
+            // Each breakpoint is guessed to sit in the middle of the uncovered gap.
+            tmp_guessed_brkP_small_sum += res_A1 -> selected_position + res_A1 -> read_length + gapA/2;
+            tmp_guessed_moved_length_sum += res_A2 -> selected_position - res_A1 -> selected_position - res_A1 -> read_length - gapB/2 + gapA/2;
+            tmp_guessed_brkQ_small_sum += res_Bb -> selected_position + res_Bb -> read_length + gapC/2;
+
+            ret ++;
+        }
+    }
+
+    free(is_C_used);
+
+    if(ret>0){
+        *guessed_brkP_small_sum= tmp_guessed_brkP_small_sum / ret;
+        *guessed_moved_length_sum = tmp_guessed_moved_length_sum/ ret;
+        *guessed_brkQ_small_sum = tmp_guessed_brkQ_small_sum / ret;
+    }
+
+    return ret;
+}
+
+
+// This function sees if all the mates of read B_x and C_y are at the same location.
+// If mates of B_x and C_y spread on a large region, it is usually unreliable.
+// posesB and posesC are linear absolute positions of the mate reads.
+/*
+ * Checks whether the first PEmates entries of posesB and posesC all fall in
+ * one narrow region.
+ *
+ * Returns 1 when the span between the smallest and the largest of these
+ * positions is below twice config.maximum_pair_distance, and 0 otherwise or
+ * when PEmates is less than 1.
+ */
+int find_translocation_BC_conformation(global_context_t * global_context, int PEmates, unsigned int * posesB, unsigned int * posesC){
+
+    unsigned int lowest = 0xffffffff, highest = 0, idx;
+
+    if(PEmates<1){
+        return 0;
+    }
+
+    for(idx = 0; idx < PEmates; idx++)
+    {
+        unsigned int pos_b = posesB[idx];
+        unsigned int pos_c = posesC[idx];
+
+        if(pos_b < lowest) lowest = pos_b;
+        if(pos_c < lowest) lowest = pos_c;
+
+        if(pos_b > highest) highest = pos_b;
+        if(pos_c > highest) highest = pos_c;
+    }
+
+    if(highest - lowest< 2*global_context -> config.maximum_pair_distance){
+        return 1;
+    }
+    return 0;
+}
+
+
+// Entries of fliB and fliC are: frag_[BC]_no * 2 + is_Read_b_close_to_BreakPoint_P
+/*
+ * Counts how many fragments of fliB and fliC agree with the breakpoint events
+ * P (source), Q and R (target).
+ *
+ * For every fragment, the read named by the list entry must map within
+ * config.maximum_pair_distance of the expected source coordinate, and the
+ * other read within the same distance of the expected target coordinate.
+ *
+ * Returns non-zero when both lists have at least one supporting fragment and
+ * the supporters plus 2 reach half of each list's size.
+ */
+int breakpoint_PQR_supported(global_context_t * global_context , unsigned int brkPno , unsigned int brkQno, unsigned int brkRno, fragment_list_t * fliB, fragment_list_t * fliC, int isInv){
+    int fli_i;
+    int isFliB, nSupB=0, nSupC=0;
+    unsigned int source_small, source_large, target_smallQ, target_largeQ, target_smallR, target_largeR, target_large, target_small;
+
+    // The event coordinates do not depend on the fragment, so they are read once.
+    get_event_two_coordinates(global_context, brkPno, NULL, NULL, &source_small, NULL, NULL, &source_large);
+    get_event_two_coordinates(global_context, brkQno, NULL, NULL, &target_smallQ, NULL, NULL, &target_largeQ);
+    get_event_two_coordinates(global_context, brkRno, NULL, NULL, &target_smallR, NULL, NULL, &target_largeR);
+
+    // Signed 64-bit difference: the unsigned form "R - TOLERANCE" wraps around
+    // when R is smaller than the tolerance.
+    long long small_side_gap = target_smallQ;
+    small_side_gap -= target_smallR;
+
+    if(llabs(small_side_gap) <= BREAK_POINT_MAXIMUM_TOLERANCE)
+    {
+        //target_smallQ is target, target_smallR is target
+        target_large = target_smallR;
+        target_small = target_smallQ;
+    }else{
+        //target_largeQ is target, target_largeR is target
+        target_large = target_largeQ;
+        target_small = target_largeR;
+    }
+
+    for(isFliB = 0; isFliB < 2; isFliB++){
+        fragment_list_t * fli = isFliB?fliB:fliC;
+        int * nSup = isFliB?&nSupB:&nSupC;
+        // fliB => support source_small ~ target_large if inv, or source_small ~ target_small if !inv
+        // fliC => support source_large ~ target_small if inv, or source_large ~ target_large if !inv
+
+        unsigned int P_pos = isInv?( isFliB?source_large:source_small ):( isFliB?source_small:source_large );
+        unsigned int Q_pos = isInv?( isFliB?target_large:target_small ):( isFliB?target_small:target_large );
+
+        // the read that is close to BreakPoint_P should support source, the other read should support target
+        for(fli_i = 0; fli_i < fli -> fragments; fli_i ++){
+            subread_read_number_t frag_BC_no = fli -> fragment_numbers[fli_i]/2;
+            int is_Read_b_close_to_BreakPoint_P = fli -> fragment_numbers[fli_i]%2;
+
+            mapping_result_t res_BC_close_P_body, res_BC_close_Q_body;
+            mapping_result_t * res_BC_close_P = &res_BC_close_P_body, * res_BC_close_Q = & res_BC_close_Q_body;
+
+            bigtable_readonly_result(global_context, NULL, frag_BC_no, 0, is_Read_b_close_to_BreakPoint_P, res_BC_close_P, NULL);
+            bigtable_readonly_result(global_context, NULL, frag_BC_no, 0, !is_Read_b_close_to_BreakPoint_P, res_BC_close_Q, NULL);
+
+            SUBREADprintf("TRALOG: PQR_TARGET P=%u~%u; Q=%u~%u, R=%u~%u ; Ppos=%u, Qpos=%u, Pread=%u, Qread=%u on %s\n", source_small, source_large, target_smallQ, target_largeQ, target_smallR, target_largeR, P_pos, Q_pos, res_BC_close_P -> selected_position, res_BC_close_Q -> selected_position, isInv?"INV":"STR");
+
+            // llabs, not abs: the distances are long long and abs() would
+            // truncate them to int before taking the magnitude.
+            long long dist;
+            dist = res_BC_close_P -> selected_position;
+            dist -= P_pos;
+            if(llabs(dist) < global_context -> config.maximum_pair_distance){
+                dist = res_BC_close_Q -> selected_position;
+                dist -= Q_pos;
+                if(llabs(dist) < global_context -> config.maximum_pair_distance)
+                    (*nSup)++;
+            }
+        }
+    }
+    //return nSupB + 1 >= fliB -> fragments/2 && nSupC + 1 >= fliC-> fragments/2 ;
+    SUBREADprintf("TRALOG: PQR_NSUP: B=%d, C=%d on %s\n", nSupB, nSupC, isInv?"INV":"STR");
+    return nSupB > 0 && nSupC > 0 && nSupB + 2 >= fliB->fragments / 2 && nSupC + 2 >= fliC->fragments / 2;
+}
+
+// fragnoD1_mates and fragnoD2_mates are potential E reads 1/2.
+// D1: D's small read; D2: D's large read
+// E2 ~ D2
+// E1 ~ D1
+// E2.start > Y.large
+// E1.start > Y.small
+
+/*
+ * Counts how many candidate fragments from the two mate lists are placed
+ * consistently with the two edges of event brkYno.
+ *
+ * For each fragment both reads are fetched and ordered by selected_position;
+ * one is checked against the small edge and the other against the large edge.
+ * A fragment is a supporter when both offsets (read position minus edge) are
+ * greater than -8 and below config.maximum_pair_distance.
+ *
+ * Returns non-zero when both lists have at least one supporter and the
+ * supporters plus 2 reach half of each list's length. brkZno is not used.
+ */
+int breakpoint_YZ_supported(global_context_t * global_context, unsigned int brkYno, unsigned int brkZno, unsigned long long * fragnoD1_mates, int fragnoD1len, unsigned long long * fragnoD2_mates, int fragnoD2len){
+    unsigned int inversion_small_edge, inversion_large_edge;
+    int nSupD1mates = 0, nSupD2mates = 0;
+    int is_D2_mates, item;
+
+    get_event_two_coordinates(global_context, brkYno, NULL, NULL, &inversion_small_edge, NULL, NULL, &inversion_large_edge);
+
+    for(is_D2_mates = 0; is_D2_mates < 2; is_D2_mates ++){
+        unsigned long long * fragno_Dmates;
+        int fragno_Dno;
+        int * nSupMates;
+
+        if(is_D2_mates){
+            fragno_Dmates = fragnoD2_mates;
+            fragno_Dno = fragnoD2len;
+            nSupMates = &nSupD2mates;
+        }else{
+            fragno_Dmates = fragnoD1_mates;
+            fragno_Dno = fragnoD1len;
+            nSupMates = &nSupD1mates;
+        }
+
+        for(item = 0; item < fragno_Dno; item++){
+            subread_read_number_t fragno_Dmate = fragno_Dmates[item] / 2;
+            int is_large_read_far_from_D = fragno_Dmates[item] % 2;
+
+            mapping_result_t frag_D_mate_a_body, frag_D_mate_b_body;
+            mapping_result_t * frag_D_mate_a = &frag_D_mate_a_body;
+            mapping_result_t * frag_D_mate_b = &frag_D_mate_b_body;
+
+            bigtable_readonly_result(global_context, NULL, fragno_Dmate, 0, 0, frag_D_mate_a, NULL);
+            bigtable_readonly_result(global_context, NULL, fragno_Dmate, 0, 1, frag_D_mate_b, NULL);
+
+            // Order the two reads by position; on a tie read a counts as the first.
+            mapping_result_t * frag_D_mate_1, * frag_D_mate_2;
+            if(frag_D_mate_a -> selected_position > frag_D_mate_b -> selected_position){
+                frag_D_mate_1 = frag_D_mate_b;
+                frag_D_mate_2 = frag_D_mate_a;
+            }else{
+                frag_D_mate_1 = frag_D_mate_a;
+                frag_D_mate_2 = frag_D_mate_b;
+            }
+
+            // Exactly one of the two flags being set swaps which read is
+            // matched against which edge.
+            mapping_result_t * res_to_support_small_edge = frag_D_mate_1;
+            mapping_result_t * res_to_support_large_edge = frag_D_mate_2;
+            if(is_D2_mates ^ is_large_read_far_from_D){
+                res_to_support_small_edge = frag_D_mate_2;
+                res_to_support_large_edge = frag_D_mate_1;
+            }
+
+            long long distsm = res_to_support_small_edge -> selected_position;
+            distsm -= inversion_small_edge;
+
+            long long distla = res_to_support_large_edge -> selected_position;
+            distla -= inversion_large_edge;
+
+            //SUBREADprintf("INVLOG: Dist_SM=%lld, Dist_LA=%lld\n", distsm, distla);
+
+            if(distsm > -8 && distsm < global_context -> config.maximum_pair_distance &&
+               distla > -8 && distla < global_context -> config.maximum_pair_distance){
+                (*nSupMates) ++;
+            }
+        }
+    }
+
+    //SUBREADprintf("INVLOG: breakpoint_YZ_supported nSupD1=%d >= %d, nSupD2=%d >= %d\n", nSupD1mates, fragnoD1len, nSupD2mates, fragnoD2len);
+    return nSupD1mates > 0 && nSupD2mates > 0 && nSupD1mates + 2 >= fragnoD1len / 2 && nSupD2mates + 2 >= fragnoD2len / 2;
+}
+
+#define _PQR_LIST_SIZE 48
+
+/*
+ * Searches the breakpoint tables for a triple of events (P, Q, R) that can
+ * explain a translocation (isInv == 0) or an inverted translocation
+ * (isInv != 0) around fragment A.
+ *
+ * Step 1: events near read A1 are looked up in breakpoint_table_P and kept
+ *         only when their small side follows A1 and their large side precedes
+ *         A2, each within config.maximum_pair_distance, and
+ *         small_side_increasing_coordinate is 0.
+ * Step 2: for each kept P, Q and R candidates are looked up in
+ *         breakpoint_table_QR around the P sides and tested for matching
+ *         positions (BREAK_POINT_MAXIMUM_TOLERANCE) and side directions.
+ *
+ * On the first matching triple the event numbers are stored in *brkPno,
+ * *brkQno and *brkRno and 1 is returned; otherwise 0 is returned.
+ * *is_cand_P_found always receives the number of P candidates kept in step 1.
+ * fliB and fliC are not used here.
+ */
+int find_translocation_brk_PQR(global_context_t * global_context, mapping_result_t * resA1, mapping_result_t * resA2, fragment_list_t * fliB, fragment_list_t * fliC, unsigned int * brkPno, unsigned int * brkQno, unsigned int * brkRno, int isInv, unsigned int * is_cand_P_found)
+{
+    unsigned int event_pos_list_A1[_PQR_LIST_SIZE];
+    void * event_ptr_list_A1[_PQR_LIST_SIZE];
+
+    char * chroA=NULL;
+    unsigned int posA1=0;
+
+    locate_gene_position(resA1 -> selected_position, &global_context -> chromosome_table, &chroA, &posA1);
+
+    int candA1i, found_PQR = 0;
+    int candA1Number = bktable_lookup(&global_context -> breakpoint_table_P, chroA, posA1, global_context -> config.maximum_pair_distance , event_pos_list_A1, event_ptr_list_A1, _PQR_LIST_SIZE);
+    indel_context_t * indel_context = (indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID];
+    chromosome_event_t * candBrkPlist [_PQR_LIST_SIZE];
+    int candBrkPi , candBrkPNumber=0;
+
+    //SUBREADprintf("A FOUND %d P ", candA1Number);
+
+    for(candA1i = 0; candA1i < candA1Number ; candA1i++){
+        // The table stores the event number disguised as a pointer value.
+        int event_no = event_ptr_list_A1[candA1i] - NULL;
+        chromosome_event_t * event_body = indel_context -> event_space_dynamic + event_no;
+
+        long long small_dist = event_body -> event_small_side, large_dist = event_body -> event_large_side;
+        small_dist -= resA1 -> selected_position;
+        large_dist -= resA2 -> selected_position;
+
+        if(small_dist > 0 && small_dist < global_context -> config.maximum_pair_distance && large_dist < 0 && large_dist > -1ll * global_context -> config.maximum_pair_distance && event_body -> small_side_increasing_coordinate == 0)
+            candBrkPlist[candBrkPNumber++] = event_body;
+    }
+
+    //SUBREADprintf(", (%d may be used)\n", candBrkPNumber);
+
+    *is_cand_P_found = candBrkPNumber;
+
+    for(candBrkPi = 0; candBrkPi < candBrkPNumber; candBrkPi++){
+        // Take the P event from the FILTERED list. Indexing the raw lookup
+        // result with candBrkPi, as was done before, picks events that may have
+        // been rejected by the filter above.
+        chromosome_event_t * event_body_P = candBrkPlist[candBrkPi];
+        unsigned int event_no_P = (unsigned int)(event_body_P - indel_context -> event_space_dynamic);
+
+        unsigned int anchor_for_brkQ = isInv?event_body_P -> event_large_side:event_body_P -> event_small_side;
+        unsigned int anchor_for_brkR = isInv?event_body_P -> event_small_side:event_body_P -> event_large_side;
+
+        unsigned int event_pos_list_Q[_PQR_LIST_SIZE];
+        void * event_ptr_list_Q[_PQR_LIST_SIZE];
+
+        unsigned int event_pos_list_R[_PQR_LIST_SIZE];
+        void * event_ptr_list_R[_PQR_LIST_SIZE];
+
+        char * charAncQ = NULL, * charAncR = NULL;
+        unsigned int posAncQ=0, posAncR = 0;
+        locate_gene_position(anchor_for_brkQ, &global_context -> chromosome_table, &charAncQ, &posAncQ);
+        locate_gene_position(anchor_for_brkR, &global_context -> chromosome_table, &charAncR, &posAncR);
+
+        int candQi, candQnumber = bktable_lookup(&global_context -> breakpoint_table_QR, charAncQ, posAncQ - BREAK_POINT_MAXIMUM_TOLERANCE , 2* BREAK_POINT_MAXIMUM_TOLERANCE , event_pos_list_Q, event_ptr_list_Q, _PQR_LIST_SIZE);
+        int candRi, candRnumber = bktable_lookup(&global_context -> breakpoint_table_QR, charAncR, posAncR - BREAK_POINT_MAXIMUM_TOLERANCE , 2* BREAK_POINT_MAXIMUM_TOLERANCE , event_pos_list_R, event_ptr_list_R, _PQR_LIST_SIZE);
+
+        SUBREADprintf("P [%s] FOUND %d Q AT %s:%u and %d R AT %s:%u\n", isInv?"INV":"STR", candQnumber, charAncQ, posAncQ, candRnumber, charAncR, posAncR);
+
+        for(candQi = 0 ; candQi < candQnumber ; candQi++){
+            unsigned int event_no_Q = event_ptr_list_Q[candQi] - NULL;
+            chromosome_event_t * event_body_Q = indel_context -> event_space_dynamic + event_no_Q;
+
+            long long cand_Q_small_dist = event_body_Q -> event_small_side;
+            cand_Q_small_dist -= isInv?event_body_P -> event_large_side:event_body_P -> event_small_side;
+
+            // llabs, not abs: the operand is long long.
+            int is_Q_small_side_close_to_P = llabs(cand_Q_small_dist) <= BREAK_POINT_MAXIMUM_TOLERANCE;
+
+            SUBREADprintf("Q: SMALL_CLOSE_P = %d, DIR = %c %c\n", is_Q_small_side_close_to_P, event_body_Q -> small_side_increasing_coordinate?'>':'<', event_body_Q -> large_side_increasing_coordinate?'>':'<');
+
+            if( is_Q_small_side_close_to_P && event_body_Q -> large_side_increasing_coordinate == 1) continue; // the large side is the target location.
+            if((!is_Q_small_side_close_to_P) && event_body_Q -> small_side_increasing_coordinate == 1) continue; // the small side is the target location.
+
+            if( isInv && event_body_Q -> large_side_increasing_coordinate != event_body_Q -> small_side_increasing_coordinate) continue;
+            if((!isInv) && event_body_Q -> large_side_increasing_coordinate == event_body_Q -> small_side_increasing_coordinate) continue;
+
+            for(candRi = 0 ; candRi < candRnumber ; candRi++){
+                unsigned int event_no_R = event_ptr_list_R[candRi] - NULL;
+                chromosome_event_t * event_body_R = indel_context -> event_space_dynamic + event_no_R;
+
+                long long cand_R_dist_to_Q = is_Q_small_side_close_to_P?event_body_Q -> event_large_side:event_body_Q -> event_small_side;
+                cand_R_dist_to_Q -= is_Q_small_side_close_to_P?event_body_R -> event_large_side:event_body_R-> event_small_side;
+
+                // The direction flags printed on the "R:" line are those of R
+                // (the flags of Q were printed here before).
+                SUBREADprintf("R: candDist=%lld, DIR = %c %c\n", cand_R_dist_to_Q, event_body_R -> small_side_increasing_coordinate?'>':'<', event_body_R -> large_side_increasing_coordinate?'>':'<');
+
+                if(llabs(cand_R_dist_to_Q) > BREAK_POINT_MAXIMUM_TOLERANCE) continue;
+                int is_R_small_side_close_to_P = is_Q_small_side_close_to_P;
+
+                if( is_R_small_side_close_to_P && !event_body_R -> large_side_increasing_coordinate) continue;
+                if(!(is_R_small_side_close_to_P) && !event_body_R -> small_side_increasing_coordinate) continue;
+
+                if( isInv && event_body_R -> large_side_increasing_coordinate != event_body_R -> small_side_increasing_coordinate) continue;
+                if(!(isInv) && event_body_R -> large_side_increasing_coordinate == event_body_R -> small_side_increasing_coordinate) continue;
+
+                // The first consistent triple wins.
+                (*brkPno) = event_no_P;
+                (*brkQno) = event_no_Q;
+                (*brkRno) = event_no_R;
+                found_PQR++;
+                return 1;
+            }
+        }
+    }
+
+    return found_PQR;
+}
+
+
+/*
+ * Reports the coordinates of both sides of event number event_no.
+ *
+ * small_abs / large_abs receive event_small_side / event_large_side as stored
+ * in the event; each may be NULL to skip it.
+ * small_chro + small_pos (and likewise large_chro + large_pos) receive the
+ * result of locate_gene_position for that side; a pair is filled only when
+ * BOTH of its pointers are non-NULL.
+ */
+void get_event_two_coordinates(global_context_t * global_context, unsigned int event_no, char ** small_chro, unsigned int * small_pos, unsigned int * small_abs, char ** large_chro, unsigned int * large_pos, unsigned int * large_abs){
+
+    indel_context_t * indel_context = (indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID];
+    chromosome_event_t * the_event = &indel_context -> event_space_dynamic[event_no];
+
+    if(small_abs != NULL){
+        *small_abs = the_event -> event_small_side;
+    }
+    if(large_abs != NULL){
+        *large_abs = the_event -> event_large_side;
+    }
+
+    if(small_chro != NULL && small_pos != NULL){
+        locate_gene_position(the_event -> event_small_side, &global_context -> chromosome_table, small_chro, small_pos);
+    }
+    if(large_chro != NULL && large_pos != NULL){
+        locate_gene_position(the_event -> event_large_side, &global_context -> chromosome_table, large_chro, large_pos);
+    }
+}
+
+
+/*
+ * Records a translocation whose breakpoints are only estimated.
+ *
+ * Existing results around guessed_P_small are looked up in
+ * translocation_result_table. If one has the same isInv and both its target
+ * position and its length differ from the guesses by less than
+ * BREAK_POINT_MAXIMUM_TOLERANCE, its all_sup_P is incremented and its
+ * max_sup_QR is raised to paired_BC_reads if that is larger. Otherwise a new
+ * result with is_precisely_called = 0 is appended to the table.
+ */
+void create_or_update_translocation_imprecise_result(global_context_t * global_context , unsigned int guessed_P_small, unsigned int guessed_tra_len, unsigned int guessed_Q_small , int paired_BC_reads, int isInv){
+
+    char * brkPchr;
+    unsigned int brkPsmall;
+    void * trans_old_ptrs [_PQR_LIST_SIZE];
+    unsigned int trans_old_poses [_PQR_LIST_SIZE];
+
+    locate_gene_position(guessed_P_small, &global_context -> chromosome_table, &brkPchr, &brkPsmall);
+
+    int is_trans_found = 0, old_res_i, old_res_number = bktable_lookup(&global_context -> translocation_result_table, brkPchr, brkPsmall - BREAK_POINT_MAXIMUM_TOLERANCE, 2*BREAK_POINT_MAXIMUM_TOLERANCE, trans_old_poses, trans_old_ptrs, _PQR_LIST_SIZE);
+    for(old_res_i = 0; old_res_i < old_res_number; old_res_i++){
+        translocation_result_t * old_res = (translocation_result_t * )trans_old_ptrs[old_res_i];
+
+        long long target_dist = old_res -> target_left_side;
+        target_dist -= guessed_Q_small;
+
+        // llabs, not abs: target_dist is long long and abs() would truncate it
+        // to int, which can turn a very large distance into a small one.
+        if(llabs(target_dist) < BREAK_POINT_MAXIMUM_TOLERANCE && isInv == old_res -> is_inv){
+            target_dist = old_res -> length;
+            target_dist -= guessed_tra_len;
+            if(llabs(target_dist) < BREAK_POINT_MAXIMUM_TOLERANCE){
+                old_res -> all_sup_P ++;
+                old_res -> max_sup_QR = max(old_res -> max_sup_QR , paired_BC_reads);
+                is_trans_found = 1;
+                break;
+            }
+        }
+    }
+
+    if(0 == is_trans_found){
+        translocation_result_t * new_res = malloc(sizeof(translocation_result_t));
+        memset(new_res, 0, sizeof(translocation_result_t));
+        new_res -> target_left_side = guessed_Q_small;
+        new_res -> length = guessed_tra_len;
+        new_res -> source_left_side = guessed_P_small;
+        new_res -> is_precisely_called = 0;
+        new_res -> all_sup_P = 1;
+        new_res -> max_sup_QR = paired_BC_reads;
+        new_res -> is_inv = isInv;
+
+        bktable_append(&global_context -> translocation_result_table,brkPchr, brkPsmall, new_res);
+    }
+
+}
+
+/*
+ * Records a translocation confirmed by the three breakpoint events P, Q, R.
+ *
+ * The target position is the small side of Q when the small sides of Q and R
+ * lie within BREAK_POINT_MAXIMUM_TOLERANCE of each other, otherwise the large
+ * side of Q. The length is the distance between the two sides of P minus 1.
+ *
+ * If translocation_result_table already holds a result near P with the same
+ * isInv and with target position and length both closer than the tolerance,
+ * its all_sup_P is incremented and its max_sup_QR raised; otherwise a new
+ * result with is_precisely_called = 1 is appended.
+ */
+void create_or_update_translocation_result(global_context_t * global_context , unsigned int brkPno, unsigned int brkQno, unsigned int brkRno , int paired_BC_reads, int isInv){
+
+    char *brkPchr, *brkQchr, *tmpchr;
+    unsigned int brkPsmall, brkPlarge, brkQsmall, tmpint, brkPabs_small, brkQabs_small, brkRabs_small, brkRabs_large, brkQabs_large;
+
+    SUBREADprintf("\nTRALOG: FINALLY_CONFIRMED: %s ; %d PE_MATES\n", isInv?"INV":"STR", paired_BC_reads);
+
+    get_event_two_coordinates(global_context, brkPno, &brkPchr, &brkPsmall, &brkPabs_small, &tmpchr, &brkPlarge, NULL);
+    get_event_two_coordinates(global_context, brkQno, &brkQchr, &brkQsmall, &brkQabs_small, &tmpchr, &tmpint, &brkQabs_large);
+    get_event_two_coordinates(global_context, brkRno, NULL, NULL, &brkRabs_small, NULL, NULL, &brkRabs_large);
+
+    SUBREADprintf("TRARES: %s:%u (len=%d) => %s:%u (Coor: last_base_before)\n", brkPchr, brkPsmall, brkPlarge - brkPsmall - 1, brkQchr, brkQsmall);
+
+    void * trans_old_ptrs [_PQR_LIST_SIZE];
+    unsigned int trans_old_poses [_PQR_LIST_SIZE];
+
+    unsigned int new_target_left_side, new_length;
+
+    // Signed 64-bit difference: the unsigned form "R - TOLERANCE" wraps around
+    // when R is smaller than the tolerance.
+    long long small_side_gap = brkQabs_small;
+    small_side_gap -= brkRabs_small;
+
+    if(llabs(small_side_gap) <= BREAK_POINT_MAXIMUM_TOLERANCE)
+    {
+        // Q small and R large are target
+        new_target_left_side = brkQabs_small;
+    } else{
+        // Q large and R small are target
+        new_target_left_side = brkQabs_large;
+    }
+
+    new_length = brkPlarge - brkPsmall - 1;
+
+    int is_trans_found = 0, old_res_i, old_res_number = bktable_lookup(&global_context -> translocation_result_table, brkPchr, brkPsmall - BREAK_POINT_MAXIMUM_TOLERANCE, 2*BREAK_POINT_MAXIMUM_TOLERANCE, trans_old_poses, trans_old_ptrs, _PQR_LIST_SIZE);
+    for(old_res_i = 0; old_res_i < old_res_number; old_res_i++){
+        translocation_result_t * old_res = (translocation_result_t * )trans_old_ptrs[old_res_i];
+
+        long long target_dist = old_res -> target_left_side;
+        target_dist -= new_target_left_side;
+
+        // llabs, not abs: target_dist is long long.
+        if(llabs(target_dist) < BREAK_POINT_MAXIMUM_TOLERANCE && isInv == old_res -> is_inv){
+            target_dist = old_res -> length;
+            target_dist -= new_length;
+            if(llabs(target_dist) < BREAK_POINT_MAXIMUM_TOLERANCE){
+                old_res -> all_sup_P ++;
+                old_res -> max_sup_QR = max(old_res -> max_sup_QR , paired_BC_reads);
+                is_trans_found = 1;
+                break;
+            }
+        }
+    }
+
+    if(0 == is_trans_found){
+
+        translocation_result_t * new_res = malloc(sizeof(translocation_result_t));
+        memset(new_res, 0, sizeof(translocation_result_t));
+        new_res -> target_left_side = new_target_left_side;
+        new_res -> length = new_length;
+        new_res -> source_left_side = brkPabs_small;
+        new_res -> is_precisely_called = 1;
+        new_res -> event_P_number = brkPno;
+        new_res -> event_Q_number = brkQno;
+        new_res -> event_R_number = brkRno;
+        new_res -> all_sup_P = 1;
+        new_res -> max_sup_QR = paired_BC_reads;
+        new_res -> is_inv = isInv;
+
+        bktable_append(&global_context -> translocation_result_table,brkPchr, brkPsmall, new_res);
+    }
+}
+
+
+/*
+ * Turns the collected "funky" fragments into translocation results.
+ *
+ * For every fragment in funky_list_A:
+ *   1. its two reads are ordered by position (q_res_1 = smaller, q_res_2 = larger);
+ *   2. funky_table_BC is searched around each of the two reads, and every hit
+ *      is sorted into one of four lists (INV/STR x B/C) according to the
+ *      strands of the hit and of its mate;
+ *   3. for the INV lists and then for the STR lists, B and C fragments are
+ *      paired, breakpoint events P/Q/R are searched and checked, and either a
+ *      precise result (events confirmed) or an imprecise one (consistent mates,
+ *      more than 2 fragments in each list, a P candidate seen) is recorded.
+ */
+void finalise_translocations(global_context_t * global_context){
+
+    void ** s1_ptrs, **s2_ptrs;
+    unsigned int * s1_poses, * s2_poses;
+
+    s1_ptrs = malloc(sizeof(void *) * S12_LIST_CAPACITY);
+    s2_ptrs = malloc(sizeof(void *) * S12_LIST_CAPACITY);
+
+    s1_poses = malloc(sizeof(int) * S12_LIST_CAPACITY);
+    s2_poses = malloc(sizeof(int) * S12_LIST_CAPACITY);
+
+    unsigned long long * s1_selected_list = malloc(sizeof(long long) * S12_LIST_CAPACITY); // fragment_no * 2 + is_second_read
+    unsigned long long * s2_selected_list = malloc(sizeof(long long) * S12_LIST_CAPACITY);
+
+    int frag_Q_larger_read;
+    subread_read_number_t frag_A_i;
+
+    for(frag_A_i = 0; frag_A_i < global_context -> funky_list_A.fragments; frag_A_i ++){
+        fragment_list_t fli_STR_B, fli_STR_C, fli_INV_B, fli_INV_C;
+
+        fraglist_init(&fli_STR_B);
+        fraglist_init(&fli_STR_C);
+        fraglist_init(&fli_INV_B);
+        fraglist_init(&fli_INV_C);
+
+        subread_read_number_t frag_A_no = global_context -> funky_list_A.fragment_numbers[frag_A_i];
+
+        mapping_result_t q_res_A_body, q_res_B_body;
+
+        mapping_result_t * q_res_A = &q_res_A_body;
+        mapping_result_t * q_res_B = &q_res_B_body;
+
+        bigtable_readonly_result(global_context, NULL, frag_A_no, 0, 0, q_res_A, NULL);
+        bigtable_readonly_result(global_context, NULL, frag_A_no, 0, 1, q_res_B, NULL);
+
+        mapping_result_t * q_res_1 = q_res_A -> selected_position > q_res_B -> selected_position?q_res_B:q_res_A;
+        mapping_result_t * q_res_2 = q_res_A -> selected_position <= q_res_B -> selected_position?q_res_B:q_res_A;
+
+        /***************************************************************************************************
+         *
+         * is_q1_negative and is_q2_negative describe the strandness of the original FASTQ read sequence.
+         *
+         * For the very normal mappings, is_q1_negative must be 0 and is_q2_negative must be 1.
+         *
+         * If is_q1_negative != is_q2_negative, then there is a strand-jumping fusion between the two reads.
+         */
+
+        int is_q1_negative = (q_res_1 -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0;
+        int is_q2_negative = (q_res_2 -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0;
+
+        if(q_res_B == q_res_1)is_q1_negative=!is_q1_negative;
+        if(q_res_B == q_res_2)is_q2_negative=!is_q2_negative;
+
+        long long dist = q_res_A ->selected_position;
+        dist -= q_res_B->selected_position;
+
+        // llabs, not abs: dist is long long.
+        if( llabs(dist) < 1000 && !(is_q1_negative == 0 && is_q2_negative == 1))
+        {
+            SUBREADprintf("TRALOG: STRANDNESS_BUG %08llu\n", frag_A_no);
+        }
+
+
+        for(frag_Q_larger_read = 0; frag_Q_larger_read < 2; frag_Q_larger_read++){
+            void ** s_ptrs = frag_Q_larger_read?s2_ptrs:s1_ptrs;
+            unsigned int * s_poses = frag_Q_larger_read?s2_poses:s1_poses, q_res_offset = 0;
+            mapping_result_t * q_res = frag_Q_larger_read?q_res_2:q_res_1;
+
+            char * q_res_chro = NULL;
+            locate_gene_position(q_res -> selected_position, &global_context -> chromosome_table, &q_res_chro, &q_res_offset);
+            q_res_offset +=1 ; // all tables are one-based.
+
+            // The search window starts FUNKY_COLOCATION_TOLERANCE before the
+            // read, clamped at 0.
+            unsigned int q_search_start = q_res_offset;
+            if(q_search_start > FUNKY_COLOCATION_TOLERANCE) q_search_start -= FUNKY_COLOCATION_TOLERANCE;
+            else q_search_start = 0;
+
+            int cand_i, canidate_s_items = bktable_lookup(&global_context -> funky_table_BC, q_res_chro, q_search_start, 2*FUNKY_COLOCATION_TOLERANCE, s_poses, s_ptrs, S12_LIST_CAPACITY);
+
+            if(0 && frag_A_no == 143736){
+                SUBREADprintf("TRALOG: SEARCH CLOSE TO %s READ: %s:%u ; HAD %d HITS\n", frag_Q_larger_read?"LARGE":"SMALL", q_res_chro, q_search_start, canidate_s_items);
+            }
+
+            // scan if candidate is reversed.
+            // s_ptrs - NULL is the fragment no.
+            for(cand_i = 0; cand_i < canidate_s_items; cand_i ++){
+                subread_read_number_t frag_S_no = (s_ptrs[cand_i] - NULL)/ 2;
+                int frag_S_is_read_B = (s_ptrs[cand_i] - NULL) % 2;
+
+                mapping_result_t read_S_res_body, mate_S_res_body;
+                mapping_result_t * read_S_res = &read_S_res_body;
+                mapping_result_t * mate_S_res = &mate_S_res_body;
+
+                bigtable_readonly_result(global_context, NULL, frag_S_no, 0, frag_S_is_read_B, read_S_res, NULL);
+                bigtable_readonly_result(global_context, NULL, frag_S_no, 0, !frag_S_is_read_B, mate_S_res, NULL);
+
+                int is_read_S_negative = (read_S_res -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0;
+                int is_mate_S_negative = (mate_S_res -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0;
+                if(frag_S_is_read_B) is_read_S_negative = !is_read_S_negative;
+                else is_mate_S_negative = !is_mate_S_negative;
+
+                // Same corrected strand on both reads is treated as inverted.
+                int is_INV_TRA = is_mate_S_negative == is_read_S_negative;
+
+                if(is_INV_TRA && is_read_S_negative == !frag_Q_larger_read){
+                    if(frag_Q_larger_read)
+                        fraglist_append(&fli_INV_B, frag_S_no * 2 + frag_S_is_read_B);
+                    else
+                        fraglist_append(&fli_INV_C, frag_S_no * 2 + frag_S_is_read_B);
+                }
+
+                if((!is_INV_TRA) && is_read_S_negative == !frag_Q_larger_read){
+                    if(frag_Q_larger_read)
+                        fraglist_append(&fli_STR_C, frag_S_no * 2 + frag_S_is_read_B);
+                    else
+                        fraglist_append(&fli_STR_B, frag_S_no * 2 + frag_S_is_read_B);
+                }
+            }
+        }
+
+        unsigned int guesed_p_small, guessed_tra_length, guessed_q_small, is_brkP_cand_found = 0;
+
+        if(fli_INV_B.fragments >= 1 && fli_INV_C.fragments >= 1){
+            int PEmates = find_translocation_BC_mates(global_context, q_res_1, q_res_2, &fli_INV_B, &fli_INV_C, 1, s1_selected_list, s2_selected_list, s1_poses, s2_poses, &guesed_p_small, &guessed_tra_length, &guessed_q_small);
+            int ConformPE = find_translocation_BC_conformation(global_context, PEmates, s1_poses, s2_poses);
+            int brkPQR_are_found = 0;
+            unsigned int brkPno, brkQno, brkRno;
+
+            char out1pos[100], out2pos[100];
+            absoffset_to_posstr(global_context, q_res_1 -> selected_position, out1pos);
+            absoffset_to_posstr(global_context, q_res_2 -> selected_position, out2pos);
+            SUBREADprintf("TRALOG: A_READ: %09llu: INV : %s ~ %s ; %d PE_MATES (%s)\n", frag_A_no, out1pos, out2pos, PEmates, ConformPE?"CONFORMABLE":"INCONSISTENT");
+
+            //SUBREADputs("TRALOG: INV_C:");
+            //print_frags(global_context,&fli_INV_C);
+            //SUBREADputs("TRALOG: INV_B:");
+            //print_frags(global_context,&fli_INV_B);
+            if(PEmates)
+                brkPQR_are_found = find_translocation_brk_PQR(global_context, q_res_1, q_res_2, &fli_INV_B, &fli_INV_C, &brkPno, &brkQno, &brkRno, 1, &is_brkP_cand_found);
+
+            if(brkPQR_are_found){
+                brkPQR_are_found = breakpoint_PQR_supported(global_context , brkPno , brkQno, brkRno, &fli_INV_B, &fli_INV_C, 1);
+                SUBREADprintf("TRALOG: A_READ: INV BRK_PQR_SUPPED=%d\n", brkPQR_are_found);
+            }
+            if(brkPQR_are_found)
+                create_or_update_translocation_result( global_context , brkPno, brkQno, brkRno , PEmates, 1);
+            else if(ConformPE && fli_INV_B.fragments > 2 && fli_INV_C.fragments > 2 && is_brkP_cand_found)
+                create_or_update_translocation_imprecise_result(global_context, guesed_p_small, guessed_tra_length, guessed_q_small, PEmates, 1);
+        }
+
+        if(fli_STR_B.fragments >= 1 && fli_STR_C.fragments >= 1){
+            int PEmates = find_translocation_BC_mates(global_context, q_res_1, q_res_2, &fli_STR_B, &fli_STR_C, 0, s1_selected_list, s2_selected_list, s1_poses, s2_poses, &guesed_p_small, &guessed_tra_length, &guessed_q_small);
+            int ConformPE = find_translocation_BC_conformation(global_context, PEmates, s1_poses, s2_poses);
+
+            char out1pos[100], out2pos[100];
+            absoffset_to_posstr(global_context, q_res_1 -> selected_position, out1pos);
+            absoffset_to_posstr(global_context, q_res_2 -> selected_position, out2pos);
+
+            SUBREADprintf("TRALOG: A_READ: %09llu: TRA : %s ~ %s ; %d PE_MATES (%s)\n", frag_A_no, out1pos, out2pos, PEmates, ConformPE?"CONFORMABLE":"INCONSISTENT");
+
+            //SUBREADputs("TRALOG: STR_B:");
+            //print_frags(global_context,&fli_STR_B);
+            //SUBREADputs("TRALOG: STR_C:");
+            //print_frags(global_context,&fli_STR_C);
+
+            int brkPQR_are_found = 0;
+            unsigned int brkPno, brkQno, brkRno;
+
+            if(PEmates)
+                brkPQR_are_found = find_translocation_brk_PQR(global_context, q_res_1, q_res_2, &fli_STR_B, &fli_STR_C, &brkPno, &brkQno, &brkRno, 0, &is_brkP_cand_found);
+
+            if(brkPQR_are_found){
+                brkPQR_are_found = breakpoint_PQR_supported(global_context , brkPno , brkQno, brkRno, &fli_STR_B, &fli_STR_C, 0);
+            }
+
+            // The size test uses the STR lists here; it used to test the INV
+            // lists, a leftover from the block above.
+            if(brkPQR_are_found)
+                create_or_update_translocation_result( global_context , brkPno, brkQno, brkRno , PEmates, 0);
+            else if(ConformPE && fli_STR_B.fragments > 2 && fli_STR_C.fragments > 2 && is_brkP_cand_found)
+                create_or_update_translocation_imprecise_result(global_context, guesed_p_small, guessed_tra_length, guessed_q_small, PEmates, 0);
+        }
+
+        fraglist_destroy(&fli_STR_B);
+        fraglist_destroy(&fli_STR_C);
+        fraglist_destroy(&fli_INV_B);
+        fraglist_destroy(&fli_INV_C);
+    }
+
+    free(s1_ptrs);
+    free(s2_ptrs);
+    free(s1_poses);
+    free(s2_poses);
+    free(s1_selected_list);
+    free(s2_selected_list);
+
+}
+
+/*
+ * finalise_inversions
+ *
+ * Walks every fragment recorded in global_context -> funky_list_DE and tries
+ * to turn it into an entry of global_context -> inversion_result_table.
+ *
+ * For each fragment ("A"):
+ *   1. Both reads are fetched with bigtable_readonly_result() and ordered by
+ *      selected_position (q_res_1 = smaller, q_res_2 = larger).
+ *   2. If both reads are on the positive strand after mate-flipping (a "D"
+ *      fragment), funky_table_DE is searched around each read for co-located
+ *      fragments whose two reads are both negative ("E" fragments).
+ *   3. E fragments that are co-located with BOTH D reads are counted
+ *      (found_INV_frags) and used to average a rough Y/Z position.
+ *   4. breakpoint_table_YZ is searched for a pair of events (Y, Z) with
+ *      opposite "increasing coordinate" flags and matching coordinates.
+ *   5. If a Y event was found, breakpoint_YZ_supported() decides whether the
+ *      call is precise or rough, and the call is merged into an existing
+ *      inversion result or appended as a new one.
+ *
+ * Parameters:
+ *   global_context - the aligner-wide context; its tables listed above are
+ *                    read, and inversion_result_table is updated.
+ *
+ * Returns nothing. All scratch lists are allocated and released here.
+ */
+void finalise_inversions(global_context_t * global_context){
+    subread_read_number_t frag_A_i;
+    void ** s1_ptrs, **s2_ptrs;
+    unsigned int * s1_poses, * s2_poses;
+
+    s1_ptrs = malloc(sizeof(void *) * S12_LIST_CAPACITY);
+    s2_ptrs = malloc(sizeof(void *) * S12_LIST_CAPACITY);
+
+    s1_poses = malloc(sizeof(int) * S12_LIST_CAPACITY);
+    s2_poses = malloc(sizeof(int) * S12_LIST_CAPACITY);
+
+    unsigned long long * s1_selected_list = malloc(sizeof(long long) * S12_LIST_CAPACITY); // fragment_no * 2 + is_second_read
+    unsigned long long * s2_selected_list = malloc(sizeof(long long) * S12_LIST_CAPACITY);
+
+    mapping_result_t ** s1_result_ptr_list = malloc(sizeof(mapping_result_t *) * S12_LIST_CAPACITY);
+    mapping_result_t ** s2_result_ptr_list = malloc(sizeof(mapping_result_t *) * S12_LIST_CAPACITY);
+
+    // Backing storage for the entries of s?_result_ptr_list. The candidate
+    // results are read into block-scoped locals below, so each selected
+    // result must be copied here; keeping only the address of the local
+    // would leave every list entry pointing at the same, later overwritten
+    // and out-of-scope object.
+    mapping_result_t * s1_result_bodies = malloc(sizeof(mapping_result_t) * S12_LIST_CAPACITY);
+    mapping_result_t * s2_result_bodies = malloc(sizeof(mapping_result_t) * S12_LIST_CAPACITY);
+
+    int frag_Q_larger_read, xk1, xk2;
+
+    for(frag_A_i = 0; frag_A_i < global_context -> funky_list_DE.fragments; frag_A_i ++){
+        int s1_list_items = 0, s2_list_items = 0;
+
+        subread_read_number_t frag_A_no = global_context -> funky_list_DE.fragment_numbers[frag_A_i];
+
+        mapping_result_t q_res_A_body, q_res_B_body;
+
+        mapping_result_t * q_res_A = &q_res_A_body, * q_res_B = &q_res_B_body;
+
+        bigtable_readonly_result(global_context, NULL, frag_A_no, 0, 0, q_res_A, NULL);
+        bigtable_readonly_result(global_context, NULL, frag_A_no, 0, 1, q_res_B, NULL);
+
+        // q_res_1 is the read with the smaller coordinate, q_res_2 the other one.
+        mapping_result_t * q_res_1 = q_res_A -> selected_position > q_res_B -> selected_position?q_res_B:q_res_A;
+        mapping_result_t * q_res_2 = q_res_A -> selected_position <= q_res_B -> selected_position?q_res_B:q_res_A;
+
+
+        /***************************************************************************************************
+         *
+         * is_q1_negative and is_q2_negative describe the strandness of the original FASTQ read sequence.
+         *
+         * For the very normal mappings, is_q1_negative must be 0 and is_q2_negative must be 1.
+         *
+         * If is_q1_negative != is_q2_negative, then there is a strand-jumping fusion between the two reads.
+         */
+
+        int is_q1_negative = (q_res_1 -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0;
+        int is_q2_negative = (q_res_2 -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0;
+
+        // The second read of the pair (q_res_B) has its strand flag inverted.
+        if(q_res_B == q_res_1)is_q1_negative=!is_q1_negative;
+        if(q_res_B == q_res_2)is_q2_negative=!is_q2_negative;
+
+        if(is_q1_negative == 0 && is_q2_negative == 0) // D READ
+        {
+            for(frag_Q_larger_read = 0; frag_Q_larger_read < 2; frag_Q_larger_read++){
+                // Select the s1_* lists for the smaller read and the s2_* lists for the larger read.
+                int * s_list_items = frag_Q_larger_read?&s2_list_items:&s1_list_items;
+                void ** s_ptrs = frag_Q_larger_read?s2_ptrs:s1_ptrs;
+                unsigned int * s_poses = frag_Q_larger_read?s2_poses:s1_poses;
+                unsigned int q_res_offset = 0;
+                mapping_result_t * q_res = frag_Q_larger_read?q_res_2:q_res_1;
+                unsigned long long * s_selected_list = frag_Q_larger_read?s2_selected_list:s1_selected_list;
+                mapping_result_t ** s_result_ptr_list = frag_Q_larger_read?s2_result_ptr_list:s1_result_ptr_list;
+                mapping_result_t * s_result_bodies = frag_Q_larger_read?s2_result_bodies:s1_result_bodies;
+
+
+                char * q_res_chro = NULL;
+                locate_gene_position(q_res -> selected_position, &global_context -> chromosome_table, &q_res_chro, &q_res_offset);
+                q_res_offset +=1 ; // all tables are one-based.
+
+                // Start the search FUNKY_COLOCATION_TOLERANCE before the read, clamped at 0.
+                unsigned int q_search_start = q_res_offset;
+                if(q_search_start > FUNKY_COLOCATION_TOLERANCE) q_search_start -= FUNKY_COLOCATION_TOLERANCE;
+                else q_search_start = 0;
+
+                int cand_i, canidate_s_items = bktable_lookup(&global_context -> funky_table_DE, q_res_chro, q_search_start, 2*FUNKY_COLOCATION_TOLERANCE, s_poses, s_ptrs, S12_LIST_CAPACITY);
+                // scan if candidate is reversed.
+                // s_ptrs - NULL is the fragment no.
+                for(cand_i = 0; cand_i < canidate_s_items; cand_i ++){
+                    subread_read_number_t frag_S_no = (s_ptrs[cand_i] - NULL)/2;
+                    int frag_S_larger_read = (s_ptrs[cand_i] - NULL)%2;
+
+                    if(frag_S_no == frag_A_no) continue;
+
+                    if(frag_S_larger_read == frag_Q_larger_read){
+
+                        mapping_result_t res_S_A_body, res_S_B_body;
+                        mapping_result_t * res_S_A = &res_S_A_body , * res_S_B = &res_S_B_body;
+
+                        bigtable_readonly_result(global_context, NULL, frag_S_no, 0, 0, res_S_A, NULL);
+                        bigtable_readonly_result(global_context, NULL, frag_S_no, 0, 1, res_S_B, NULL);
+
+                        mapping_result_t * res_S_1 = res_S_A -> selected_position > res_S_B -> selected_position?res_S_B:res_S_A;
+                        mapping_result_t * res_S_2 = res_S_A -> selected_position <= res_S_B -> selected_position?res_S_B:res_S_A;
+
+                        mapping_result_t * co_locatted_S_res = frag_S_larger_read?res_S_2:res_S_1;
+
+                        int is_s1_negative = (res_S_1 -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0;
+                        int is_s2_negative = (res_S_2 -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0;
+
+                        if(res_S_B == res_S_1) is_s1_negative = !is_s1_negative;
+                        if(res_S_B == res_S_2) is_s2_negative = !is_s2_negative;
+
+
+                        if( is_s1_negative != 0 && is_s2_negative != 0 && *s_list_items < S12_LIST_CAPACITY ){ // E READ
+                            s_selected_list[*s_list_items] = frag_S_no * 2 + frag_S_larger_read;
+                            // Copy the result by value: res_S_?_body goes out of scope
+                            // at the end of this block and is reused by the next candidate.
+                            s_result_bodies[*s_list_items] = *co_locatted_S_res;
+                            s_result_ptr_list[*s_list_items] = s_result_bodies + *s_list_items;
+                            (*s_list_items)++;
+                        }
+                    }
+                }
+            }
+        }
+
+        int found_INV_frags = 0;
+        unsigned long long guessed_Z_large_abs_sum = 0, guessed_Y_small_abs_sum = 0;
+
+        // An E fragment supports the inversion only if it was found next to both D reads.
+        for(xk1 = 0; xk1 < s1_list_items; xk1++){
+            for(xk2 = 0; xk2 < s2_list_items ; xk2 ++){
+                if(s1_selected_list[xk1]/2 == s2_selected_list[xk2]/2)
+                {
+                    found_INV_frags ++;
+                    // now there is only one D fragment. here we found the E fragment for it (E fragment is in s1[xk1] and s2[xk2])
+                    // s1 is the E read that is close to D_1; s2 is the E read that is close to D_2; D_1 is the D read with smaller coordinate.
+                    // res_E1 is the read that is close to D_2; mapping location of E_1 should be larger than D_2
+
+                    mapping_result_t * res_D1 = q_res_1;
+                    mapping_result_t * res_D2 = q_res_2;
+
+                    mapping_result_t * res_E1 = s2_result_ptr_list[xk2];
+                    mapping_result_t * res_E2 = s1_result_ptr_list[xk1];
+
+                    int Gap_a_length = res_E2 -> selected_position - res_D1 -> selected_position - res_D1 -> read_length;
+                    int Gap_b_length = res_E1 -> selected_position - res_D2 -> selected_position - res_D2 -> read_length;
+                    int average_gap_len = (Gap_b_length + Gap_a_length)/2;
+                    guessed_Y_small_abs_sum += res_D1 -> selected_position + res_D1 -> read_length - average_gap_len / 2;
+                    guessed_Z_large_abs_sum += res_E1 -> selected_position - average_gap_len / 2;
+                    SUBREADprintf("INVLOG: GUESSED_LEN = %d + %d / 2 = %d\n", Gap_a_length, Gap_b_length, average_gap_len);
+                }
+            }
+        }
+
+        // 0xffffffff marks "no breakpoint event selected yet".
+        unsigned int brkYno=0xffffffff, brkZno=0xffffffff;
+        int cand_YZ_breakpoints = 0;
+        if(found_INV_frags > 0)
+        {
+            char * q_small_chro = NULL;
+            unsigned int q_small_pos = 0;
+
+            // Turn the accumulated sums into averages over the supporting E fragments.
+            guessed_Y_small_abs_sum /= found_INV_frags;
+            guessed_Z_large_abs_sum /= found_INV_frags;
+            SUBREADprintf("INVLOG: GUESSED_YZ=%llu, %llu\n", guessed_Y_small_abs_sum, guessed_Z_large_abs_sum);
+
+            locate_gene_position(q_res_1 -> selected_position, &global_context -> chromosome_table, &q_small_chro, &q_small_pos);
+            int cand_Y, cand_Z;
+            // s1_poses / s1_ptrs are reused as scratch space here; the funky-table
+            // lookup results they held are no longer needed at this point.
+            cand_YZ_breakpoints = bktable_lookup(&global_context -> breakpoint_table_YZ, q_small_chro, q_small_pos, global_context -> config.maximum_pair_distance , s1_poses, s1_ptrs, S12_LIST_CAPACITY);
+
+            //SUBREADprintf("INVLOG: %09u FOUND %d CANDIDATE BKs AT %s:%u\n", frag_A_no, cand_YZ_breakpoints, q_small_chro, q_small_pos);
+
+            indel_context_t * indel_context = (indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID];
+
+            for(cand_Y = 0; cand_Y < cand_YZ_breakpoints ; cand_Y ++){
+                if(brkYno < 0xffffffff) break;
+
+                int event_no_Y = s1_ptrs[cand_Y] - NULL;
+                chromosome_event_t * event_body_Y = indel_context -> event_space_dynamic + event_no_Y;
+
+                // Y candidates are the events whose "increasing coordinate" flags are both unset.
+                if(event_body_Y -> small_side_increasing_coordinate) continue;
+                if(event_body_Y -> small_side_increasing_coordinate != event_body_Y -> large_side_increasing_coordinate)
+                    assert(0);
+
+                // Distance is computed in 64 bits: abs() on the unsigned
+                // difference would wrap for coordinates more than 2^31 apart.
+                long long dist_Y_large_to_q2 = event_body_Y -> event_large_side;
+                dist_Y_large_to_q2 -= q_res_2 -> selected_position;
+                if(dist_Y_large_to_q2 < 0) dist_Y_large_to_q2 = -dist_Y_large_to_q2;
+
+                if(dist_Y_large_to_q2 < global_context -> config.maximum_pair_distance){
+
+                    for(cand_Z = 0; cand_Z < cand_YZ_breakpoints ; cand_Z ++){
+                        int event_no_Z = s1_ptrs[cand_Z] - NULL;
+                        chromosome_event_t * event_body_Z = indel_context -> event_space_dynamic + event_no_Z;
+
+                        // Z candidates are the events whose "increasing coordinate" flags are both set.
+                        if(!event_body_Z -> small_side_increasing_coordinate) continue;
+                        if(event_body_Z -> small_side_increasing_coordinate != event_body_Z -> large_side_increasing_coordinate)
+                            assert(0);
+
+                        long long dist_small = event_body_Z -> event_small_side , dist_large = event_body_Z -> event_large_side;
+                        dist_small -= event_body_Y -> event_small_side;
+                        dist_large -= event_body_Y -> event_large_side;
+
+                        long long dist_small_large_diff = dist_small;
+                        dist_small_large_diff -= dist_large;
+
+                        // 64-bit absolute values; abs() would truncate these long long values to int.
+                        long long abs_small_large_diff = dist_small_large_diff < 0 ? -dist_small_large_diff : dist_small_large_diff;
+                        long long abs_dist_large = dist_large < 0 ? -dist_large : dist_large;
+
+                        if(abs_small_large_diff <= BREAK_POINT_MAXIMUM_TOLERANCE && abs_dist_large <= BREAK_POINT_MAXIMUM_TOLERANCE && event_body_Z -> small_side_increasing_coordinate != event_body_Y -> small_side_increasing_coordinate){
+
+                            brkYno = event_no_Y;
+                            brkZno = event_no_Z;
+
+                            break;
+                        }
+                    }
+
+
+                    {
+                        char outpos1[100], outpos2[100];
+                        absoffset_to_posstr(global_context, event_body_Y -> event_small_side, outpos1);
+                        absoffset_to_posstr(global_context, event_body_Y -> event_large_side, outpos2);
+
+                        SUBREADprintf("INVLOG: %09llu FOUND BREAKPOINT YZ: %s ~ %s, INC_COR: %c %c , nSUP=%d\n", frag_A_no, outpos1, outpos2, event_body_Y -> small_side_increasing_coordinate?'>':'<', event_body_Y -> large_side_increasing_coordinate?'>':'<' , event_body_Y -> final_counted_reads);
+
+                    }
+
+                }
+            }
+        }
+
+
+        char *brkYchr = "NULL";
+        unsigned int brkYlarge = 0, brkYsmall = 0, brkYabs_small = 0, brkYabs_large = 0;
+        int is_precisely_called = 0, is_roughly_called = 0;
+        if(brkYno < 0xffffffff){
+            // s1_selected_list : 2 * fragment_S_no + frag_S_larger_read
+            int is_passed_YZ = breakpoint_YZ_supported(global_context, brkYno, brkZno, s1_selected_list, s1_list_items, s2_selected_list, s2_list_items);
+            if(is_passed_YZ)
+            {
+                is_precisely_called = 1;
+
+                get_event_two_coordinates(global_context, brkYno, &brkYchr, &brkYsmall, &brkYabs_small, &brkYchr, &brkYlarge, &brkYabs_large);
+
+            }
+            else is_roughly_called = 1;
+            //SUBREADprintf("\nINVLOG: FINALLY_%sCONFIRMED: %09u %s:%u (len=%d) INVERSED!\n", is_passed_YZ?"":"NOT ", frag_A_no, brkYchr, brkYsmall, brkYlarge - brkYsmall);
+        }
+
+        //SUBREADprintf("\nINVLOG: FINALLY_GUESSED: %09u found_INV_frags=%d, s1_list_items=%d, s2_list_items=%d, cand_YZ_breakpoints=%d\n", frag_A_no, found_INV_frags, s1_list_items, s2_list_items, cand_YZ_breakpoints);
+
+        //for(xk1 = 0; xk1 < s1_list_items; xk1++) SUBREADprintf("INVLOG: %09d S_1 MATES: %09llu\n" , frag_A_no , s1_selected_list[xk1]/2);
+        //for(xk1 = 0; xk1 < s2_list_items; xk1++) SUBREADprintf("INVLOG: %09d S_2 MATES: %09llu\n" , frag_A_no , s2_selected_list[xk1]/2);
+
+
+
+        /*
+        if(found_INV_frags >= min(s1_list_items , s2_list_items) - 2 && found_INV_frags > 1 && !is_precisely_called && cand_YZ_breakpoints>0){
+            // guess brkYlarge, brkYsmall, brkZlarge, brkZsmall, brkYabsLarge, brkZabsLarge...
+            locate_gene_position(guessed_Y_small_abs_sum, &global_context -> chromosome_table, &brkYchr, &brkYsmall);
+            locate_gene_position(guessed_Z_large_abs_sum, &global_context -> chromosome_table, &brkYchr, &brkYlarge);
+            //SUBREADprintf("\nINVLOG: FINALLY_GUESSED: %09u %s:%u (len=%llu) INVERSED!\n", frag_A_no, brkYchr, brkYsmall, guessed_Z_large_abs_sum - guessed_Y_small_abs_sum);
+            is_roughly_called = 1;
+        }*/
+
+        if( is_precisely_called || is_roughly_called )
+        {
+            void * old_ptrs[_PQR_LIST_SIZE];
+            unsigned int old_poses[_PQR_LIST_SIZE];
+            // NOTE(review): on the rough-call path brkYchr is still "NULL" and
+            // brkYsmall is still 0 (the guessing block above is disabled), so
+            // "brkYsmall - BREAK_POINT_MAXIMUM_TOLERANCE" wraps around as an
+            // unsigned value. The same happens for precise calls closer than
+            // the tolerance to the chromosome start. Left unchanged because the
+            // intended behaviour cannot be determined from this function alone.
+            int old_found = 0, old_i, old_inversions = bktable_lookup(&global_context -> inversion_result_table, brkYchr, brkYsmall - BREAK_POINT_MAXIMUM_TOLERANCE, 2*BREAK_POINT_MAXIMUM_TOLERANCE, old_poses, old_ptrs, _PQR_LIST_SIZE);
+            for(old_i = 0; old_i < old_inversions; old_i ++){
+                inversion_result_t * inv_res_old = (inversion_result_t *) old_ptrs[old_i];
+                long long old_dist = inv_res_old -> length;
+                old_dist -= brkYlarge - brkYsmall; // the difference on inversion length.
+                if(old_dist < 0) old_dist = -old_dist; // 64-bit absolute value; abs() would truncate to int.
+                if(old_dist < BREAK_POINT_MAXIMUM_TOLERANCE){
+                    // Same inversion as an earlier call: only update its support counters.
+                    inv_res_old -> all_sup_D ++;
+                    inv_res_old -> max_sup_E = max(inv_res_old -> max_sup_E , found_INV_frags);
+                    old_found = 1;
+                    break;
+                }
+            }
+
+            if(0 == old_found){
+                // Allocate and clear exactly one inversion_result_t (the size of
+                // the object actually stored, not of an unrelated event struct).
+                inversion_result_t * inv_res_new = malloc(sizeof(inversion_result_t));
+                memset(inv_res_new, 0 , sizeof(inversion_result_t));
+
+                inv_res_new -> length = brkYlarge - brkYsmall;
+                inv_res_new -> is_precisely_called = is_precisely_called;
+                if(is_precisely_called){
+                    inv_res_new -> event_Y_number = brkYno;
+                    inv_res_new -> event_Z_number = brkZno;
+                    inv_res_new -> small_side = brkYabs_small;
+                }else{
+                    inv_res_new -> event_Y_rough_small_abs = guessed_Y_small_abs_sum;
+                    inv_res_new -> event_Z_rough_large_abs = guessed_Z_large_abs_sum;
+                    inv_res_new -> small_side = guessed_Y_small_abs_sum;
+                }
+                inv_res_new -> all_sup_D = 1;
+                inv_res_new -> max_sup_E = found_INV_frags;
+
+                bktable_append(&global_context -> inversion_result_table, brkYchr, brkYsmall, inv_res_new);
+            }
+        }
+    }
+
+    free(s1_result_bodies);
+    free(s2_result_bodies);
+    free(s1_result_ptr_list);
+    free(s2_result_ptr_list);
+    free(s1_ptrs);
+    free(s2_ptrs);
+    free(s1_poses);
+    free(s2_poses);
+    free(s1_selected_list);
+    free(s2_selected_list);
+}
+
+/*
+ * build_breakpoint_tables
+ *
+ * Scans all events of the indel module and registers every fusion or
+ * junction event, under both of its sides, in one of three tables:
+ *   - breakpoint_table_QR : the two sides are on different chromosomes or
+ *                           further apart than maximum_translocation_length;
+ *   - breakpoint_table_YZ : nearby sides, strand jumped;
+ *   - breakpoint_table_P  : nearby sides, no strand jump.
+ * The value stored with each side is the event number, encoded as an
+ * offset from NULL.
+ */
+void build_breakpoint_tables(global_context_t * global_context){
+
+    indel_context_t * indel_context = (indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID];
+    int event_i;
+
+    for(event_i = 0; event_i < indel_context -> total_events ; event_i++)
+    {
+        char * small_side_chro = NULL, * large_side_chro = NULL;
+        unsigned int small_side_pos = 0, large_side_pos = 0;
+        bucketed_table_t * target_table;
+
+        chromosome_event_t * event_body = indel_context -> event_space_dynamic + event_i;
+
+        // Only fusions and junctions are indexed.
+        if(event_body -> event_type != CHRO_EVENT_TYPE_FUSION && event_body -> event_type != CHRO_EVENT_TYPE_JUNCTION)
+            continue;
+
+        locate_gene_position(event_body -> event_small_side, &global_context -> chromosome_table, &small_side_chro, &small_side_pos);
+        locate_gene_position(event_body -> event_large_side, &global_context -> chromosome_table, &large_side_chro, &large_side_pos);
+
+        // Absolute distance between the two in-chromosome positions.
+        long long side_distance = (long long)small_side_pos - (long long)large_side_pos;
+        if(side_distance < 0) side_distance = -side_distance;
+
+        if(small_side_chro != large_side_chro || side_distance > global_context -> config.maximum_translocation_length)
+            target_table = &global_context -> breakpoint_table_QR;    // QR, regardless of strand jumping
+        else if(event_body -> is_strand_jumped)
+            target_table = &global_context -> breakpoint_table_YZ;    // YZ
+        else
+            target_table = &global_context -> breakpoint_table_P;     // P
+
+        //SUBREADprintf("BPLOG: %s:%u ~ %s:%u (%c) GRP=%d (%p)\n", chro_name_left, chro_pos_left, chro_name_right, chro_pos_right, event_body -> is_strand_jumped?'X':'=', breakpoint_group, index_table);
+
+        // The same event is reachable from either of its two sides.
+        bktable_append(target_table, small_side_chro, small_side_pos, NULL + event_i);
+        bktable_append(target_table, large_side_chro, large_side_pos, NULL + event_i);
+    }
+}
+
+/*
+ * finalise_structural_variances
+ *
+ * Entry point of the structural-variant finalisation: prints the sizes of
+ * the funky tables, builds the breakpoint tables, prints their sizes, and
+ * then runs the translocation and the inversion finalisers in that order.
+ */
+void finalise_structural_variances(global_context_t * global_context){
+    global_context_t * ctx = global_context;
+
+    SUBREADprintf("Funky Tables: A:%llu, BC:%llu, DE:%llu\n", ctx -> funky_list_A.fragments, ctx -> funky_table_BC.fragments / 2, ctx -> funky_list_DE.fragments);
+
+    // The breakpoint tables must exist before either finaliser runs; both look them up.
+    build_breakpoint_tables(ctx);
+    SUBREADprintf("Breakpoint Tables: P:%llu, QR:%llu, YZ:%llu\n", ctx -> breakpoint_table_P.fragments, ctx -> breakpoint_table_QR.fragments, ctx -> breakpoint_table_YZ.fragments);
+
+    finalise_translocations(ctx);
+    finalise_inversions(ctx);
+}
diff --git a/src/core-junction.h b/src/core-junction.h
index 37bbf77..9e7868a 100644
--- a/src/core-junction.h
+++ b/src/core-junction.h
@@ -23,7 +23,14 @@
#include "hashtable.h"
#include "core.h"
-#define MAX_EVENTS_IN_READ 8
+#define FUNKY_FRAGMENT_A 1 // same strand and gapped (0<gap<tra_len)
+#define FUNKY_FRAGMENT_BC 2 // very far far away (>=tra_len) or chimeric.
+#define FUNKY_FRAGMENT_DE 4 // tlen < tra_len and strand jumpped
+#define NOT_FUNKY 0 // normal fragment
+#define FUNKY_COLOCATION_TOLERANCE 500
+#define BREAK_POINT_MAXIMUM_TOLERANCE 80
+#define S12_LIST_CAPACITY 100
+
// as the python sub-string rule: start is the first wanted base and end is the first unwanted base.
typedef struct{
@@ -40,19 +47,26 @@ typedef struct{
typedef struct{
// result context
- unsigned char back_search_confirmed_sections;
- unsigned char front_search_confirmed_sections;
+ //unsigned char back_search_confirmed_sections;
+ //unsigned char front_search_confirmed_sections;
// NOTE THAT EVERYTHING IN back_search_junctions IS BACKWARD.
// 1, ORDER OF EXONS ARE BACKWARD
// 2, "ABS_OFFSET_FOR_START" ARE ACTUALLY AT END OF SECTIONS
- perfect_section_in_read_t back_search_junctions[MAX_EVENTS_IN_READ];
- perfect_section_in_read_t front_search_junctions[MAX_EVENTS_IN_READ];
+ //perfect_section_in_read_t back_search_junctions[MAX_EVENTS_IN_READ];
+ //perfect_section_in_read_t front_search_junctions[MAX_EVENTS_IN_READ];
// middle result context
unsigned char tmp_search_sections;
perfect_section_in_read_t tmp_search_junctions [MAX_EVENTS_IN_READ];
char current_is_strand_jumped;
+ perfect_section_in_read_t result_back_junctions [MAX_ALIGNMENT_PER_ANCHOR][MAX_EVENTS_IN_READ];
+ perfect_section_in_read_t result_front_junctions [MAX_ALIGNMENT_PER_ANCHOR][MAX_EVENTS_IN_READ];
+ int result_back_junction_numbers[MAX_ALIGNMENT_PER_ANCHOR];
+ int result_front_junction_numbers[MAX_ALIGNMENT_PER_ANCHOR];
+ int all_back_alignments;
+ int all_front_alignments;
+
// unsigned int tmp_jump_length;
// unsigned int best_jump_length;
@@ -77,11 +91,12 @@ typedef struct{
// input context
int full_read_len;
+ int is_fully_covered;
char * full_read_text;
char * full_qual_text;
char * read_name;
int is_confirmed_section_negative_strand;
- int pair_number;
+ subread_read_number_t pair_number;
int is_second_read;
int best_read_id;
}explain_context_t;
@@ -102,10 +117,10 @@ void new_explain_try_replace(global_context_t* global_context, thread_context_t
int init_junction_tables(global_context_t * context);
int destroy_junction_tables(global_context_t * context);
-int process_voting_junction(global_context_t * global_context, thread_context_t * thread_context, int pair_number, gene_vote_t * vote_1, gene_vote_t * vote_2, char * read_name_1, char * read_name_2, char * read_text_1, char * read_text_2, int read_len_1, int read_len_2, int is_negative_strand, gene_vote_number_t v1_all_subreads, gene_vote_number_t v2_all_subreads);
+int process_voting_junction(global_context_t * global_context, thread_context_t * thread_context, subread_read_number_t pair_number, gene_vote_t * vote_1, gene_vote_t * vote_2, char * read_name_1, char * read_name_2, char * read_text_1, char * read_text_2, int read_len_1, int read_len_2, int is_negative_strand, gene_vote_number_t v1_all_subreads, gene_vote_number_t v2_all_subreads);
int init_junction_thread_contexts(global_context_t * global_context, thread_context_t * thread_context, int task);
int finalise_junction_thread(global_context_t * global_context, thread_context_t * thread_context, int task);
-int explain_read(global_context_t * global_context, thread_context_t * thread_context, int pair_number,int read_len, char * read_name , char *read_text, char *qual, int is_second_read, int best_read_id, int is_negative_strand);
+unsigned int explain_read(global_context_t * global_context, thread_context_t * thread_context, realignment_result_t * realigns, subread_read_number_t pair_number,int read_len, char * read_name , char *read_text, char *qual, int is_second_read, int best_read_id, int is_negative_strand);
int write_junction_final_results(global_context_t * global_context);
// back_search_read_tail IS THE EXACT VERY SURE POSITION IN THE READ
@@ -113,24 +128,30 @@ int write_junction_final_results(global_context_t * global_context);
int do_explain_back_search(global_context_t * global_context, thread_context_t * thread_context, explain_context_t * explain_context, char * read_text, char * read_qual, int back_search_read_tail, unsigned int back_search_tail_position);
int do_explain_front_search(global_context_t * global_context, thread_context_t * thread_context, explain_context_t * explain_context, char * read_text, char * read_qual, int front_search_read_head, unsigned int front_search_head_position);
-int finalise_explain_CIGAR(global_context_t * global_context, thread_context_t * thread_context, explain_context_t * explain_context);
+unsigned int finalise_explain_CIGAR(global_context_t * global_context, thread_context_t * thread_context, explain_context_t * explain_context, realignment_result_t * realigns);
-void search_events_to_back(global_context_t * global_context, thread_context_t * thread_context, explain_context_t * explain_context, char * read_text , char * qual_text, unsigned int read_tail_abs_offset, short read_tail_pos, short sofar_matched, int suggested_movements);
+void search_events_to_back(global_context_t * global_context, thread_context_t * thread_context, explain_context_t * explain_context, char * read_text , char * qual_text, unsigned int read_tail_abs_offset, short read_tail_pos, short sofar_matched, int suggested_movements, int do_not_jump);
-void search_events_to_back(global_context_t * global_context, thread_context_t * thread_context, explain_context_t * explain_context, char * read_text , char * qual_text, unsigned int read_tail_abs_offset, short read_tail_pos, short sofar_matched, int suggested_movements);
+void search_events_to_back(global_context_t * global_context, thread_context_t * thread_context, explain_context_t * explain_context, char * read_text , char * qual_text, unsigned int read_tail_abs_offset, short read_tail_pos, short sofar_matched, int suggested_movements, int do_not_jump);
-void find_new_junctions(global_context_t * global_context, thread_context_t * thread_context, int pair_number, char * read_text, char * qual_text, int read_len, int is_second_read, int best_read_id);
+void find_new_junctions(global_context_t * global_context, thread_context_t * thread_context, subread_read_number_t pair_number, char * read_name, char * read_text, char * qual_text, int read_len, int is_second_read, int best_read_id);
-int donor_score(global_context_t * global_context, thread_context_t * thread_context, unsigned int left_virtualHead_abs_offset, unsigned int right_virtualHead_abs_offset, int left_indels, int right_indels, int normally_arranged, int guess_start, int guess_end, char * read_text, int read_len, int is_second_read, int * final_split_point, int * is_GT_AG_strand, int * is_donor_found, int * inserted_bases);
+int donor_score(global_context_t * global_context, thread_context_t * thread_context, unsigned int left_virtualHead_abs_offset, unsigned int right_virtualHead_abs_offset, int left_indels, int right_indels, int normally_arranged, int guess_start, int guess_end, char * read_text, int read_len, int * final_split_point, int * is_GT_AG_strand, int * is_donor_found, int * inserted_bases, int * small_side_inc_coor, int * large_side_inc_coor, char *read_name);
-int donor_jumped_score(global_context_t * global_context, thread_context_t * thread_context, unsigned int left_virtualHead_abs_offset, unsigned int right_virtualHead_abs_offset, int guess_start, int guess_end, char * read_text, int read_len, int is_left_half_negative, int is_right_half_negative, int is_left_part_on_left_as_reversed, int is_second_read, int * final_split_point, int * is_GT_AG_strand, int * is_donor_found);
+int donor_jumped_score(global_context_t * global_context, thread_context_t * thread_context, unsigned int left_virtualHead_abs_offset, unsigned int right_virtualHead_abs_offset, int guess_start, int guess_end, char * read_text, int read_len, int is_left_half_negative, int is_right_half_negative, int is_left_part_on_left_as_reversed, int * final_split_point, int * is_GT_AG_strand, int * is_donor_found, int * small_side_inc_coor, int * large_side_inc_coor);
int write_fusion_final_results(global_context_t * global_context);
-int is_ambiguous_voting(global_context_t * global_context,int pair_number, int is_second_read, int max_vote, int max_start,int max_end, int read_len, int is_negative);
+int is_ambiguous_voting(global_context_t * global_context, subread_read_number_t pair_number, int is_second_read, int max_vote, int max_start,int max_end, int read_len, int is_negative);
void core_search_short_exons(global_context_t * global_context, thread_context_t * thread_context, char * read_text, char * qualityb0, int rl, unsigned int P1_Pos, unsigned int P2_Pos, short read_coverage_start, short read_coverage_end);
-void core_fragile_junction_voting(global_context_t * global_context, thread_context_t * thread_context, char * read, char * qual, unsigned int full_rl, int negative_strand, int color_space, unsigned int low_border, unsigned int high_border, gene_vote_t *vote_p1);
+void core_fragile_junction_voting(global_context_t * global_context, thread_context_t * thread_context, char * rname, char * read, char * qual, unsigned int full_rl, int negative_strand, int color_space, unsigned int low_border, unsigned int high_border, gene_vote_t *vote_p1);
+
+int is_funky_fragment(global_context_t * global_context, char * rname1, char * chr1, unsigned int pos1, int rlen1, int is_1_negative, char * cigar1, char * seq1, char * rname2, char * chr2, unsigned int pos2, int rlen2, int is_2_negative, char * cigar2, char * seq2, int tlen_removed_intron);
+
+void finalise_structural_variances(global_context_t * global_context);
+
+void get_event_two_coordinates(global_context_t * global_context, unsigned int event_no, char ** small_chro, unsigned int * small_pos, unsigned int * small_abs, char ** large_chro, unsigned int * large_pos, unsigned int * large_abs);
#endif
diff --git a/src/core.c b/src/core.c
index bb10687..821c4b0 100644
--- a/src/core.c
+++ b/src/core.c
@@ -28,11 +28,14 @@
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
+#include <time.h>
+#include <sys/time.h>
#include <getopt.h>
#include <sys/types.h>
#include <sys/resource.h>
#include <unistd.h>
#include <sys/stat.h>
+#include <locale.h>
#include <ctype.h>
@@ -43,6 +46,7 @@
#include "input-files.h"
#include "sorted-hashtable.h"
+#include "core-bigtable.h"
#include "core-indel.h"
#include "core-junction.h"
@@ -54,12 +58,6 @@ static struct option long_options[] =
int (*progress_report_callback)(int, int, int);
-int is_result_in_PE(alignment_result_t *al)
-{
- if(al->Score_H & 0x8000000000000000llu)return 1;
- return 0;
-}
-
void core_version_number(char * program)
{
SUBREADprintf("\n%s v%s\n\n" , program, SUBREAD_VERSION);
@@ -82,7 +80,6 @@ void warning_file_limit()
}
}
-
void print_in_box(int line_width, int is_boundary, int options, char * pattern,...)
{
int put_color_for_colon, is_center;
@@ -259,7 +256,6 @@ void print_in_box(int line_width, int is_boundary, int options, char * pattern,.
-
int show_summary(global_context_t * global_context)
{
@@ -280,18 +276,18 @@ int show_summary(global_context_t * global_context)
sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_INFO, "");
print_in_box(80, 1,1,"Summary");
print_in_box(80, 0,1,"");
- print_in_box(80, 0,0," Processed : %llu %s" , global_context -> all_processed_reads, global_context->input_reads.is_paired_end_reads?"fragments":"reads");
- print_in_box(81, 0,0," Mapped : %llu %s (%.1f%%%%)", global_context -> all_mapped_reads, global_context->input_reads.is_paired_end_reads?"fragments":"reads" , global_context -> all_mapped_reads*100.0 / global_context -> all_processed_reads);
+ print_in_box(80, 0,0," Processed : %'llu %s" , global_context -> all_processed_reads, global_context->input_reads.is_paired_end_reads?"fragments":"reads");
+ print_in_box(81, 0,0," Mapped : %'llu %s (%.1f%%%%)", global_context -> all_mapped_reads, global_context->input_reads.is_paired_end_reads?"fragments":"reads" , global_context -> all_mapped_reads*100.0 / global_context -> all_processed_reads);
if(global_context->input_reads.is_paired_end_reads)
- print_in_box(80, 0,0," Correctly paired : %llu fragments", global_context -> all_correct_PE_reads);
+ print_in_box(80, 0,0," Correctly paired : %'llu fragments", global_context -> all_correct_PE_reads);
if(global_context->config.output_prefix[0])
{
- if(global_context->config.entry_program_name == CORE_PROGRAM_SUBJUNC)
- print_in_box(80, 0,0," Junctions : %u", global_context -> all_junctions);
+ if(global_context->config.entry_program_name == CORE_PROGRAM_SUBJUNC && ( global_context -> config.prefer_donor_receptor_junctions || !global_context -> config.do_fusion_detection))
+ print_in_box(80, 0,0," Junctions : %'u", global_context -> all_junctions);
if(global_context->config.do_fusion_detection)
- print_in_box(80, 0,0," Fusions : %u", global_context -> all_fusions);
- print_in_box(80, 0,0," Indels : %u", global_context -> all_indels);
+ print_in_box(80, 0,0," Fusions : %'u", global_context -> all_fusions);
+ print_in_box(80, 0,0," Indels : %'u", global_context -> all_indels);
}
@@ -302,6 +298,12 @@ int show_summary(global_context_t * global_context)
}
print_in_box(80, 0,1,"");
print_in_box(80, 0,0," Running time : %.1f minutes", (miltime()-global_context->start_time)*1./60);
+/*
+ print_in_box(80, 0,0," Running time 0 : %.2f minutes", global_context->timecost_load_index/60);
+ print_in_box(80, 0,0," Running time 1 : %.2f minutes", global_context->timecost_voting/60);
+ print_in_box(80, 0,0," Running time 2 : %.2f minutes", global_context->timecost_before_realign/60);
+ print_in_box(80, 0,0," Running time 3 : %.2f minutes", global_context->timecost_for_realign/60);
+*/
print_in_box(80, 0,1,"");
print_in_box(80, 2,1,"http://subread.sourceforge.net/");
sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_INFO, "");
@@ -313,69 +315,79 @@ int show_summary(global_context_t * global_context)
void show_progress(global_context_t * global_context, thread_context_t * thread_context, unsigned int current_read_no, int task)
{
+
+ // Read_chunk_start is the file_offset of the very first read in the entire file.
+ // current_circle_start_position_file1 is the file_offset of the first read in this 5-million read chunk (or whatever the chunk size is)
+
+
if(thread_context&&thread_context->thread_id)
{
SUBREADputs("show_progress can only be called by thread#0\n");
return;
}
- gene_input_t * ginp1 = thread_context?(thread_context->ginp1):(&global_context->input_reads.first_read_file);
- unsigned long long ginp1_file_pos = ftello(ginp1->input_fp);
+ gene_input_t * ginp1 = &global_context->input_reads.first_read_file;
+
+ unsigned long long ginp1_file_pos = geinput_file_offset(ginp1);
if(task == STEP_VOTING)
{
- unsigned long long real_read_number = global_context -> all_processed_reads + current_read_no;// * global_context -> config.all_threads;
+ unsigned long long real_read_number = global_context -> all_processed_reads + current_read_no;
if(real_read_number>1000)
- global_context -> input_reads . avg_read_length = (ginp1_file_pos - ginp1->read_chunk_start) * 1./real_read_number ;
+ global_context -> input_reads . avg_read_length = (ginp1_file_pos - ginp1 -> read_chunk_start) * 1./real_read_number ;
}
unsigned long long total_file_size = global_context -> input_reads.first_read_file_size;
unsigned long long guessed_all_reads = total_file_size / global_context -> input_reads . avg_read_length;
- //printf("FS=%llu; AVG=%f; GAR=%llu; CURRENT_NO=%u\n", total_file_size, global_context -> input_reads . avg_read_length , guessed_all_reads, current_read_no);
- unsigned long long current_block_start_file_offset = global_context -> current_circle_start_position_file1;
+ unsigned long long current_block_start_file_offset = global_context -> current_circle_start_abs_offset_file1;
- unsigned long long guessed_this_chunk_all_reads = (total_file_size - current_block_start_file_offset) / global_context -> input_reads . avg_read_length ;
- if(guessed_this_chunk_all_reads > global_context ->config.reads_per_chunk) guessed_this_chunk_all_reads = global_context ->config.reads_per_chunk;
+ unsigned long long guessed_this_chunk_reads = (total_file_size - current_block_start_file_offset) / global_context -> input_reads . avg_read_length ;
+ if(guessed_this_chunk_reads > global_context ->config.reads_per_chunk) guessed_this_chunk_reads = global_context ->config.reads_per_chunk;
unsigned long long guessed_all_reads_before_this_chunk = current_block_start_file_offset / global_context -> input_reads . avg_read_length ;
- unsigned long long reads_finished_in_this_chunk = (ginp1_file_pos - current_block_start_file_offset) / global_context -> input_reads . avg_read_length;//* global_context -> config.all_threads;
+ unsigned long long reads_finished_in_this_chunk = (ginp1_file_pos - current_block_start_file_offset) / global_context -> input_reads . avg_read_length;
+
+ int is_thred_step_running = global_context->config.is_third_iteration_running ? 1:0;
+
if(task != STEP_VOTING)
- reads_finished_in_this_chunk = (ginp1_file_pos - current_block_start_file_offset) / global_context -> input_reads . avg_read_length;
-
+ reads_finished_in_this_chunk = (ginp1_file_pos - current_block_start_file_offset) / global_context -> input_reads . avg_read_length;
- unsigned long long finished_steps = guessed_all_reads_before_this_chunk * (global_context -> index_block_number * 6 + 4);
+ unsigned long long finished_steps = guessed_all_reads_before_this_chunk * (global_context -> index_block_number * 2 + 1 + is_thred_step_running);
+
+ // add steps for voting
if(task == STEP_VOTING)
- finished_steps += guessed_this_chunk_all_reads * global_context -> current_index_block_number * 6 ;//* global_context -> config.all_threads;
- if(task >= STEP_ITERATION_ONE)
- finished_steps += guessed_this_chunk_all_reads * global_context -> index_block_number * 6 ;//*global_context -> config.all_threads;
- if(task > STEP_ITERATION_ONE)
- finished_steps += guessed_this_chunk_all_reads ;//* global_context -> config.all_threads;
- if(task > STEP_ITERATION_TWO)
- finished_steps += guessed_this_chunk_all_reads;
-
- if(task == STEP_VOTING) finished_steps += reads_finished_in_this_chunk*5*global_context -> config.all_threads;
- if(task > STEP_ITERATION_TWO)
- finished_steps += reads_finished_in_this_chunk * 2;
- else
- finished_steps += reads_finished_in_this_chunk*global_context -> config.all_threads;
+ finished_steps += guessed_this_chunk_reads * global_context -> current_index_block_number * 2;
+ else if(task == STEP_ITERATION_TWO)
+ finished_steps += guessed_this_chunk_reads * global_context -> index_block_number * 2;
+ else if(task > STEP_ITERATION_TWO)
+ finished_steps += guessed_this_chunk_reads *(global_context -> index_block_number * 2+1);
+
+ if(task == STEP_VOTING)
+ finished_steps += reads_finished_in_this_chunk*2;
+ else finished_steps += reads_finished_in_this_chunk;
- unsigned long long guessed_all_steps = guessed_all_reads * (global_context -> index_block_number * 6 + 4);
+ unsigned long long guessed_all_steps = guessed_all_reads * (global_context -> index_block_number *2 + 1 + is_thred_step_running);
float finished_rate = finished_steps*1./guessed_all_steps;
float reads_per_second = 0;
if(task == STEP_VOTING)
- reads_per_second = finished_steps / (miltime() - global_context -> align_start_time) / (global_context -> index_block_number*6 + 4);
+ reads_per_second = finished_rate *1.*guessed_all_reads / (miltime() - global_context -> align_start_time);
else
- reads_per_second = finished_steps / (miltime() - global_context -> start_time) / (global_context -> index_block_number*6 + 4);
- //float exp_mins = (miltime() - global_context -> start_time) / finished_rate / 60;
-
- //fprintf(stderr, "FINISHED=%llu, FINISHED_READS=%llu, ALL=%llu, ALLREADS=%llu, ALLCHUNKREADS=%llu; BEFORE_CHUK=%llu; CUR-BLK=%d; IND-BLK=%d\n", finished_steps, reads_finished_in_this_chunk, guessed_all_steps, guessed_all_reads,guessed_this_chunk_all_reads, guessed_all_reads_before_this_chunk, global_context -> current_index_block_number , global_context -> index_block_number );
+ reads_per_second = finished_rate *1.*guessed_all_reads / (miltime() - global_context -> start_time);
if(current_read_no>1000 && !progress_report_callback)
- print_in_box(81,0,0, "%4d%%%% completed, %3d mins elapsed, total=%dk %s, rate=%2.1fk/s\r", (int)(finished_rate*100), (int)((miltime() - global_context -> start_time)/60),(int)(guessed_all_reads*1./1000), global_context -> input_reads.is_paired_end_reads?"frags":"reads", reads_per_second/1000, reads_finished_in_this_chunk);
+ {
+ char minchr[10];
+ float min_value = (miltime() - global_context -> start_time)*1./60;
+ if(min_value < 9.91)
+ sprintf(minchr, "%.01f", min_value);
+ else sprintf(minchr, "% 3d", (int)min_value);
+
+ print_in_box(81,0,0, "%4d%%%% completed, %s mins elapsed, total=%dk %s, rate=%2.1fk/s\r", (int)(finished_rate*100), minchr,(int)(guessed_all_reads*1./1000), global_context -> input_reads.is_paired_end_reads?"frags":"reads", reads_per_second/1000);
+ }
if(progress_report_callback)
{
@@ -450,7 +462,7 @@ int parse_opts_core(int argc , char ** argv, global_context_t * global_context)
global_context->config.ambiguous_mapping_tolerance = 39;
global_context->config.extending_search_indels = 0;
- global_context->config.is_rna_seq_reads = 1;
+ global_context->config.do_breakpoint_detection = 1;
global_context->config.total_subreads = 14;
global_context->config.minimum_subread_for_first_read = 3;
global_context->config.minimum_subread_for_second_read = 1;
@@ -611,14 +623,16 @@ int check_configuration(global_context_t * global_context)
int core_main(int argc , char ** argv, int (parse_opts (int , char **, global_context_t * )))
{
- //int memory_optimisation = 0;
+ struct timeval xtime;
+ gettimeofday(&xtime,NULL);
+ srand(time(NULL)^xtime.tv_usec);
+
global_context_t * global_context;
global_context = (global_context_t*)malloc(sizeof(global_context_t));
init_global_context(global_context);
int ret = parse_opts(argc , argv, global_context);
- if(ret) return ret;
//global_context->config.reads_per_chunk = 200*1024;
if(global_context->config.max_indel_length > 20 && !global_context->input_reads.is_paired_end_reads)
@@ -628,14 +642,17 @@ int core_main(int argc , char ** argv, int (parse_opts (int , char **, global_co
global_context->config.do_superlong_indel_detection = 1;
}
+ if(global_context->config.fast_run){
+ global_context -> config.top_scores = 1;
+ global_context -> config.max_vote_combinations = 1;
+ global_context -> config.max_vote_simples = 1;
+ global_context -> config.multi_best_reads = 1;
+ }
- ret = print_configuration(global_context);
-
+ ret = ret || print_configuration(global_context);
ret = ret || check_configuration(global_context);
ret = ret || load_global_context(global_context);
ret = ret || init_modules(global_context);
-
-
ret = ret || read_chunk_circles(global_context);
ret = ret || write_final_results(global_context);
ret = ret || destroy_modules(global_context);
@@ -819,62 +836,54 @@ int core_geinput_open(global_context_t * global_context, gene_input_t * fp, int
}
}
-void relocate_geinputs(global_context_t * global_context, thread_context_t * thread_context)
-{
- if(thread_context)
- {
- thread_context -> reads_to_be_done = global_context -> input_reads.reads_in_blocks[thread_context -> thread_id];
- thread_context -> read_block_start = global_context -> input_reads.start_read_number_blocks[thread_context -> thread_id];
-
- thread_context -> ginp1 = (gene_input_t *)malloc(sizeof(gene_input_t));
- core_geinput_open(global_context, thread_context -> ginp1,1, 0);
- fseeko(thread_context -> ginp1 -> input_fp, global_context -> input_reads.first_file_blocks[thread_context -> thread_id], SEEK_SET);
-
- if(global_context -> input_reads.is_paired_end_reads)
- {
- thread_context -> ginp2 = (gene_input_t *)malloc(sizeof(gene_input_t));
- core_geinput_open(global_context, thread_context -> ginp2, 2, 0);
- fseeko(thread_context -> ginp2-> input_fp, global_context -> input_reads.second_file_blocks[thread_context -> thread_id], SEEK_SET);
- }
- }
-}
-
-int fetch_next_read_pair(global_context_t * global_context, thread_context_t * thread_context, gene_input_t* ginp1, gene_input_t* ginp2, int *read_len_1, int *read_len_2, char * read_name_1, char * read_name_2, char * read_text_1, char * read_text_2, char * qual_text_1, char *qual_text_2, int remove_color_head)
+int fetch_next_read_pair(global_context_t * global_context, thread_context_t * thread_context, gene_input_t* ginp1, gene_input_t* ginp2, int *read_len_1, int *read_len_2, char * read_name_1, char * read_name_2, char * read_text_1, char * read_text_2, char * qual_text_1, char *qual_text_2, int remove_color_head, subread_read_number_t * read_no_in_chunk)
{
- int rl1, rl2=0;
+ int rl1=0, rl2=0;
int is_second_R1, is_second_R2;
+ subread_read_number_t this_number = -1;
- do
+ subread_lock_occupy(&global_context -> input_reads.input_lock);
+ if(global_context -> running_processed_reads_in_chunk < global_context -> config.reads_per_chunk)
{
- is_second_R1 = 0; is_second_R2 = 0;
- rl1 = geinput_next_read_trim(ginp1, read_name_1, read_text_1 , qual_text_1, global_context->config.read_trim_5, global_context->config.read_trim_3, &is_second_R1);
- if(global_context->config.space_type == GENE_SPACE_COLOR && remove_color_head)
+ do
{
- if(isalpha(read_text_1[0]))
+ is_second_R1 = 0; is_second_R2 = 0;
+ rl1 = geinput_next_read_trim(ginp1, read_name_1, read_text_1 , qual_text_1, global_context->config.read_trim_5, global_context->config.read_trim_3, &is_second_R1);
+ //SUBREADprintf("%s LEN=%d\n", read_name_1, rl1);
+ if(global_context->config.space_type == GENE_SPACE_COLOR && remove_color_head)
{
- int xk1;
- for(xk1=2; read_text_1[xk1]; xk1++)
- read_text_1[xk1-2]=read_text_1[xk1];
- read_text_1[xk1-2]=0;
+ if(isalpha(read_text_1[0]))
+ {
+ int xk1;
+ for(xk1=2; read_text_1[xk1]; xk1++)
+ read_text_1[xk1-2]=read_text_1[xk1];
+ read_text_1[xk1-2]=0;
+ }
}
- }
- if(ginp2)
- {
- rl2 = geinput_next_read_trim(ginp2, read_name_2, read_text_2 , qual_text_2, global_context->config.read_trim_5, global_context->config.read_trim_3, &is_second_R2);
- if(global_context->config.space_type == GENE_SPACE_COLOR && remove_color_head)
+ if(ginp2)
{
- if(isalpha(read_text_2[0]))
+ rl2 = geinput_next_read_trim(ginp2, read_name_2, read_text_2 , qual_text_2, global_context->config.read_trim_5, global_context->config.read_trim_3, &is_second_R2);
+ if(global_context->config.space_type == GENE_SPACE_COLOR && remove_color_head)
{
- int xk1;
- for(xk1=2; read_text_2[xk1]; xk1++)
- read_text_2[xk1-2]=read_text_2[xk1];
- read_text_2[xk1-2]=0;
+ if(isalpha(read_text_2[0]))
+ {
+ int xk1;
+ for(xk1=2; read_text_2[xk1]; xk1++)
+ read_text_2[xk1-2]=read_text_2[xk1];
+ read_text_2[xk1-2]=0;
+ }
}
}
+ if(rl1 <= 0 || (rl2 <= 0 && ginp2)) break;
+ } while(is_second_R1||is_second_R2) ;
+ if(rl1 >0 || (rl2 > 0 && ginp2)){
+ this_number = global_context -> running_processed_reads_in_chunk;
+ global_context -> running_processed_reads_in_chunk ++;
}
- } while(is_second_R1||is_second_R2) ;
+ }
+ subread_lock_release(&global_context -> input_reads.input_lock);
if( global_context->config.space_type == GENE_SPACE_COLOR)
{
@@ -882,7 +891,7 @@ int fetch_next_read_pair(global_context_t * global_context, thread_context_t * t
}
- if(rl1>0 && (rl2>0 || !ginp2))
+ if(rl1>0 && (rl2>0 || !ginp2) && this_number>=0)
{
if(global_context->config.is_first_read_reversed)
{
@@ -898,21 +907,29 @@ int fetch_next_read_pair(global_context_t * global_context, thread_context_t * t
reverse_quality(qual_text_2, rl2);
}
+ *read_no_in_chunk = this_number;
*read_len_1 = rl1;
if(ginp2)
*read_len_2 = rl2;
return 0;
}
- else return 1;
+ else{
+ *read_no_in_chunk = -1;
+ return 1;
+ }
}
int write_final_results(global_context_t * context)
{
+
+ if(context -> config.do_fusion_detection && context -> config.do_structural_variance_detection)
+ finalise_structural_variances(context);
+
if(context -> config.output_prefix[0])
{
write_indel_final_results(context);
- if(context -> config.entry_program_name == CORE_PROGRAM_SUBJUNC)
+ if(context -> config.entry_program_name == CORE_PROGRAM_SUBJUNC && (context -> config.prefer_donor_receptor_junctions||!context -> config.do_fusion_detection))
write_junction_final_results(context);
if(context -> config.do_fusion_detection)
@@ -946,22 +963,24 @@ typedef struct{
char current_cigar_decompress[CORE_MAX_CIGAR_STR_LEN + 1];
char cigar [CORE_MAX_CIGAR_STR_LEN];
+ unsigned short chimeric_sections;
unsigned int out_poses[CIGAR_PERFECT_SECTIONS];
short out_lens[CIGAR_PERFECT_SECTIONS];
char out_cigars[CIGAR_PERFECT_SECTIONS][60];
char out_strands[CIGAR_PERFECT_SECTIONS];
char additional_information[CORE_ADDITIONAL_INFO_LENGTH + 1];
- alignment_result_t * raw_result;
+ mapping_result_t * raw_result;
unsigned int linear_position;
- unsigned int raw_linear;
short soft_clipping_movements;
char * chro;
unsigned int offset;
int strand;
+ int is_first_section_jumpped;
int mapping_quality;
+ int is_NM_appied;
}subread_output_tmp_t;
typedef struct{
@@ -970,33 +989,33 @@ typedef struct{
subread_output_tmp_t *r1;
subread_output_tmp_t *r2;
subread_output_tmp_t ** out_pairs;
- alignment_result_t ** out_raws;
+ mapping_result_t ** out_raws;
} subread_output_context_t;
-void init_output_context(global_context_t * global_context ,subread_output_context_t * out_context)
+void init_output_context(global_context_t * global_context, subread_output_context_t * out_context)
{
int xk1;
- out_context -> r1 = malloc(sizeof(subread_output_tmp_t) *global_context->config.multi_best_reads );
+ memset(out_context, 0, sizeof(subread_output_context_t));
+ out_context -> r1 = malloc(sizeof(subread_output_tmp_t));
for(xk1=0;xk1<CIGAR_PERFECT_SECTIONS;xk1++)
out_context -> out_cigar_buffer[xk1] = malloc(60);
out_context -> out_pairs = malloc(sizeof( subread_output_context_t *) * global_context->config.multi_best_reads * 2);
- out_context -> out_raws = malloc(sizeof( subread_output_context_t *) * global_context->config.multi_best_reads * 2);
+ out_context -> out_raws = malloc(sizeof( mapping_result_t * *) * global_context->config.multi_best_reads * 2);
if(global_context -> input_reads.is_paired_end_reads)
{
out_context -> PE_distance = malloc(sizeof(long long) * global_context->config.multi_best_reads);
- out_context -> r2 = malloc(sizeof(subread_output_tmp_t) *global_context->config.multi_best_reads );
- }
- else{
+ out_context -> r2 = malloc(sizeof(subread_output_tmp_t));
+ } else {
out_context -> PE_distance = NULL;
out_context -> r2 = NULL;
}
}
-void destroy_output_context(global_context_t * global_context ,subread_output_context_t * out_context)
+void destroy_output_context(global_context_t * global_context , subread_output_context_t * out_context)
{
int xk1;
for(xk1=0;xk1<CIGAR_PERFECT_SECTIONS;xk1++)
@@ -1012,16 +1031,10 @@ void destroy_output_context(global_context_t * global_context ,subread_output_co
}
}
-int locate_current_value_index(global_context_t * global_context, thread_context_t * thread_context, alignment_result_t * result, int rlen);
-int calc_edit_dist(global_context_t * global_context, alignment_result_t * current_result, char * cigar , unsigned int pos , char * read_text)
+int locate_current_value_index(global_context_t * global_context, thread_context_t * thread_context, mapping_result_t * result, int rlen);
+int calc_edit_dist(global_context_t * global_context, mapping_result_t * current_result, char * cigar , unsigned int pos , char * read_text, int all_mm)
{
- locate_current_value_index(global_context, NULL, current_result, 1);
- gene_value_index_t * current_value_index = global_context->current_value_index;
-
- int cigar_cursor=0;
- unsigned int chro_cursor = pos, tmpi=0;
- int read_cursor = 0;
- int all_mm = 0;
+ unsigned int cigar_cursor = 0, tmpi=0;
while(1)
{
@@ -1033,28 +1046,10 @@ int calc_edit_dist(global_context_t * global_context, alignment_result_t * curre
tmpi = tmpi*10+nch-'0';
}
else{
- if(nch == 'M')
- {
- int matched = match_chro(read_text + read_cursor, current_value_index, chro_cursor, tmpi, 0, global_context -> config.space_type);
- all_mm += tmpi - matched;
- chro_cursor += tmpi;
- read_cursor += tmpi;
- }
- else if(nch == 'N' || nch == 'D')
- {
- if('D' == nch) all_mm+=tmpi;
- chro_cursor += tmpi;
- }
- else if(nch == 'I')
+ if(nch == 'I' || nch == 'D')
{
- read_cursor += tmpi;
all_mm+=tmpi;
}
- else if(nch == 'S')
- {
- chro_cursor += tmpi;
- read_cursor += tmpi;
- }
tmpi = 0;
}
@@ -1062,74 +1057,80 @@ int calc_edit_dist(global_context_t * global_context, alignment_result_t * curre
return all_mm;
}
-int convert_read_to_tmp(global_context_t * global_context , subread_output_context_t * output_context, int read_number, int is_second_read, int read_len, char * read_text, char * qual_text, alignment_result_t * current_result, subread_output_tmp_t * r)
+unsigned int move_to_read_head(unsigned int tailpos, char * cigar){
+ int cigar_i = 0, nch;
+ unsigned int tmpi = 0;
+ while(0<(nch = cigar[cigar_i++])){
+ if(isdigit(nch)){
+ tmpi = tmpi * 10 + nch - '0';
+ }else{
+ if(nch == 'N' || nch == 'M' || nch == 'D') tailpos -= tmpi;
+ tmpi = 0;
+ }
+ }
+ return tailpos;
+}
+
+int convert_read_to_tmp(global_context_t * global_context , subread_output_context_t * output_context, int read_number, int is_second_read, int read_len, char * read_text, char * qual_text, realignment_result_t * current_result, subread_output_tmp_t * r, char * read_name)
{
int is_r_OK;
- r -> raw_result = current_result;
+ r -> raw_result = current_result -> mapping_result;
r -> additional_information[0]=0;
- is_r_OK = (current_result -> result_flags & CORE_IS_FULLY_EXPLAINED) > 0;
+ is_r_OK = (current_result -> mapping_result -> result_flags & CORE_IS_FULLY_EXPLAINED) > 0;
if(is_r_OK)
- if((current_result->result_flags & CORE_IS_BREAKEVEN) && !global_context -> config.report_multi_mapping_reads)
+ if((current_result -> mapping_result -> result_flags & CORE_IS_BREAKEVEN) && !global_context -> config.report_multi_mapping_reads)
is_r_OK = 0;
+ if(0 && FIXLENstrcmp("V0112_0155:7:1101:5279:29143#ATCACG", read_name) == 0)
+ SUBREADprintf("%s R_%d CPOINT1 : is_OK=%d\n", read_name , is_second_read + 1 , is_r_OK);
- if(is_r_OK){
-
- int current_strand = (current_result -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0;
- int is_first_section_jumped = 0;
+ if(is_r_OK) {
-
- if( current_result -> cigar_string[0] == -1)
- {
- bincigar2cigar( r-> current_cigar_decompress, CORE_MAX_CIGAR_STR_LEN, current_result -> cigar_string + 1, CORE_MAX_CIGAR_LEN, read_len);
- is_first_section_jumped = 1;
- }
- else
- bincigar2cigar( r-> current_cigar_decompress, CORE_MAX_CIGAR_STR_LEN, current_result -> cigar_string, CORE_MAX_CIGAR_LEN, read_len);
+ int current_strand = (current_result -> mapping_result -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0;
+ int is_first_section_jumped = current_result -> first_base_is_jumpped;
+ if(is_first_section_jumped) assert(global_context -> config.do_fusion_detection);
+ strcpy(r-> current_cigar_decompress, current_result -> cigar_string);
int chimeric_sections = 0;
- int current_repeated_times;
- current_repeated_times = is_ambiguous_voting(global_context, read_number, is_second_read, current_result->selected_votes, current_result->confident_coverage_start, current_result->confident_coverage_end, read_len, current_strand);
-
- r->raw_linear = current_result -> selected_position;
- r->linear_position = current_result -> selected_position;
+ r->is_first_section_jumpped = is_first_section_jumped;
+ r->linear_position = current_result -> first_base_position;
r->mapping_quality = current_result -> final_quality;
- //if(current_repeated_times>1) r->mapping_quality = 0;
- if(current_result->result_flags & CORE_IS_BREAKEVEN) r->mapping_quality = 0;
+ if(current_result -> mapping_result -> result_flags & CORE_IS_BREAKEVEN)
+ r ->mapping_quality = 0;
strcpy(r->cigar, r -> current_cigar_decompress);
- r->strand = (current_result -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0;
+ r->strand = (current_result -> mapping_result -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0;
//sprintf(r->additional_information, "\tSM:i:%d",current_result -> final_mismatched_bases);
//printf("SM='%s'\n", r->additional_information);
- //#warning "THIS LINE CAN WORK VERY BADLY!!! DO NOT INCLUDE IT IN THE RELEASE UNTIL IT IS FULLY TESTED!"
- if(global_context -> config.SAM_extra_columns)
- sprintf(r->additional_information + strlen( r->additional_information), "\tSB:i:%d\tSC:i:%d\tSD:i:%d\tSN:i:%u\tSP:Z:%s", current_result -> used_subreads_in_vote, current_result -> selected_votes, current_result -> noninformative_subreads_in_vote, read_number, (current_result -> result_flags & CORE_IS_GAPPED_READ)?"GAPPED":"NOEVENT");
-
if(global_context -> config.do_fusion_detection)
{
- chimeric_sections = chimeric_cigar_parts(global_context, r->linear_position, is_first_section_jumped ^ current_strand, is_first_section_jumped , r->current_cigar_decompress , r->out_poses, output_context->out_cigar_buffer, r->out_strands, read_len, r->out_lens);
+ chimeric_sections = chimeric_cigar_parts(global_context, r->linear_position, is_first_section_jumped ^ current_strand, is_first_section_jumped, r->current_cigar_decompress, r->out_poses, output_context->out_cigar_buffer, r->out_strands, read_len, r->out_lens);
int xk1;
+ r->chimeric_sections = chimeric_sections;
strcpy(r->out_cigars[0], output_context->out_cigar_buffer[0]);
for(xk1=1; xk1<chimeric_sections; xk1++)
{
unsigned int chimeric_pos;
char * chimaric_chr;
strcpy(r->out_cigars[xk1], output_context->out_cigar_buffer[xk1]);
+ unsigned int vitual_head_pos = r->out_poses[xk1];
+ char strand_xor = (r->out_strands[xk1] == '-') != is_second_read;//!= (r->out_strands[0]=='-') ;
- if(0==locate_gene_position_max(r->out_poses[xk1],& global_context -> chromosome_table, & chimaric_chr, & chimeric_pos, 0+r->out_lens[xk1]))
+ //if(( r->out_strands[xk1] == '-' ) != (r->out_strands[0]=='-' )) vitual_head_pos = move_to_read_head(vitual_head_pos, r->out_cigars[xk1]);
+
+ if(0==locate_gene_position_max(vitual_head_pos ,& global_context -> chromosome_table, & chimaric_chr, & chimeric_pos, 0+r->out_lens[xk1]))
{
int soft_clipping_movement = 0;
soft_clipping_movement = get_soft_clipping_length(r->out_cigars[xk1]);
- char strand_xor = (r->out_strands[xk1] == '-');
assert(chimaric_chr);
sprintf(r->additional_information + strlen(r->additional_information), "\tCG:Z:%s\tCP:i:%u\tCT:Z:%c\tCC:Z:%s", r->out_cigars[xk1] , chimeric_pos + soft_clipping_movement + 1, strand_xor?'-':'+' , chimaric_chr );
}
@@ -1142,10 +1143,6 @@ int convert_read_to_tmp(global_context_t * global_context , subread_output_conte
}
r->soft_clipping_movements = get_soft_clipping_length(r->cigar);
}
- else if(global_context -> config.SAM_extra_columns)
- {
- sprintf(r->additional_information + strlen( r->additional_information), "\tSB:i:%d\tSC:i:%d\tSD:i:%d\tSN:i:%u\tSP:Z:%s", current_result -> used_subreads_in_vote, current_result -> selected_votes, current_result -> noninformative_subreads_in_vote, read_number, (current_result -> result_flags & CORE_IS_GAPPED_READ)?"GAPPED":"NOEVENT");
- }
if(is_r_OK)
{
@@ -1159,9 +1156,9 @@ int convert_read_to_tmp(global_context_t * global_context , subread_output_conte
assert(r-> chro);
}
- if(global_context -> config.is_rna_seq_reads && !(current_result -> result_flags & CORE_NOTFOUND_DONORS))
+ if(global_context -> config.do_breakpoint_detection && !(current_result -> realign_flags & CORE_NOTFOUND_DONORS))
{
- sprintf(r->additional_information + strlen(r->additional_information), "\tXS:A:%c", (current_result -> result_flags & CORE_IS_GT_AG_DONORS)?'+':'-');
+ sprintf(r->additional_information + strlen(r->additional_information), "\tXS:A:%c", (current_result -> realign_flags & CORE_IS_GT_AG_DONORS)?'+':'-');
}
/*
@@ -1178,207 +1175,140 @@ int convert_read_to_tmp(global_context_t * global_context , subread_output_conte
}
-int calculate_fragment_combinations(global_context_t * global_context, subread_output_context_t * out_context, int read_number , int read_len_1, int read_len_2, char * read_name_1, char * read_name_2, char * read_text_1, char * read_text_2, char * qual_text_1, char * qual_text_2)
-{
- int mapping_location_number;
-
- // Features that need attention:
-
- // (1) fusion cigar split
- // (2) soft-clipping moving mapping location
- // (3) making the two ends to have the best strand: if both ends mapped, as what they should be;
- // if only one end is mapped: the other end is flipped to the same strand.
- // (4) ambiguous voting
-
- // Additional criteria:
- // (1) a read must be entirely in a chromosome, or it is reported as unmapped.
-
- // The return value is the number of locations.
- // It must be greater than 0, even both reads were unmapped.
- int read_1_locations = 0;
- int read_2_locations = 0;
-
- if(global_context -> config.report_multiple_best_in_pairs)
- {
- int retX = 0, is_two_read_mapped = 0;
- int is_2_OK = 0, is_1_OK = 0;
+int add_event_detected_from_cigar(global_context_t * global_context, unsigned int left_last_base, unsigned int right_first_base, int is_indel, int indel_len, int left_extend, int right_extend, int read_length, char * read_name)
+{
+ HashTable * event_table = NULL;
+ chromosome_event_t * event_space = NULL;
+ event_table = ((indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID]) -> event_entry_table;
+ event_space = ((indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID]) -> event_space_dynamic;
+ chromosome_event_t *site_events[MAX_EVENT_ENTRIES_PER_SITE];
+ int search_types ;
+ if(is_indel) search_types = CHRO_EVENT_TYPE_INDEL;
+ else search_types = CHRO_EVENT_TYPE_JUNCTION | CHRO_EVENT_TYPE_FUSION;
- for( mapping_location_number = 0 ; mapping_location_number < global_context->config.multi_best_reads; mapping_location_number++)
- {
- assert(global_context->input_reads.is_paired_end_reads); // this function is not provided for the public.
- alignment_result_t * current_result = _global_retrieve_alignment_ptr(global_context , read_number, 0, mapping_location_number);
- is_1_OK = convert_read_to_tmp(global_context, out_context, read_number , 0 , read_len_1, read_text_1, qual_text_1, current_result, &out_context->r1[read_1_locations]);
+ if(left_last_base > right_first_base) {
+ unsigned int ttt;
+ ttt = right_first_base;
+ right_first_base = left_last_base;
+ left_last_base = ttt;
+ }
- alignment_result_t * current_result2 = _global_retrieve_alignment_ptr(global_context , read_number, 1, mapping_location_number);
- is_2_OK = convert_read_to_tmp(global_context , out_context, read_number , 1, read_len_2, read_text_2, qual_text_2, current_result2, &out_context->r2[read_2_locations]);
+ int xk1, site_events_no = search_event(global_context, event_table , event_space , left_last_base, EVENT_SEARCH_BY_BOTH_SIDES , search_types , site_events);
-
- if(mapping_location_number == 0)
- is_two_read_mapped = is_1_OK && is_2_OK;
+ int is_found = 0;
+ for(xk1 = 0; xk1 < site_events_no; xk1++){
+ chromosome_event_t * this_event = site_events[xk1];
+ if(is_indel && this_event -> event_type != CHRO_EVENT_TYPE_INDEL) continue;
+ if((!is_indel) && this_event -> event_type == CHRO_EVENT_TYPE_INDEL) continue;
- if(mapping_location_number == 0 || (is_1_OK && is_2_OK))
- retX++;
- else break;
+ if(0 && ( memcmp(read_name, "V0112_0155:7:1104:17648:117432",28)==0 || ( this_event -> event_small_side > 2613701363 - 200 && this_event -> event_small_side < 2613701363 + 100) ))
+ SUBREADprintf("EVENT: L=%u, R=%u (TABLE L=%u , R=%u), LEXT=%d, REXT=%d -- %s\n", left_last_base, right_first_base, this_event -> event_small_side, this_event -> event_large_side, left_extend, right_extend, read_name);
- if(is_1_OK)
- {
- out_context -> out_pairs[mapping_location_number*2 ] = out_context->r1 + read_1_locations;
- assert(out_context -> out_pairs[mapping_location_number*2 ]->chro);
- }
- if(is_2_OK)
- {
- out_context -> out_pairs[mapping_location_number*2+1] = out_context->r2 + read_2_locations;
- assert(out_context -> out_pairs[mapping_location_number*2+1]->chro);
+ if(this_event -> event_small_side == left_last_base && this_event -> event_large_side == right_first_base){
+ if(is_indel && this_event -> indel_length == indel_len){
+ this_event -> final_counted_reads ++;
+ is_found = 1;
+ break;
}
+ if(!is_indel){
+ this_event -> final_counted_reads ++;
- if(global_context -> config.SAM_extra_columns)
- {
- out_context -> out_raws[mapping_location_number*2] = current_result;
- out_context -> out_raws[mapping_location_number*2+1] = current_result2;
+ //
+ if(read_length >= 80)
+ {
+ if(left_extend > 30 && right_extend > 30) this_event -> critical_supporting_reads ++;
+ }else{
+ if(left_extend > 18 && right_extend > 18) this_event -> critical_supporting_reads ++;
+ }
+ this_event -> junction_flanking_left = max(this_event -> junction_flanking_left, left_extend);
+ this_event -> junction_flanking_right = max(this_event -> junction_flanking_right, right_extend);
+ is_found = 1;
+ break;
}
-
- if(is_1_OK)read_1_locations++;
- if(is_2_OK)read_2_locations++;
-
- if(!is_two_read_mapped) break;
}
-
-
- return retX;
}
- // First, counting the number of mapping locations for each end.
- //
- for( mapping_location_number = 0 ; mapping_location_number < global_context->config.multi_best_reads; mapping_location_number++)
- {
- alignment_result_t * current_result = _global_retrieve_alignment_ptr(global_context , read_number, 0, mapping_location_number);
-
- int is_2_OK = 0, is_1_OK = 0, xx1, is_fresh;
-
- is_fresh = 1;
- for(xx1=0;xx1 < read_1_locations; xx1++)
- {
-
- if(current_result -> selected_position == out_context->r1[xx1].raw_linear)
- is_fresh = 0;
- }
-
- //printf("SB:i:%d\tSC:i:%d\tSD:i:%d\tSP:Z:%s\n", current_result -> used_subreads_in_vote, current_result -> selected_votes, current_result -> noninformative_subreads_in_vote, (current_result -> result_flags & CORE_IS_GAPPED_READ)?"GAPPED":"NOEVENT");
- if(is_fresh)
- is_1_OK = convert_read_to_tmp(global_context, out_context, read_number , 0 , read_len_1, read_text_1, qual_text_1, current_result, &out_context->r1[read_1_locations]);
-
- if(global_context->input_reads.is_paired_end_reads)
- {
- alignment_result_t * current_result2 = _global_retrieve_alignment_ptr(global_context , read_number, 1, mapping_location_number);
-
-
- is_fresh = 1;
- for(xx1=0;xx1 < read_2_locations; xx1++)
- if(current_result2 -> selected_position == out_context->r2[xx1].linear_position)
- is_fresh = 0;
-
- if(is_fresh)
- is_2_OK = convert_read_to_tmp(global_context , out_context, read_number , 1, read_len_2, read_text_2, qual_text_2, current_result2, &out_context->r2[read_2_locations]);
-
- if(is_2_OK)read_2_locations++;
- }
- //printf("L1,2_OK = %d, %d\n", is_1_OK, is_2_OK);
-
- if(is_1_OK)read_1_locations++;
-
- if(! ( is_2_OK || is_1_OK))
- break;
+ if(!is_found){
+ //SUBREADprintf("\nEVENT NOT FOUND!\n\n");
+ return 1;
}
+ return 0;
+}
-
- // now all the potential locations were written into out_context->r1 and out_context->r2.
- // we find the best combinations between out_context->r1 and out_context->r2 now.
+int get_junction_right_extension(char * remainder_cigar){
int ret = 0;
- if(global_context-> config.multi_best_reads == 1)
- {
- out_context -> out_pairs[0] = read_1_locations?out_context -> r1 : NULL;
- out_context -> out_pairs[1] = read_2_locations?out_context -> r2 : NULL;
- ret = 1;
- }
- else if(global_context->input_reads.is_paired_end_reads)
- {
- int r1_xx, r2_xx, yy, zz;
- memset(out_context->PE_distance, 0x7F, sizeof(long long) * global_context->config.multi_best_reads);
-
-
- out_context -> out_pairs[0] = NULL;
- out_context -> out_pairs[1] = NULL;
+ unsigned int tmpi = 0;
+ int cigar_cursor, nch;
- if(read_1_locations > 0 && read_2_locations > 0)
- {
- for(r1_xx=0; r1_xx < read_1_locations; r1_xx ++)
- for(r2_xx=0; r2_xx < read_2_locations; r2_xx ++)
- {
- long long int distance = 0x100000000ll;
+ for(cigar_cursor = 0;;cigar_cursor++){
+ nch = remainder_cigar[cigar_cursor];
+ if(0 == nch)break;
+ if(isdigit(nch))tmpi = tmpi*10 + nch - '0';
+ else{
+ if(nch == 'M' || nch == 'D')
+ ret += tmpi;
+ if(nch == 'N' || nch == 'n' || nch == 'B' || nch == 'b')
+ break;
- // the chro in the data structure is a pointer to the offset table; they can be compaired directly.
- if(out_context->r1 [r1_xx] . chro == out_context->r2 [r2_xx].chro)
- {
- distance = out_context->r1 [r1_xx] . linear_position;
- distance -= out_context->r2 [r2_xx] . linear_position;
- if(0>distance) distance=-distance;
- }
+ tmpi = 0;
+ }
+ }
+ return ret;
+}
- if(distance < out_context -> PE_distance[global_context->config.multi_best_reads - 1])
- {
- for(yy = 0; yy < global_context->config.multi_best_reads; yy++)
- {
- if(distance < out_context -> PE_distance[yy])
- break;
- }
+void remove_soft_clipping(char * dst, char * src){
+ unsigned int tmpi = 0, head_clip = 0, tail_clip = 0, last_M = 0;
+ int cigar_cursor, is_first_s = 1, nch, nch2;
- // now replace the 2*yy-th and (2*yy+1)-th items in array output_records with the current couple.
+ dst[0] = 0;
- for(zz = global_context->config.multi_best_reads - 2 ; zz >= yy; zz--)
- {
- out_context -> PE_distance[zz+1] = out_context -> PE_distance[zz];
- memcpy(&(out_context -> out_pairs[zz*2 + 2]), &(out_context -> out_pairs[zz*2]), 2*sizeof(void *));
- }
- if(yy < global_context->config.multi_best_reads)
- {
- out_context -> PE_distance[yy] = distance;
- out_context -> out_pairs[yy*2] = out_context -> r1 + r1_xx;
- out_context -> out_pairs[yy*2+1] = out_context -> r2 + r2_xx;
+ for(cigar_cursor = 0;;cigar_cursor++){
+ nch = src[cigar_cursor];
+ nch2 = src[cigar_cursor+1];
- ret ++;
- }
- if(ret > global_context->config.multi_best_reads) ret = global_context->config.multi_best_reads;
- }
+ if(0 == nch)break;
+ if(isdigit(nch))tmpi = tmpi*10 + nch - '0';
+ else{
+ if('S' == nch){
+ if(is_first_s) head_clip = tmpi;
+ if(nch2 == 0) tail_clip = tmpi;
+ }
+ else if('M' == nch){
+ last_M = tmpi;
+ }
+ else{
+ if(last_M){
+ sprintf(dst + strlen(dst), "%uM", last_M + head_clip);
+ last_M = 0;
+ head_clip = 0;
}
- }
- else
- {
- for(yy = 0; yy < max(read_1_locations, read_2_locations); yy++)
- {
- out_context -> out_pairs[yy*2 ] = read_1_locations == 0? NULL : out_context -> r1 + yy;
- out_context -> out_pairs[yy*2+1] = read_2_locations == 0? NULL : out_context -> r2 + yy;
+ sprintf(dst + strlen(dst), "%u%c", tmpi, nch);
}
- ret = max(read_1_locations, read_2_locations);
+ tmpi=0;
+ is_first_s = 0;
}
-
-// if(memcmp("V0112_0155:7:1102:10778:2461", read_name_1, 27)==0)
-// SUBREADprintf("NNNLLL=%d,%d,%d\n", read_1_locations, read_2_locations, ret);
}
- else
- {
- int yy;
- ret = read_1_locations;
- for(yy = 0; yy < read_1_locations; yy++)
- out_context -> out_pairs[yy ] = out_context -> r1 +yy;
+ if(last_M){
+ sprintf(dst + strlen(dst), "%uM" , last_M + tail_clip + head_clip);
+ }
+
+}
+
+int getFirstM(char * cig){
+ int tmpi = 0, nch, x1;
+
+ for(x1=0; 0!=(nch = cig[x1]) ; x1++){
+ if(isdigit(nch)){
+ tmpi=tmpi*10 + nch - '0';
+ }else break;
}
- //assert(ret <=global_context->config.multi_best_reads);
- return max(1, ret);
+ return tmpi;
}
int calc_tlen(global_context_t * global_context, subread_output_tmp_t * rec1 , subread_output_tmp_t * rec2, int read_len_1, int read_len_2);
-int calc_flags(global_context_t * global_context, subread_output_tmp_t * rec1 , subread_output_tmp_t * rec2, int is_second_read, int read_len_1, int read_len_2, int current_location_no)
+int calc_flags(global_context_t * global_context, subread_output_tmp_t * rec1 , subread_output_tmp_t * rec2, int is_second_read, int read_len_1, int read_len_2, int current_location_no , int tlen)
{
int ret;
@@ -1400,7 +1330,7 @@ int calc_flags(global_context_t * global_context, subread_output_tmp_t * rec1 ,
if(rec1 && rec2)
{
- int TLEN = calc_tlen(global_context , rec1, rec2, read_len_1, read_len_2);
+ int TLEN = tlen;//calc_tlen(global_context , rec1, rec2, read_len_1, read_len_2);
int is_PEM = 0;
if(TLEN >= global_context->config. minimum_pair_distance && TLEN <= global_context-> config.maximum_pair_distance && this_rec->strand == mate_rec->strand)
{
@@ -1447,16 +1377,76 @@ int calc_flags(global_context_t * global_context, subread_output_tmp_t * rec1 ,
int calc_tlen(global_context_t * global_context, subread_output_tmp_t * rec1 , subread_output_tmp_t * rec2, int read_len_1, int read_len_2)
{
- long long int ret = rec1->offset;
- ret -= rec2->offset;
- if(ret<0)ret=-ret;
+ int ret = -1;
+
+ unsigned int r1_head_pos = rec1 -> offset;// - rec1 -> soft_clipping_movements;
+ unsigned int r2_head_pos = rec2 -> offset;// - rec2 -> soft_clipping_movements;
+
+ if(r1_head_pos == r2_head_pos)
+ ret = max(read_len_1, read_len_2); // the two reads are fully overlapping
+ else{
+ int is_r2_smaller = r2_head_pos < r1_head_pos;
+ subread_output_tmp_t * smaller_rec = is_r2_smaller?rec2:rec1;
+ unsigned int smaller_head_pos = is_r2_smaller?r2_head_pos:r1_head_pos;
+ unsigned int larger_head_pos = is_r2_smaller?r1_head_pos:r2_head_pos;
+
+ int len_larger_rec = is_r2_smaller?read_len_1:read_len_2;
+ int len_smaller_rec = is_r2_smaller?read_len_2:read_len_1;
+
+ unsigned int tmpi = 0;
+ unsigned int chro_cursor = smaller_head_pos;
+ unsigned int section_start = smaller_head_pos, section_end = 0;
+ unsigned int read_cursor = 0;
+ int cigar_cursor, nch, nch2;
+ for(cigar_cursor = 0; ; cigar_cursor++){
+ nch = smaller_rec -> cigar[cigar_cursor];
+ nch2 = smaller_rec -> cigar[cigar_cursor+1];
+
+ if(nch > 0){
+ if(isdigit(nch)) tmpi = tmpi * 10 + nch - '0';
+ else{
+ if(nch == 'M' || nch == 'S'){
+ chro_cursor += tmpi;
+ read_cursor += tmpi;
+ section_end = chro_cursor;
+ }
+ if(nch == 'N' || nch == 'B' || nch == 'b' || nch == 'n' || nch == 'I' || nch == 'D' || nch2 == 0){
+ if(nch == 'N' || nch == 'D')
+ chro_cursor += tmpi;
+
+ //SUBREADprintf("NCH=%c, NCH2=%d, SEC_END=%u, LARGE_HEAD=%u\n", nch, nch2, section_end, larger_head_pos);
+ if(section_end >= larger_head_pos)
+ {
+ ret = read_cursor + larger_head_pos - section_end + len_larger_rec;
+ break;
+ }
+ section_start = chro_cursor;
+ }
+
+ if(nch == 'I'){
+ read_cursor += tmpi;
+ }
+
+ if(nch == 'B' || nch == 'b' || nch == 'n') break; // fusion block, unable to determine the TLEN after the fusion point.
+
+ tmpi = 0;
+ }
+ }else{
+ break;
+ }
- if(rec1->offset - rec1->soft_clipping_movements < rec2->offset - rec2->soft_clipping_movements ) ret += read_len_2;
- else ret += read_len_1;
+ }
+
+ if(ret < 0)
+ ret = larger_head_pos - section_end + len_larger_rec + len_smaller_rec;
+ }
+
+
+ //SUBREADprintf("TLEN = %d :: \tREC1 : %s:%u (%s - %d) ; REC2: %s:%u (%s - %d )\n", ret, rec1 -> chro, rec1 -> offset, rec1 -> cigar, rec1 -> soft_clipping_movements,
+ // rec2 -> chro, rec2 -> offset, rec2 -> cigar, rec2 -> soft_clipping_movements);
+
+ return ret;
- ret = max(read_len_1, ret);
- ret = max(read_len_2, ret);
- return (int)ret;
}
int calc_should_reverse(global_context_t * global_context, subread_output_tmp_t * rec1 , subread_output_tmp_t * rec2, int is_second_read)
@@ -1500,63 +1490,252 @@ void remove_nm_i(char * st)
}
}
-#define write_chunk_results_145 write_chunk_results
+void write_buffered_output_file(global_context_t *global_context, output_fragment_buffer_t * rec){
+
+ if(global_context -> config.is_BAM_output){
+ SamBam_writer_add_read(global_context -> output_bam_writer, rec->r1.read_name, rec->r1.flags, rec->r1.chro_name , rec->r1.location , rec->r1.map_quality, rec->r1.cigar, rec->r1.other_chro_name , rec->r1.other_location, rec->r1.tlen, rec->r1.rlen, rec->r1.read_text, rec->r1.qual_text, rec->r1.additional_columns);
-// rec1 or rec2 is OK if they are not NULL.
-void write_single_fragment(global_context_t * global_context, subread_output_tmp_t * rec1, alignment_result_t * raw_r1, subread_output_tmp_t * rec2, alignment_result_t * raw_r2, int all_locations , int current_location , char * read_name_1, char * read_name_2, int read_len_1, int read_len_2, char * read_text_1, char * read_text_2, char * qual_text_1, char * qual_text_2, int * is_read1_reversed, int * is_read2_reversed )
-{
+ if(global_context->input_reads.is_paired_end_reads)
+ SamBam_writer_add_read(global_context -> output_bam_writer, rec->r2.read_name, rec->r2.flags, rec->r2.chro_name , rec->r2.location , rec->r2.map_quality, rec->r2.cigar, rec->r2.other_chro_name , rec->r2.other_location, rec->r2.tlen, rec->r2.rlen, rec->r2.read_text, rec->r2.qual_text, rec->r2.additional_columns);
+ }
+ else
+ {
+ sambamout_fprintf(global_context -> output_sam_fp , "%s\t%d\t%s\t%u\t%d\t%s\t%s\t%u\t%lld\t%s\t%s%s%s\n", rec->r1.read_name, rec->r1.flags, rec->r1.chro_name, rec->r1.location, rec->r1.map_quality, rec->r1.cigar, rec->r1.other_chro_name, rec->r1.other_location, rec->r1.tlen, rec->r1.read_text, rec->r1.qual_text, rec->r1.additional_columns[0]?"\t":"", rec->r1.additional_columns);
+ if(global_context->input_reads.is_paired_end_reads)
+ sambamout_fprintf(global_context -> output_sam_fp , "%s\t%d\t%s\t%u\t%d\t%s\t%s\t%u\t%lld\t%s\t%s%s%s\n", rec->r2.read_name, rec->r2.flags, rec->r2.chro_name, rec->r2.location, rec->r2.map_quality, rec->r2.cigar, rec->r2.other_chro_name, rec->r2.other_location, rec->r2.tlen, rec->r2.read_text, rec->r2.qual_text, rec->r2.additional_columns[0]?"\t":"", rec->r2.additional_columns);
+ }
+}
-// if( all_locations < 2) return;
+void merge_buffered_output_file(global_context_t *global_context, int need_lock, int my_thread_no, int * all_threads_finished){
+ thread_context_t * thread_contexts = global_context -> all_thread_contexts;
+ //SUBREADprintf("merge_start: lock=%d, thread=%d, remain_item=%d\n", need_lock, my_thread_no, thread_contexts[my_thread_no].output_buffer_item);
+ int current_thread_no;
- int flag1 = calc_flags( global_context , rec1, rec2, 0, read_len_1, read_len_2, current_location);
+ if(need_lock){
+ for(current_thread_no = 0 ; current_thread_no < global_context->config.all_threads ; current_thread_no ++){
+ if(my_thread_no != current_thread_no) {
+ thread_context_t * current_thread = thread_contexts + current_thread_no;
+ subread_lock_occupy(¤t_thread -> output_lock);
+ }
+ }
+ }
- int flag2 = -1;
+ while(1){
+ int has_found = 0;
- if(global_context->input_reads.is_paired_end_reads)
- {
- flag2 = calc_flags( global_context , rec1, rec2, 1, read_len_1, read_len_2, current_location);
- if((0 == current_location) && (flag2 & SAM_FLAG_MATCHED_IN_PAIR)) global_context->all_correct_PE_reads ++;
- }
+ (*all_threads_finished) = 1;
+ for(current_thread_no = 0 ; current_thread_no < global_context->config.all_threads ; current_thread_no ++){
+ thread_context_t * current_thread = thread_contexts + current_thread_no;
+ if(current_thread_no>0 && !current_thread->is_finished)
+ (*all_threads_finished) = 0;
- int tlen = 0;
+ if( current_thread -> output_buffer_item > 0 ){
+ int earliest_frag_index = current_thread -> output_buffer_pointer - current_thread -> output_buffer_item;
+ if(earliest_frag_index < 0) earliest_frag_index += MULTI_THREAD_OUTPUT_ITEMS * global_context -> config.reported_multi_best_reads;
- // rec -> chro is a pointer to the offset table; the pointers can be compared.
- if(rec1 && rec2 && rec1->chro == rec2->chro)tlen = calc_tlen(global_context , rec1, rec2, read_len_1, read_len_2);
+ output_fragment_buffer_t * src = current_thread -> output_buffer + earliest_frag_index;
+ subread_read_number_t earliest_frag_number = src -> fragment_number_in_chunk;
+ //if(161430 <= earliest_frag_number) SUBREADprintf("The %d-th thread has earlist = %u ; want %u\n", current_thread_no, earliest_frag_number , global_context -> last_written_fragment_number);
+ if(earliest_frag_number <= global_context -> last_written_fragment_number){
+ int need_more = max(1, src -> multi_mapping_locations);
+ need_more -= src -> this_mapping_location;
+
+ if(0 && src -> this_mapping_location < src -> multi_mapping_locations)
+ {
+ int next_frag_index = (earliest_frag_index == MULTI_THREAD_OUTPUT_ITEMS * global_context -> config.reported_multi_best_reads - 1)?0:earliest_frag_index + 1;
+ output_fragment_buffer_t * nextsrc = current_thread -> output_buffer + next_frag_index;
+ if(nextsrc -> fragment_number_in_chunk != src -> fragment_number_in_chunk){
+ output_fragment_buffer_t * prevsrc = current_thread -> output_buffer + earliest_frag_index - 1;
+ SUBREADprintf("RN=%s , %s, THIS: %d/%d, R-1=%u R1=%u, R2=%u, REMAIN=%d, THIS_IDX=%d\n", src -> r1.read_name, nextsrc -> r1.read_name, src -> this_mapping_location , src -> multi_mapping_locations, prevsrc-> fragment_number_in_chunk, src -> fragment_number_in_chunk, nextsrc -> fragment_number_in_chunk, current_thread -> output_buffer_item, earliest_frag_index);
+ }
+ assert(nextsrc -> fragment_number_in_chunk == src -> fragment_number_in_chunk);
+ }
+ //if(161430 <= earliest_frag_number) SUBREADprintf("WRITTEN [%u]? %d/%d MORE=%d\n", earliest_frag_number, src -> this_mapping_location , src -> multi_mapping_locations,need_more);
+ if(current_thread -> output_buffer_item >= need_more){
+ //SUBREADprintf("merge: %d >= 1 ? this=%d, all=%d \n", src -> this_mapping_location, need_more, src -> this_mapping_location , src -> multi_mapping_locations);
+ if(need_more <= 1)
+ global_context -> last_written_fragment_number = earliest_frag_number + 1;
+
+ write_buffered_output_file(global_context, src);
+ current_thread -> output_buffer_item --;
+ has_found = 1;
+ }
+ }
+ }
+ }
+ if(0==has_found)
+ break;
+ }
- int applied_reverse_space;
- applied_reverse_space = global_context->config.space_type;
- if(global_context -> config.convert_color_to_base)
- {
- colorread2base(read_text_1, read_len_1+1);
- if(global_context->input_reads.is_paired_end_reads)
- colorread2base(read_text_2, read_len_2+1);
- applied_reverse_space = GENE_SPACE_BASE;
+ if(need_lock){
+ for(current_thread_no = 0 ; current_thread_no < global_context->config.all_threads ; current_thread_no ++){
+ if(my_thread_no != current_thread_no) {
+ thread_context_t * current_thread = thread_contexts + current_thread_no;
+ subread_lock_release(¤t_thread -> output_lock);
+ }
+ }
}
+ global_context -> need_merge_buffer_now = 0;
- int should_1_reverse = calc_should_reverse( global_context , rec1, rec2, 0);
+ //SUBREADprintf("merge_finished: lock=%d, thread=%d, remain_item=%d, last_id=%u\n", need_lock, my_thread_no, thread_contexts[my_thread_no].output_buffer_item, global_context -> last_written_fragment_number);
+}
- if(should_1_reverse + (*is_read1_reversed) == 1)
- {
- reverse_read(read_text_1, read_len_1 + global_context->config.convert_color_to_base, applied_reverse_space);
- reverse_quality(qual_text_1, read_len_1);
- (*is_read1_reversed) = !(*is_read1_reversed);
- }
- if(global_context->input_reads.is_paired_end_reads)
- {
- int should_2_reverse = calc_should_reverse( global_context , rec1, rec2, 1);
- if(should_2_reverse + (*is_read2_reversed) == 1)
- {
- reverse_read(read_text_2, read_len_2 + global_context->config.convert_color_to_base, applied_reverse_space);
- reverse_quality(qual_text_2, read_len_2);
- (*is_read2_reversed) = !(*is_read2_reversed);
+#define BUFFER_TICK_SLEEP_TIME 1000
+void add_buffered_fragment(global_context_t * global_context, thread_context_t * thread_context, subread_read_number_t pair_number ,
+ char * read_name1, unsigned int flags1, char * chro_name1, unsigned int chro_position1, int mapping_quality1, char * cigar1,
+ char * next_chro_name1, unsigned int next_chro_pos1, int temp_len1, int read_len1,
+ char * read_text1, char * qual_text1, char * additional_columns1,
+ char * read_name2, unsigned int flags2, char * chro_name2, unsigned int chro_position2, int mapping_quality2, char * cigar2,
+ char * next_chro_name2, unsigned int next_chro_pos2, int temp_len2, int read_len2,
+ char * read_text2, char * qual_text2, char * additional_columns2,
+ int all_locations, int this_location
+ ){
+
+ while(1){
+ int done = 0, all_finished=0;
+ subread_lock_occupy(&thread_context -> output_lock);
+ if(thread_context -> thread_id == 0 && ( thread_context -> output_buffer_item > MULTI_THREAD_OUTPUT_ITEMS* global_context -> config.reported_multi_best_reads /4 || global_context -> need_merge_buffer_now)){
+ merge_buffered_output_file(global_context, 1, thread_context -> thread_id, &all_finished);
}
- }
- remove_backslash(read_name_1);
+ if(thread_context -> output_buffer_item < MULTI_THREAD_OUTPUT_ITEMS * global_context -> config.reported_multi_best_reads) {
+ //SUBREADprintf("BUFFER PNTR=%d\n", thread_context -> output_buffer_pointer);
+ output_fragment_buffer_t * target = thread_context -> output_buffer+thread_context -> output_buffer_pointer;
+ target -> multi_mapping_locations = all_locations;
+ target -> this_mapping_location = this_location;
+ target -> fragment_number_in_chunk = pair_number;
+ strcpy(target -> r1.read_name, read_name1);
+ target -> r1.flags = flags1;
+ strcpy(target -> r1.chro_name, chro_name1);
+ target -> r1.location = chro_position1;
+ target -> r1.map_quality = mapping_quality1;
+ strcpy(target -> r1.cigar, cigar1);
+ strcpy(target -> r1.other_chro_name, next_chro_name1);
+ target -> r1.other_location = next_chro_pos1;
+ target -> r1.tlen = temp_len1;
+ target -> r1.rlen = read_len1;
+ strcpy(target -> r1.read_text, read_text1);
+ strcpy(target -> r1.qual_text, qual_text1);
+ strcpy(target -> r1.additional_columns, additional_columns1);
+
+ if(global_context->input_reads.is_paired_end_reads){
+ strcpy(target -> r2.read_name, read_name2);
+ target -> r2.flags = flags2;
+ strcpy(target -> r2.chro_name, chro_name2);
+ target -> r2.location = chro_position2;
+ target -> r2.map_quality = mapping_quality2;
+ strcpy(target -> r2.cigar, cigar2);
+ strcpy(target -> r2.other_chro_name, next_chro_name2);
+ target -> r2.other_location = next_chro_pos2;
+ target -> r2.tlen = temp_len2;
+ target -> r2.rlen = read_len2;
+ strcpy(target -> r2.read_text, read_text2);
+ strcpy(target -> r2.qual_text, qual_text2);
+ strcpy(target -> r2.additional_columns, additional_columns2);
+ }
+ thread_context -> output_buffer_pointer ++;
+ if(thread_context -> output_buffer_pointer >= MULTI_THREAD_OUTPUT_ITEMS * global_context -> config.reported_multi_best_reads)
+ thread_context -> output_buffer_pointer =0;
+ thread_context -> output_buffer_item ++;
+ done = 1;
+ if(thread_context -> thread_id > 0 && thread_context -> output_buffer_item > MULTI_THREAD_OUTPUT_ITEMS*global_context -> config.reported_multi_best_reads * 3 / 5)
+ global_context -> need_merge_buffer_now = 1;
+ }
+
+ subread_lock_release(&thread_context -> output_lock);
+ if(done)break;
+ usleep(BUFFER_TICK_SLEEP_TIME);
+ }
+
+}
+
+#define write_chunk_results_145 write_chunk_results
+
+// rec1 or rec2 is OK if they are not NULL.
+void write_single_fragment(global_context_t * global_context, thread_context_t * thread_context, subread_output_tmp_t * rec1, realignment_result_t * raw_r1, subread_output_tmp_t * rec2, realignment_result_t * raw_r2, int all_locations , int current_location , char * read_name_1, char * read_name_2, int read_len_1, int read_len_2, char * read_text_1, char * read_text_2, char * qual_text_1, char * qual_text_2, subread_read_number_t pair_number, int non_informative_subread_r1, int non_infor [...]
+{
+
+
+ //assert(all_locations <= global_context -> config.reported_multi_best_reads);
+ int tlen = 0;
+
+ if(rec1 && rec2 && rec1->chro == rec2->chro)tlen = calc_tlen(global_context , rec1, rec2, read_len_1, read_len_2);
+
+ if(0)
+ if(global_context -> config.do_fusion_detection && rec1 && rec2 && current_location == 0 && rec1 -> chimeric_sections == 1 && rec2 -> chimeric_sections == 1){
+ // when current_location == 0, both r1 and r2 were on the same strand.
+ int is_funky = is_funky_fragment(global_context, read_name_1, rec1 -> chro, rec1 -> offset, read_len_1, rec1 -> strand, rec1 -> out_cigars[0], read_text_1, read_name_2, rec2 -> chro, rec2 -> offset, read_len_2, rec2 -> strand, rec2 -> out_cigars[0] , read_text_2, tlen);
+ //if (is_funky)
+ // SUBREADprintf("RNAME %s is %d, CHRO = '%s' %p '%s' %p, POS=%u, %u\n", read_name_1, is_funky, rec1 -> chro,rec1 -> chro,rec2 -> chro,rec2 -> chro, rec1 -> offset, rec2 -> offset);
+ if(is_funky & FUNKY_FRAGMENT_A){
+ fraglist_append(&global_context -> funky_list_A, pair_number);
+ }
+ if(is_funky & FUNKY_FRAGMENT_BC){
+ //#warning "LOGIC WRONG: R1 AND R2 SHOULD BE DECIDED BY THEIR MAPPING POSITIONS"
+ bktable_append(&global_context -> funky_table_BC, rec1 -> chro, rec1 -> offset + rec1 -> soft_clipping_movements, NULL + (2*pair_number));
+ bktable_append(&global_context -> funky_table_BC, rec2 -> chro, rec2 -> offset + rec2 -> soft_clipping_movements, NULL + (2*pair_number+1));
+ }
+ if(is_funky & FUNKY_FRAGMENT_DE){
+ fraglist_append(&global_context -> funky_list_DE, pair_number);
+ bktable_append(&global_context -> funky_table_DE, rec1 -> chro, rec1 -> offset + rec1 -> soft_clipping_movements, NULL + (2*pair_number + (rec1 -> offset > rec2 -> offset ? 1:0)));
+ bktable_append(&global_context -> funky_table_DE, rec2 -> chro, rec2 -> offset + rec2 -> soft_clipping_movements, NULL + (2*pair_number + (rec1 -> offset < rec2 -> offset ? 1:0)));
+ }
+ }
+
+ int flag1 = calc_flags( global_context , rec1, rec2, 0, read_len_1, read_len_2, current_location, tlen);
+
+ int flag2 = -1;
+
+ if(global_context->input_reads.is_paired_end_reads)
+ {
+ flag2 = calc_flags( global_context , rec1, rec2, 1, read_len_1, read_len_2, current_location, tlen);
+ if((0 == current_location) && (flag2 & SAM_FLAG_MATCHED_IN_PAIR)) global_context->all_correct_PE_reads ++;
+ }
+
+
+ // rec -> chro is a pointer to the offset table; the pointers can be compared.
+
+
+
+ int applied_reverse_space;
+ applied_reverse_space = global_context->config.space_type;
+ if(global_context -> config.convert_color_to_base)
+ {
+
+ //SUBREADprintf("BEST_READ_NO = %d / %d\n", current_location, all_locations);
+ //SUBREADprintf("ORGI: %s\n", read_text_1);
+ colorread2base(read_text_1, read_len_1+1);
+ //SUBREADprintf("CONV: %s\n\n", read_text_1);
+
+ if(global_context->input_reads.is_paired_end_reads)
+ colorread2base(read_text_2, read_len_2+1);
+
+ applied_reverse_space = GENE_SPACE_BASE;
+ }
+
+ int should_1_reverse = calc_should_reverse( global_context , rec1, rec2, 0);
+
+ if(should_1_reverse)
+ {
+ reverse_read(read_text_1, read_len_1 + global_context->config.convert_color_to_base, applied_reverse_space);
+ reverse_quality(qual_text_1, read_len_1);
+ }
+
+ if(global_context->input_reads.is_paired_end_reads)
+ {
+ int should_2_reverse = calc_should_reverse( global_context , rec1, rec2, 1);
+
+ if(should_2_reverse)
+ {
+ reverse_read(read_text_2, read_len_2 + global_context->config.convert_color_to_base, applied_reverse_space);
+ reverse_quality(qual_text_2, read_len_2);
+ }
+ }
+ remove_backslash(read_name_1);
remove_backslash(read_name_2);
int display_offset1 = 0, display_tailgate1 = 0;
@@ -1620,13 +1799,13 @@ void write_single_fragment(global_context_t * global_context, subread_output_tmp
if(global_context -> config.space_type == GENE_SPACE_BASE){
- if(rec1 && !strstr(rec1->additional_information, "\tNM:i:")){
- short rec1_edit = calc_edit_dist(global_context, rec1->raw_result, rec1->cigar , rec1->linear_position, read_text_1);
+ if(rec1) {// && !strstr(rec1->additional_information, "\tNM:i:")){
+ short rec1_edit = calc_edit_dist(global_context, rec1->raw_result, rec1->cigar , rec1->linear_position, read_text_1, raw_r1 -> final_mismatched_bases);
sprintf(rec1->additional_information + strlen( rec1->additional_information), "\tNM:i:%d", rec1_edit );
}
- if(global_context->input_reads.is_paired_end_reads && rec2 && !strstr(rec2->additional_information, "\tNM:i:"))
+ if(global_context->input_reads.is_paired_end_reads && rec2) //&& !strstr(rec2->additional_information, "\tNM:i:"))
{
- short rec2_edit = calc_edit_dist(global_context, rec2->raw_result, rec2->cigar , rec2->linear_position, read_text_2);
+ short rec2_edit = calc_edit_dist(global_context, rec2->raw_result, rec2->cigar , rec2->linear_position, read_text_2, raw_r2 -> final_mismatched_bases);
sprintf(rec2->additional_information + strlen( rec2->additional_information), "\tNM:i:%d", rec2_edit );
}
}
@@ -1639,9 +1818,11 @@ void write_single_fragment(global_context_t * global_context, subread_output_tmp
if(global_context -> config.SAM_extra_columns)
{
if(raw_r1)
- sprintf(extra_additional_1, "SB:i:%d\tSC:i:%d\tSD:i:%d\tSP:Z:%s\t", raw_r1 -> used_subreads_in_vote, raw_r1 -> selected_votes, raw_r1 -> noninformative_subreads_in_vote, (raw_r1 -> result_flags & CORE_IS_GAPPED_READ)?"GAPPED":"NOEVENT");
+ sprintf(extra_additional_1, "SB:i:%d\tSC:i:%d\tSD:i:%d\tSP:Z:%s\t", raw_r1 -> mapping_result-> used_subreads_in_vote, raw_r1 -> mapping_result -> selected_votes, non_informative_subread_r1, (raw_r1 -> mapping_result -> result_flags & CORE_IS_GAPPED_READ)?"GAPPED":"NOEVENT");
+ else sprintf(extra_additional_1, "SD:i:%d\t", non_informative_subread_r1);
if(raw_r2)
- sprintf(extra_additional_2, "SB:i:%d\tSC:i:%d\tSD:i:%d\tSP:Z:%s\t", raw_r2 -> used_subreads_in_vote, raw_r2 -> selected_votes, raw_r2 -> noninformative_subreads_in_vote, (raw_r2 -> result_flags & CORE_IS_GAPPED_READ)?"GAPPED":"NOEVENT");
+ sprintf(extra_additional_2, "SB:i:%d\tSC:i:%d\tSD:i:%d\tSP:Z:%s\t", raw_r2 -> mapping_result-> used_subreads_in_vote, raw_r2 -> mapping_result -> selected_votes, non_informative_subread_r2, (raw_r2 -> mapping_result -> result_flags & CORE_IS_GAPPED_READ)?"GAPPED":"NOEVENT");
+ else sprintf(extra_additional_2, "SD:i:%d\t", non_informative_subread_r2);
}
@@ -1669,871 +1850,103 @@ void write_single_fragment(global_context_t * global_context, subread_output_tmp
else
{
out_chro1 = "*";
- out_cigar1 = "*";
- }
-
- if(rec2)
- {
- assert(rec2->chro);
- strcat(extra_additional_2, rec2->additional_information);
- assert(rec2->chro);
- out_chro2 = rec2->chro;
- out_cigar2 = rec2->cigar;
- }
- else
- {
- out_chro2 = "*";
- out_cigar2 = "*";
- }
-
- int out_offset1=0, out_offset2=0;
- long long int out_tlen1, out_tlen2;
-
- out_tlen1 = tlen;
- out_tlen2 = tlen;
- if(rec1 && rec2)
- {
- if( rec1->offset > rec2->offset) out_tlen1 = - out_tlen1;
- else out_tlen2 = -out_tlen2;
- }
-
- if(0==current_location)
- {
- if(global_context -> input_reads.is_paired_end_reads)
- {
- if(rec1 || rec2)
- global_context -> all_mapped_reads++;
- }
- else if(rec1)
- global_context -> all_mapped_reads++;
- }
-
- if(rec1)
- out_offset1 = rec1->offset + rec1 -> soft_clipping_movements;
- if(rec2)
- out_offset2 = rec2->offset + rec2 -> soft_clipping_movements;
-
-
- int out_mapping_quality1 = 0, out_mapping_quality2 = 0;
- if(rec1)
- out_mapping_quality1 = rec1->mapping_quality;
- if(rec2)
- out_mapping_quality2 = rec2->mapping_quality;
-
- char * mate_chro_for_1 = out_chro2;
- char * mate_chro_for_2 = out_chro1;
-
- if(out_chro1 == out_chro2 && out_chro1 && out_chro1[0]!='*'){
- mate_chro_for_1="=";
- mate_chro_for_2="=";
- }
-
-
- if(global_context -> config.is_BAM_output){
- SamBam_writer_add_read(global_context -> output_bam_writer, read_name_1, flag1, out_chro1 , out_offset1, out_mapping_quality1, out_cigar1, out_chro2 , out_offset2, out_tlen1, read_len_1, read_text_1 + display_offset1, qual_text_1, extra_additional_1);
-
- if(global_context->input_reads.is_paired_end_reads)
- SamBam_writer_add_read(global_context -> output_bam_writer, read_name_2, flag2, out_chro2 , out_offset2, out_mapping_quality2, out_cigar2, out_chro1 , out_offset1, out_tlen2, read_len_2, read_text_2 + display_offset2, qual_text_2, extra_additional_2);
- }
- else
- {
- sambamout_fprintf(global_context -> output_sam_fp , "%s\t%d\t%s\t%u\t%d\t%s\t%s\t%u\t%lld\t%s\t%s%s%s\n", read_name_1, flag1, out_chro1, out_offset1, out_mapping_quality1, out_cigar1, mate_chro_for_1, out_offset2, out_tlen1, read_text_1 + display_offset1, qual_text_1, extra_additional_1[0]?"\t":"", extra_additional_1);
- if(global_context->input_reads.is_paired_end_reads)
- sambamout_fprintf(global_context -> output_sam_fp , "%s\t%d\t%s\t%u\t%d\t%s\t%s\t%u\t%lld\t%s\t%s%s%s\n", read_name_2, flag2, out_chro2, out_offset2, out_mapping_quality2, out_cigar2, mate_chro_for_2, out_offset1, out_tlen2, read_text_2 + display_offset2, qual_text_2, extra_additional_2[0]?"\t":"", extra_additional_2);
- }
-}
-
-int write_chunk_results_145(global_context_t * global_context)
-{
-
- /**********************************************
- **********************************************
- ** Initiate the memory blocks
- **********************************************
- **********************************************
- */
- gene_input_t * ginp1, * ginp2=NULL;
-
- ginp1 = &global_context->input_reads.first_read_file;
- if(global_context->input_reads.is_paired_end_reads)
- ginp2 = &global_context->input_reads.second_read_file;
-
- int read_number, sqr_read_number = 0, sqr_interval = global_context -> processed_reads_in_chunk/10;
-
- subread_output_context_t out_context;
- init_output_context(global_context, &out_context);
-
- char * read_text_1, * read_text_2;
- char * qual_text_1, * qual_text_2;
- char * read_name_1, * read_name_2;
- char * output_line_buffer;
-
- int read_len_1 = 0, read_len_2 = 0;
- read_text_1 = malloc(sizeof(char) * (MAX_READ_LENGTH+1));
- read_text_2 = malloc(sizeof(char) * (MAX_READ_LENGTH+1));
- qual_text_1 = malloc(sizeof(char) * (MAX_READ_LENGTH+1));
- qual_text_2 = malloc(sizeof(char) * (MAX_READ_LENGTH+1));
- read_name_1 = malloc(sizeof(char) * (MAX_READ_NAME_LEN+1));
- read_name_2 = malloc(sizeof(char) * (MAX_READ_NAME_LEN+1));
- output_line_buffer = malloc(sizeof(char) * ( 2* MAX_READ_LENGTH + 2 * MAX_CHROMOSOME_NAME_LEN + MAX_READ_NAME_LEN + CORE_MAX_CIGAR_STR_LEN + CORE_ADDITIONAL_INFO_LENGTH + 100));
-
- /**********************************************
- **********************************************
- ** Going through all reads in this chunk.
- **********************************************
- **********************************************
- */
- for(read_number = 0; read_number < global_context -> processed_reads_in_chunk ; read_number++)
- {
- int output_alignment_number, is_read1_reversed, is_read2_reversed;
-
- if(sqr_read_number > sqr_interval)
- {
- show_progress(global_context, NULL, read_number, STEP_WRITE_CHUNK_RESULTS);
- sqr_read_number = 0;
- }
-
- sqr_read_number ++;
- fetch_next_read_pair(global_context, NULL, ginp1, ginp2, &read_len_1, &read_len_2, read_name_1, read_name_2, read_text_1, read_text_2, qual_text_1, qual_text_2, 0);
- if(qual_text_1[0] && global_context -> config.phred_score_format == FASTQ_PHRED64)
- {
- fastq_64_to_33(qual_text_1);
- if(global_context->input_reads.is_paired_end_reads)
- fastq_64_to_33(qual_text_2);
-
- }
-
- is_read1_reversed = 0;
- is_read2_reversed = 0;
-
- memset( out_context.out_pairs, 0, sizeof(void *) *2* global_context->config.multi_best_reads);
- memset( out_context.out_raws, 0, sizeof(void *) *2* global_context->config.multi_best_reads);
-
- // Array output_records is organised in the order of [R1, R2, R1, R2, R1, R2...]. The total number of items is output_fragment_combinations (*2 for paired-end reads).
- int output_fragment_combinations = calculate_fragment_combinations(global_context , &out_context, read_number, read_len_1, read_len_2, read_name_1, read_name_2, read_text_1, read_text_2, qual_text_1, qual_text_2);
-
-
- /**********************************************
- **********************************************
- ** Write the fragment results. Note that each fragment MUST be paired.
- **********************************************
- **********************************************
- */
-
- // if(output_fragment_combinations >= 0) printf("MM READ=%d : %s\n", output_fragment_combinations, read_name_1);
-
- for(output_alignment_number = 0; output_alignment_number < max(1,output_fragment_combinations);output_alignment_number ++)
- {
- char * read_text_1_raw =NULL, * read_text_2_raw =NULL, is_null_qual = 0;
- if(global_context -> config.space_type == GENE_SPACE_COLOR)
- {
- read_text_1_raw = malloc(1250);
- read_text_2_raw = malloc(1250);
- is_null_qual = qual_text_1[0]==0;
- strcpy(read_text_1_raw, read_text_1);
- strcpy(read_text_2_raw, read_text_2);
- // for color-space reads, the read text is re-written everytime because it is impossible to re-reverse the reads.
- }
-
-
-
-
- subread_output_tmp_t * rec1 = out_context.out_pairs[2*output_alignment_number];
- subread_output_tmp_t * rec2 = out_context.out_pairs[2*output_alignment_number+1];
-
-
- if(rec1)
- {
- // SUBREADprintf("OUTN=%d; selected_votes=%d\n\n", output_alignment_number, rec1->raw_result -> selected_votes);
- assert(rec1->chro);
- }
- if(rec2)
- {
- assert(rec2->chro);
- // SUBREADprintf("OUTT=%d; selected_votes=%d\n\n", output_alignment_number, rec2->raw_result -> selected_votes);
- }
-
- alignment_result_t * raw_r1, * raw_r2;
-
- if(rec1 || rec2 || !global_context->config.ignore_unmapped_reads)
- {
- if(global_context->input_reads.is_paired_end_reads)
- {
- raw_r1 = out_context.out_raws[2*output_alignment_number];
- raw_r2 = out_context.out_raws[2*output_alignment_number+1];
- write_single_fragment(global_context, out_context.out_pairs[2*output_alignment_number], raw_r1, out_context.out_pairs[2*output_alignment_number + 1], raw_r2, output_fragment_combinations , output_alignment_number , read_name_1, read_name_2, read_len_1, read_len_2, read_text_1, read_text_2, qual_text_1, qual_text_2, &is_read1_reversed, &is_read2_reversed );
- }
- else
- {
- raw_r1 = out_context.out_raws[output_alignment_number];
- write_single_fragment(global_context, out_context.out_pairs[output_alignment_number], raw_r1, NULL, NULL, output_fragment_combinations , output_alignment_number , read_name_1, read_name_2, read_len_1, read_len_2, read_text_1, read_text_2, qual_text_1, qual_text_2, &is_read1_reversed, &is_read2_reversed );
- }
- }
-
- if(global_context -> config.space_type == GENE_SPACE_COLOR)
- {
- // for color-space reads, the read text is re-written everytime because it is impossible to re-reverse the reads.
- is_read1_reversed = 0;
- is_read2_reversed = 0;
- strcpy(read_text_1, read_text_1_raw);
- strcpy(read_text_2, read_text_2_raw);
- if(is_null_qual){
- qual_text_1[0]=0;
- qual_text_2[0]=0;
- }
- free(read_text_2_raw);
- free(read_text_1_raw);
- }
- }
-
- }
-
- free(read_text_1);
- free(read_text_2);
- free(qual_text_1);
- free(qual_text_2);
- free(read_name_1);
- free(read_name_2);
- free(output_line_buffer);
- destroy_output_context(global_context, &out_context);
-
- return 0;
-}
-
-
-
-int write_chunk_results_144(global_context_t * global_context)
-{
- unsigned int read_number, sqr_read_number = 0, sqr_interval;
- gene_input_t * ginp1, * ginp2=NULL;
- char * additional_information = malloc(1800);
- short current_display_offset = 0, current_display_tailgate = 0;
-
- unsigned int out_poses[CIGAR_PERFECT_SECTIONS+1], xk1;
- char * out_cigars[CIGAR_PERFECT_SECTIONS+1], *out_mate_cigars[CIGAR_PERFECT_SECTIONS+1];
- char out_strands[CIGAR_PERFECT_SECTIONS+1];
- short out_read_lens[CIGAR_PERFECT_SECTIONS+1];
-
- for(xk1 = 0; xk1 < CIGAR_PERFECT_SECTIONS+1; xk1++) out_cigars[xk1] = malloc(100);
- for(xk1 = 0; xk1 < CIGAR_PERFECT_SECTIONS+1; xk1++) out_mate_cigars[xk1] = malloc(100);
-
- //if(global_context -> config.space_type == GENE_SPACE_COLOR && !global_context -> config.convert_color_to_base)
- // current_display_offset = 1;
-
-
- ginp1 = &global_context->input_reads.first_read_file;
- if(global_context->input_reads.is_paired_end_reads)
- ginp2 = &global_context->input_reads.second_read_file;
-
- sqr_interval = global_context -> processed_reads_in_chunk/10;
-
- for(read_number = 0; read_number < global_context -> processed_reads_in_chunk ; read_number++)
- {
- char read_text_1[MAX_READ_LENGTH+1], read_text_2[MAX_READ_LENGTH+1];
- char qual_text_1[MAX_READ_LENGTH+1], qual_text_2[MAX_READ_LENGTH+1];
- char read_name_1[MAX_READ_NAME_LEN+1], read_name_2[MAX_READ_NAME_LEN+1];
- int read_len_1, read_len_2=0;
- int best_read_id = 0;
- int best_read_id_HI = 0;
- int total_best_reads = 0;
- int read1_has_reversed = 0;
- int read2_has_reversed = 0;
-
- int is_second_read;
- int applied_reverse_space;
-
- if(sqr_read_number > sqr_interval)
- {
- show_progress(global_context, NULL, read_number, STEP_WRITE_CHUNK_RESULTS);
- sqr_read_number = 0;
- }
-
- sqr_read_number ++;
- fetch_next_read_pair(global_context, NULL, ginp1, ginp2, &read_len_1, &read_len_2, read_name_1, read_name_2, read_text_1, read_text_2, qual_text_1, qual_text_2, 0);
- //printf("ORAW1=%s\nORAW2=%s\n\n", read_text_1,read_text_2);
-
- applied_reverse_space = global_context->config.space_type;
- if(global_context -> config.convert_color_to_base)
- {
- colorread2base(read_text_1, read_len_1+1);
- if(global_context->input_reads.is_paired_end_reads)
- colorread2base(read_text_2, read_len_2+1);
- applied_reverse_space = GENE_SPACE_BASE;
-
- }
- //printf("BAS1=%s\nBAS2=%s\n\n", read_text_1,read_text_2);
-
- //alignment_result_t * prime_result = _global_retrieve_alignment_ptr(global_context , read_number, 0, 0);
- int read_1_repeats = 0, read_1_reported=0;
- int read_2_repeats = 0, read_2_reported=0;
- //int is_PE_OK = -1;
- for(total_best_reads=0; total_best_reads<global_context -> config.multi_best_reads; total_best_reads++)
- {
- alignment_result_t * current_result = _global_retrieve_alignment_ptr(global_context , read_number, 0, total_best_reads);
- alignment_result_t * current_result_2 = NULL;
-
- //if(total_best_reads > 0 && current_result->selected_votes < 1) break;
- //if(total_best_reads > 0 && global_context -> input_reads.is_paired_end_reads && !is_result_in_PE(prime_result)) break;
- int read_1_result_available = 1;
- int read_2_result_available = 1;
-
- if(global_context -> config.multi_best_reads>1)
- {
- char * chro1_chro = NULL;
- unsigned int chro_1_pos = 0;
- if(locate_gene_position_max(current_result -> selected_position, &global_context -> chromosome_table, &chro1_chro, &chro_1_pos, read_len_1))
- {
- read_1_result_available = 0;
- }
- }
-
- if(read_1_result_available)
- if((current_result->result_flags & CORE_IS_BREAKEVEN) && !global_context -> config.report_multi_mapping_reads)
- {
- read_1_result_available = 0;
- // SUBREADprintf("Disabled read on time UNIQ:%s\n",current_name);
- }
-
- if(global_context -> input_reads.is_paired_end_reads)
- {
- current_result_2 = _global_retrieve_alignment_ptr(global_context , read_number, 1, total_best_reads);
-
- if(read_2_result_available)
- if((current_result_2->result_flags & CORE_IS_BREAKEVEN) && !global_context -> config.report_multi_mapping_reads)
- {
- read_2_result_available = 0;
- }
-
-
- if(global_context -> config.multi_best_reads>1)
- {
- char * chro1_chro = NULL;
- unsigned int chro_1_pos = 0;
- if(locate_gene_position_max(current_result_2 -> selected_position, &global_context -> chromosome_table, &chro1_chro, &chro_1_pos, read_len_2))
- {
- read_2_result_available = 0;
- }
- }
-
- if(current_result_2 -> result_flags & CORE_IS_FULLY_EXPLAINED)if(read_2_result_available) read_2_repeats ++;
- if(current_result -> result_flags & CORE_IS_FULLY_EXPLAINED)if(read_1_result_available) read_1_repeats ++;
- }else
- {
- if(current_result -> result_flags & CORE_IS_FULLY_EXPLAINED)
- if(read_1_result_available)read_1_repeats ++;
- }
- }
-
- for(best_read_id=0; best_read_id<global_context -> config.multi_best_reads; best_read_id++)
- {
- char mate_cigar_decompress[100];
- char current_cigar_decompress[100];
- for(is_second_read = 0; is_second_read < 1+ global_context -> input_reads.is_paired_end_reads; is_second_read++)
- {
- alignment_result_t *current_result, *mate_result = NULL;
- current_result = _global_retrieve_alignment_ptr(global_context , read_number, is_second_read, best_read_id);
-
- if(global_context -> input_reads.is_paired_end_reads)
- mate_result = _global_retrieve_alignment_ptr(global_context , read_number,!is_second_read, best_read_id);
-
- //if(best_read_id>0 && current_result->selected_votes < 1 && (is_second_read == 0 || !global_context -> input_reads.is_paired_end_reads)) break;
- //if(best_read_id>0 && ( global_context -> input_reads.is_paired_end_reads&& !is_result_in_PE(prime_result))) break;
- /*
- if(global_context -> input_reads.is_paired_end_reads)
- {
- if(best_read_id)
- if((!(current_result -> result_flags &CORE_IS_FULLY_EXPLAINED)) && !(CORE_IS_FULLY_EXPLAINED & mate_result -> result_flags))
- break;
- }else
- {
- if(best_read_id)
- if(!(current_result -> result_flags &CORE_IS_FULLY_EXPLAINED))
- break;
- }*/
-
- char * current_name = is_second_read ? read_name_2 : read_name_1;
- char * current_read_text = is_second_read ? read_text_2 : read_text_1;
- char * current_qual_text = is_second_read ? qual_text_2 : qual_text_1;
- int * current_has_reversed = is_second_read ? (&read2_has_reversed):(&read1_has_reversed);
- int current_read_len = is_second_read ? read_len_2 : read_len_1;
- int mate_read_len = is_second_read ? read_len_1 : read_len_2;
- unsigned int mate_linear_pos=0, current_linear_pos=0;
- char mate_strand = 0, current_strand = 0;
- //int current_read_repeats = is_second_read ? read_2_repeats:read_1_repeats;
- int * current_read_reported = is_second_read ? (&read_2_reported):(&read_1_reported);
-
- char * current_chro_name=NULL, * mate_chro_name = NULL;
- unsigned int current_chro_offset=0, mate_chro_offset=0;
- char * current_CIGAR;
- int second_char = -1;
-
- additional_information[0]=0;
-
- int mask = 0;
- int is_mate_ok = 0;
- int is_current_ok = (current_result -> result_flags & CORE_IS_FULLY_EXPLAINED)?1:0;
- int current_repeated_times = 0;
- float current_final_quality = current_result -> final_quality;
- int current_soft_clipping_movement =0, mate_soft_clipping_movement = 0;
-
- current_linear_pos = current_result -> selected_position;
- current_strand = (current_result -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0;
-
- if(global_context -> input_reads.is_paired_end_reads)
- {
- remove_backslash(current_name);
-
- mask |= SAM_FLAG_PAIRED_TASK;
- is_mate_ok = (mate_result -> result_flags & CORE_IS_FULLY_EXPLAINED)?1:0;
-
- if((mate_result->result_flags & CORE_IS_BREAKEVEN) && !global_context -> config.report_multi_mapping_reads)
- is_mate_ok = 0;
-
- int mate_repeated_times = 0;
-
- mate_strand = (mate_result -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0;
- mate_linear_pos = mate_result -> selected_position;
-
- if(global_context -> config.do_big_margin_filtering_for_reads)
- mate_repeated_times = is_ambiguous_voting(global_context, read_number, !is_second_read, mate_result->selected_votes, mate_result->confident_coverage_start, mate_result->confident_coverage_end, mate_read_len, mate_strand);
-
- if( global_context -> config.do_big_margin_filtering_for_reads && mate_repeated_times > 1)
- is_mate_ok = 0;
-
-
- if(global_context->config.report_no_unpaired_reads && !is_result_in_PE(current_result))
- {
- is_mate_ok = mate_result->selected_votes > current_result->selected_votes?is_mate_ok:0;
- is_current_ok = mate_result->selected_votes < current_result->selected_votes?is_current_ok:0;
- }
-
-
- if(is_second_read)
- mask |= SAM_FLAG_SECOND_READ_IN_PAIR;
- else
- mask |= SAM_FLAG_FIRST_READ_IN_PAIR;
-
-
-
- if(is_mate_ok)
- {
-
- int is_jumped = 0;
- char * mate_CIGAR;
- if( mate_result -> cigar_string[0] == -1)
- {
- is_jumped = 1;
- bincigar2cigar(mate_cigar_decompress, 100, mate_result -> cigar_string + 1, CORE_MAX_CIGAR_LEN - 1, mate_read_len);
-
- mate_CIGAR = mate_cigar_decompress;
- }
- else
- {
- bincigar2cigar(mate_cigar_decompress, 100, mate_result -> cigar_string, CORE_MAX_CIGAR_LEN, mate_read_len);
- mate_CIGAR = mate_cigar_decompress;
-
- }
-
- if(global_context -> config.do_fusion_detection)
- {
- chimeric_cigar_parts(global_context, mate_linear_pos, is_jumped ^ mate_strand, is_jumped , mate_cigar_decompress , out_poses, out_mate_cigars, out_strands, mate_read_len, out_read_lens);
-
- mate_linear_pos = out_poses[0];
- mate_strand = out_strands[0]=='-';
- mate_CIGAR = out_mate_cigars[0];
- }
-
- if(locate_gene_position_max(mate_linear_pos, &global_context -> chromosome_table, & mate_chro_name, & mate_chro_offset, mate_read_len))
- {
- is_mate_ok = 0;
- //if(!is_second_read)
- // read_2_repeats--;
-
- }
- mate_soft_clipping_movement = get_soft_clipping_length(mate_CIGAR);
- mate_chro_offset += mate_soft_clipping_movement;
-
- }
- if(is_mate_ok)
- {
- if(mate_strand + (!is_second_read) == 1) mask |= SAM_FLAG_MATE_REVERSE_STRAND_MATCHED;
- mate_chro_offset++;
-
- }
-
- }
- if(!is_mate_ok)
- {
- mate_chro_name = "*";
- mate_chro_offset = 0;
- }
-
-
- if(global_context -> config.do_big_margin_filtering_for_reads)
- current_repeated_times = is_ambiguous_voting(global_context, read_number, is_second_read, current_result->selected_votes, current_result->confident_coverage_start, current_result->confident_coverage_end, current_read_len, current_strand);
-
-
-
- if(is_current_ok)
- if((current_result->result_flags & CORE_IS_BREAKEVEN) && !global_context -> config.report_multi_mapping_reads)
- {
- is_current_ok = 0;
- // SUBREADprintf("Disabled read on time UNIQ:%s\n",current_name);
- }
-
- if(is_current_ok)
- {
- int is_first_section_jumped = 0;
- if( current_result -> cigar_string[0] == -1)
- {
- bincigar2cigar(current_cigar_decompress, 100, current_result -> cigar_string + 1, CORE_MAX_CIGAR_LEN, current_read_len);
- //current_linear_pos = reverse_cigar(current_linear_pos , current_cigar_decompress, current_cigar_decompress_new);
- current_CIGAR = current_cigar_decompress;
- is_first_section_jumped = 1;
- }
- else
- {
- bincigar2cigar(current_cigar_decompress, 100, current_result -> cigar_string, CORE_MAX_CIGAR_LEN, current_read_len);
- current_CIGAR = current_cigar_decompress;
- }
-
-
- if(global_context -> config.do_fusion_detection)
- {
-
- int chimeric_sections = chimeric_cigar_parts(global_context, current_linear_pos, is_first_section_jumped ^ current_strand, is_first_section_jumped , current_CIGAR , out_poses, out_cigars, out_strands, current_read_len, out_read_lens);
-
- //sprintf(additional_information + strlen(additional_information), "\tXX:Z:%s", current_cigar_decompress);
-
- for(xk1=1; xk1<chimeric_sections; xk1++)
- {
- unsigned int chimeric_pos;
- char * chimaric_chr;
-
- if(0==locate_gene_position_max(out_poses[xk1],& global_context -> chromosome_table, & chimaric_chr, & chimeric_pos, 0+out_read_lens[xk1]))
- {
- int soft_clipping_movement = 0;
- soft_clipping_movement = get_soft_clipping_length( out_cigars[xk1]);
- char strand_xor = (out_strands[xk1] == '-')^ is_second_read;
- sprintf(additional_information + strlen(additional_information), "\tCG:Z:%s\tCP:i:%u\tCT:Z:%c\tCC:Z:%s", out_cigars[xk1] , chimeric_pos + soft_clipping_movement + 1, strand_xor?'-':'+' , chimaric_chr );
- }
- }
-
-
- current_linear_pos = out_poses[0];
- current_strand = out_strands[0]=='-';
- current_CIGAR = out_cigars[0];
- }
-
- if(locate_gene_position_max(current_linear_pos,& global_context -> chromosome_table, & current_chro_name, & current_chro_offset, current_read_len))
- {
- is_current_ok = 0;
- // if (!is_second_read)
- // read_1_repeats--;
- // current_read_repeats--;
- }
- }
-
- if(is_current_ok)
- {
- if(current_strand + is_second_read == 1)
- mask |= SAM_FLAG_REVERSE_STRAND_MATCHED;
- if(*current_read_reported){
- mask |= SAM_FLAG_SECONDARY_MAPPING;
- }
- (*current_read_reported)=1;
- //if(1639 == read_number)
- // printf("R0=%d ; NEG=%d; SEC=%d\n",(*current_has_reversed), (current_result -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0, is_second_read);
- int current_need_reverse = current_strand;
-
- //if(current_result -> cigar_string[0]==-1)
- // current_need_reverse = ! current_need_reverse;
-
- if(current_need_reverse + (*current_has_reversed) == 1)
- {
- reverse_read(current_read_text, current_read_len + global_context->config.convert_color_to_base, applied_reverse_space);
- reverse_quality(current_qual_text , current_read_len);
- (*current_has_reversed)=!(*current_has_reversed);
- // if(1639 == read_number)
- // printf("RR=%d\n\n",(*current_has_reversed));
- }
- current_chro_offset++;
-
- current_soft_clipping_movement = get_soft_clipping_length(current_CIGAR);
- current_chro_offset += current_soft_clipping_movement;
- }
- else
- {
- mask |= SAM_FLAG_UNMAPPED;
- int this_should_nagetive = is_second_read;
-
- if(global_context -> input_reads.is_paired_end_reads && global_context -> config.report_unmapped_using_mate_pos&& is_mate_ok)
- {
-
- // DO NOT SHOW CORRDINATE IF IT IS UNMAPPED.
- //
- //current_chro_name = mate_chro_name;
- //current_chro_offset = mate_chro_offset;
- current_chro_name = "*";
- current_chro_offset = 0;
-
- /////////////////////////////////////////
-
- this_should_nagetive = mate_strand;
- current_strand = mate_strand;
- if(this_should_nagetive + is_second_read ==1)
- mask |= SAM_FLAG_REVERSE_STRAND_MATCHED;
- else
- mask &= ~SAM_FLAG_REVERSE_STRAND_MATCHED;
- }
- else
- {
- current_chro_name = "*";
- current_chro_offset = 0;
- }
-
-
-
- if(this_should_nagetive + (*current_has_reversed) == 1)
- {
- reverse_read(current_read_text, current_read_len + global_context->config.convert_color_to_base, applied_reverse_space);
- reverse_quality(current_qual_text , current_read_len);
- (*current_has_reversed)=!(*current_has_reversed);
- }
-
- current_CIGAR = "*";
- current_final_quality=0;
- }
-
- if(global_context -> config.space_type == GENE_SPACE_COLOR)
- {
- if( is_second_read + current_strand == 1 )
- {
- //if(is_current_ok)
- // current_chro_offset ++;
- current_display_offset = 0;
- current_display_tailgate = 1;
- }
- else
- {
- if(!global_context -> config.convert_color_to_base)
- {
- // the first base was a fake prime base; the second base is the first meaningful base.
- second_char = current_read_text[1];
- current_read_text[1] = color2char(second_char, current_read_text[0]);
- }
- current_display_offset = 1;
- current_display_tailgate = 0;
- }
- }
-
- if(global_context -> input_reads.is_paired_end_reads && !is_mate_ok)
- {
- mask |= SAM_FLAG_MATE_UNMATCHED;
- mate_strand = current_strand;
-
- if(is_current_ok){
-
- int mate_should_nagetive = mate_strand;
- if(mate_should_nagetive + (!is_second_read) ==1)
- mask |= SAM_FLAG_MATE_REVERSE_STRAND_MATCHED;
- else
- mask &= ~SAM_FLAG_MATE_REVERSE_STRAND_MATCHED;
- }
-
-
- }
- long long int mate_distance = 0;
-
- // DO NOT SHOW CORRDINATE IF IT IS UNMAPPED.
- //
- if( 0 && is_current_ok && global_context -> config.report_unmapped_using_mate_pos && global_context -> input_reads.is_paired_end_reads &&!is_mate_ok)
- {
- mate_distance = 0;
-
-
- mate_chro_name = current_chro_name;
- mate_chro_offset = current_chro_offset;
- }
- //////////////////////////////////////////////
-
-
- if(is_current_ok && global_context -> input_reads.is_paired_end_reads && is_mate_ok)
- {
- mate_distance = mate_chro_offset - mate_soft_clipping_movement;
- mate_distance -= current_chro_offset - current_soft_clipping_movement;
- mate_distance = abs(mate_distance);
- if(current_chro_offset >mate_chro_offset)
- mate_distance += current_read_len;
- else
- mate_distance += mate_read_len;
-
- if(current_chro_offset - current_soft_clipping_movement > mate_chro_offset - mate_soft_clipping_movement) mate_distance = -mate_distance;
-
- if(mate_distance>0)
- {
- mate_distance = max(mate_distance, current_read_len);
- mate_distance = max(mate_distance, mate_read_len);
- }
- else
- {
- mate_distance = min(mate_distance, -current_read_len);
- mate_distance = min(mate_distance, -mate_read_len);
- }
- }
-
- if((best_read_id == 0 ) && current_qual_text[0] && global_context -> config.phred_score_format == FASTQ_PHRED64)
- fastq_64_to_33(current_qual_text);
- if(!current_qual_text[0])
- {
- int xi2;
- for(xi2=current_display_offset;current_read_text[xi2 + current_display_tailgate];xi2++) current_qual_text[xi2 - current_display_offset] = 'I';
- current_qual_text[xi2 - current_display_offset]=0;
- }
-
- if(current_repeated_times)
- current_final_quality /= current_repeated_times;
- if(global_context->config.downscale_mapping_quality)
- current_final_quality=current_final_quality/5;
-
- if(mate_chro_name[0]!='*' && mate_chro_name == current_chro_name && global_context -> input_reads.is_paired_end_reads)
- {
- mate_chro_name="=";
-
- //if(2190 == read_number)SUBREADprintf("MATE_DIST=%lld\n", mate_distance);
-
- if(is_mate_ok && is_current_ok && abs(mate_distance) >= global_context->config. minimum_pair_distance && abs(mate_distance) <= global_context-> config.maximum_pair_distance && current_strand == mate_strand)
- {
- // if(2190 == read_number)SUBREADprintf("MATE_DIST 2 =%lld , CUR_OFF=%u, MAT_OFF=%u, CUR_STRAND=%d\n", mate_distance, current_chro_offset, mate_chro_offset, current_strand);
- int is_PEM = 0;
- if(global_context -> config.do_fusion_detection)
- {
- is_PEM = 1;
- }
- else
- {
- if(global_context -> config.is_first_read_reversed && !(global_context -> config.is_second_read_reversed))
- {
- if(current_strand == 0)
- {
- if((is_second_read + (mate_chro_offset > current_chro_offset) == 1) || mate_chro_offset == current_chro_offset)
- is_PEM = 1;
- }
- }
- else
- {
- if(current_strand)
- {
- if((is_second_read + (mate_chro_offset < current_chro_offset) == 1) || mate_chro_offset == current_chro_offset) is_PEM = 1;
- }else{
- if((is_second_read + (mate_chro_offset > current_chro_offset) == 1) || mate_chro_offset == current_chro_offset) is_PEM = 1;
-
- }
- }
- }
-
- //if(2190 == read_number)SUBREADprintf("MATE_DIST 3 =%lld ; PEM=%d\n", mate_distance, is_PEM);
- if(is_PEM){
- mask |= SAM_FLAG_MATCHED_IN_PAIR;
- if(is_second_read)
- global_context->all_correct_PE_reads ++;
- }
- }
+ out_cigar1 = "*";
+ }
+ if(rec2)
+ {
+ assert(rec2->chro);
+ strcat(extra_additional_2, rec2->additional_information);
+ assert(rec2->chro);
+ out_chro2 = rec2->chro;
+ out_cigar2 = rec2->cigar;
+ }
+ else
+ {
+ out_chro2 = "*";
+ out_cigar2 = "*";
+ }
- }
- if(mate_chro_name[0]!='=')
- mate_distance = 0;
+ int out_offset1=0, out_offset2=0;
+ long long int out_tlen1, out_tlen2;
- int tailgate_0 = -1;
- if(current_display_tailgate)
- {
- tailgate_0 = current_read_text[strlen(current_read_text) -1];
- current_read_text[strlen(current_read_text) - 1] = 0;
- }
+ out_tlen1 = tlen;
+ out_tlen2 = tlen;
+ if(rec1 && rec2)
+ {
+ if( rec1->offset > rec2->offset) out_tlen1 = - out_tlen1;
+ else out_tlen2 = -out_tlen2;
+ }
- //if(read_number==2461)
- //printf("CURCIGAR=%s ; FINAL_CIGAR=%s ; OK=%d\n", current_cigar_decompress, current_CIGAR, is_current_ok);
+ if(0==current_location)
+ {
+ if(global_context -> input_reads.is_paired_end_reads)
+ {
+ if(rec1 || rec2)
+ global_context -> all_mapped_reads++;
+ }
+ else if(rec1)
+ global_context -> all_mapped_reads++;
+ }
- if( is_mate_ok ||is_current_ok || (((global_context -> config.multi_best_reads <2) || (best_read_id==0 && read_1_repeats == 0 && read_2_repeats == 0)) && ! global_context->config.ignore_unmapped_reads))
- {
+ if(rec1)
+ out_offset1 = rec1->offset + rec1 -> soft_clipping_movements;
+ if(rec2)
+ out_offset2 = rec2->offset + rec2 -> soft_clipping_movements;
- char hi_tag_out[18];
- hi_tag_out[0]=0;
- if(max(read_2_repeats,read_1_repeats) > 1)
- sprintf(hi_tag_out,"\tHI:i:%d", best_read_id_HI);
+ int out_mapping_quality1 = 0, out_mapping_quality2 = 0;
+ if(rec1)
+ out_mapping_quality1 = rec1->mapping_quality;
+ if(rec2)
+ out_mapping_quality2 = rec2->mapping_quality;
+ char * mate_chro_for_1 = out_chro2;
+ char * mate_chro_for_2 = out_chro1;
- int seq_len = strlen(additional_information);
- seq_len += sprintf(additional_information+seq_len, "\tSH:i:%d\tSM:i:%d\tNH:i:%d%s", (int)((current_result -> Score_H >> 17) & 0xfff), current_result -> final_mismatched_bases, max(read_2_repeats,read_1_repeats), hi_tag_out);
+ if(out_chro1 == out_chro2 && out_chro1 && out_chro1[0]!='*'){
+ mate_chro_for_1="=";
+ mate_chro_for_2="=";
+ }
- if( is_current_ok && global_context -> config.is_rna_seq_reads && !(current_result -> result_flags & CORE_NOTFOUND_DONORS))
- {
- seq_len += sprintf(additional_information+seq_len, "\tXS:A:%c", (current_result -> result_flags & CORE_IS_GT_AG_DONORS)?'+':'-');
- }
+ //if(161436 == pair_number) SUBREADprintf("DOUBLE_ADD: %u %d/%d\n", pair_number, current_location, all_locations);
- if(global_context -> config.SAM_extra_columns)
- {
- if(!is_current_ok){
- current_cigar_decompress[0]='*';
- current_cigar_decompress[1]=0;
- }
- seq_len += sprintf(additional_information+seq_len, "\tSG:Z:%s\tSB:i:%d\tSC:i:%d\tSD:i:%d\tSN:i:%u\tSP:Z:%s",current_cigar_decompress, current_result -> used_subreads_in_vote, current_result -> selected_votes, current_result -> noninformative_subreads_in_vote, read_number, (current_result -> result_flags & CORE_IS_GAPPED_READ)?"GAPPED":"NOEVENT");
- }
+ if(thread_context)
+ add_buffered_fragment(global_context, thread_context, pair_number,
+ read_name_1, flag1, out_chro1 , out_offset1, out_mapping_quality1, out_cigar1, mate_chro_for_1 , out_offset2, out_tlen1, read_len_1,
+ read_text_1 + display_offset1, qual_text_1, extra_additional_1,
+ read_name_2, flag2, out_chro2 , out_offset2, out_mapping_quality2, out_cigar2, mate_chro_for_2 , out_offset1, out_tlen2, read_len_2,
+ read_text_2 + display_offset2, qual_text_2, extra_additional_2, all_locations, current_location);
+ else{
+ //subread_lock_occupy(&global_context -> output_lock);
+ if(global_context -> config.is_BAM_output){
+ SamBam_writer_add_read(global_context -> output_bam_writer, read_name_1, flag1, out_chro1 , out_offset1, out_mapping_quality1, out_cigar1, mate_chro_for_1 , out_offset2, out_tlen1, read_len_1, read_text_1 + display_offset1, qual_text_1, extra_additional_1);
- if(global_context->config.read_group_id[0])
- seq_len += sprintf(additional_information+seq_len, "\tRG:Z:%s", global_context->config.read_group_id);
+ if(global_context->input_reads.is_paired_end_reads)
+ SamBam_writer_add_read(global_context -> output_bam_writer, read_name_2, flag2, out_chro2 , out_offset2, out_mapping_quality2, out_cigar2, mate_chro_for_2 , out_offset1, out_tlen2, read_len_2, read_text_2 + display_offset2, qual_text_2, extra_additional_2);
+ }
+ else
+ {
+ sambamout_fprintf(global_context -> output_sam_fp , "%s\t%d\t%s\t%u\t%d\t%s\t%s\t%u\t%lld\t%s\t%s%s%s\n", read_name_1, flag1, out_chro1, out_offset1, out_mapping_quality1, out_cigar1, mate_chro_for_1, out_offset2, out_tlen1, read_text_1 + display_offset1, qual_text_1, extra_additional_1[0]?"\t":"", extra_additional_1);
+ if(global_context->input_reads.is_paired_end_reads)
+ sambamout_fprintf(global_context -> output_sam_fp , "%s\t%d\t%s\t%u\t%d\t%s\t%s\t%u\t%lld\t%s\t%s%s%s\n", read_name_2, flag2, out_chro2, out_offset2, out_mapping_quality2, out_cigar2, mate_chro_for_2, out_offset1, out_tlen2, read_text_2 + display_offset2, qual_text_2, extra_additional_2[0]?"\t":"", extra_additional_2);
+ }
+ //subread_lock_release(&global_context -> output_lock);
+ }
- if(global_context -> config.is_BAM_output)
- SamBam_writer_add_read(global_context -> output_bam_writer, current_name, mask, current_chro_name, current_chro_offset, (int)current_final_quality, current_CIGAR, mate_chro_name, mate_chro_offset, mate_distance, current_read_len, current_read_text + current_display_offset, current_qual_text, additional_information+1);
- else
- sambamout_fprintf(global_context -> output_sam_fp , "%s\t%d\t%s\t%u\t%d\t%s\t%s\t%u\t%lld\t%s\t%s%s\n", current_name, mask, current_chro_name, current_chro_offset, (int)current_final_quality, current_CIGAR, mate_chro_name, mate_chro_offset, mate_distance, current_read_text + current_display_offset, current_qual_text, additional_information);
- }
- if(current_display_tailgate)
- {
- current_read_text[strlen(current_read_text)] = tailgate_0;
- }
+ if(rec1) rec1->raw_result->selected_position += rec1->soft_clipping_movements;
+ if(rec2) rec2->raw_result->selected_position += rec2->soft_clipping_movements;
- if(second_char > 0)
- current_read_text[1] = second_char;
+}
- if(global_context->input_reads.is_paired_end_reads)
- {
- if(is_second_read)
- if(is_current_ok || is_mate_ok)
- {
- global_context -> all_mapped_reads++;
- best_read_id_HI++;
- }
- }
- else
- {
- if(is_current_ok)
- {
- best_read_id_HI++;
- global_context -> all_mapped_reads++;
- }
- }
- }
- }
- }
- free(additional_information);
- for(xk1 = 0; xk1 < CIGAR_PERFECT_SECTIONS; xk1++) free(out_cigars[xk1]);
- for(xk1 = 0; xk1 < CIGAR_PERFECT_SECTIONS; xk1++) free(out_mate_cigars[xk1]);
- return 0;
-}
-void init_chunk_scanning_parameters(global_context_t * global_context, thread_context_t * thread_context, gene_input_t ** ginp1, gene_input_t ** ginp2, unsigned int * read_block_start, unsigned int * reads_to_be_done)
+void init_chunk_scanning_parameters(global_context_t * global_context, thread_context_t * thread_context, gene_input_t ** ginp1, gene_input_t ** ginp2)
{
*ginp2 = NULL;
- *ginp1 = thread_context?thread_context->ginp1: & global_context->input_reads.first_read_file;
+ *ginp1 = &global_context->input_reads.first_read_file;
if(global_context->input_reads.is_paired_end_reads)
- *ginp2 = thread_context?thread_context->ginp2:& global_context->input_reads.second_read_file;
-
- *read_block_start = thread_context?thread_context->read_block_start:0;
- *reads_to_be_done = thread_context?thread_context->reads_to_be_done:global_context -> config.reads_per_chunk;
+ *ginp2 = &global_context->input_reads.second_read_file;
}
gene_value_index_t * find_current_value_index(global_context_t * global_context, unsigned int pos, int len)
@@ -2563,7 +1976,7 @@ gene_value_index_t * find_current_value_index(global_context_t * global_context,
return NULL;
}
//this function selects the correct all_value_indexes from global_context and put it to global_context or thread_context if thread_context is not NULL.
-int locate_current_value_index(global_context_t * global_context, thread_context_t * thread_context, alignment_result_t * result, int rlen)
+int locate_current_value_index(global_context_t * global_context, thread_context_t * thread_context, mapping_result_t * result, int rlen)
{
int block_no;
@@ -2603,44 +2016,37 @@ int locate_current_value_index(global_context_t * global_context, thread_context
int do_iteration_one(global_context_t * global_context, thread_context_t * thread_context)
{
- unsigned int reads_to_be_done = 0, read_block_start = 0;
int ret;
gene_input_t * ginp1 = NULL , * ginp2 = NULL;
- unsigned int current_read_number;
+ subread_read_number_t current_read_number=0;
char read_text_1[MAX_READ_LENGTH+1], read_text_2[MAX_READ_LENGTH+1];
char qual_text_1[MAX_READ_LENGTH+1], qual_text_2[MAX_READ_LENGTH+1];
char read_name_1[MAX_READ_NAME_LEN+1], read_name_2[MAX_READ_NAME_LEN+1];
int read_len_1, read_len_2=0;
- int need_junction_step = global_context -> config.is_rna_seq_reads || global_context -> config.do_fusion_detection;
+ int need_junction_step = global_context -> config.do_breakpoint_detection || global_context -> config.do_fusion_detection;
int sqr_interval, sqr_read_number = 0;
- init_chunk_scanning_parameters(global_context,thread_context, & ginp1, & ginp2, & read_block_start, & reads_to_be_done);
- sqr_interval = global_context -> processed_reads_in_chunk/10/ global_context -> config.all_threads;
+ init_chunk_scanning_parameters(global_context,thread_context, & ginp1, & ginp2);
+ sqr_interval = max(5000,global_context -> processed_reads_in_chunk/10/ global_context -> config.all_threads);
- for(current_read_number = read_block_start; current_read_number < reads_to_be_done + read_block_start ; current_read_number++)
+ while(1)
{
int is_second_read;
sqr_read_number++;
- ret = fetch_next_read_pair(global_context, thread_context, ginp1, ginp2, &read_len_1, &read_len_2, read_name_1, read_name_2, read_text_1, read_text_2, qual_text_1, qual_text_2, 1);
+ ret = fetch_next_read_pair(global_context, thread_context, ginp1, ginp2, &read_len_1, &read_len_2, read_name_1, read_name_2, read_text_1, read_text_2, qual_text_1, qual_text_2, 1, &current_read_number);
+ if(current_read_number < 0) break;
// if no more reads
- if(ret)
- break;
for (is_second_read = 0; is_second_read < 1 + global_context -> input_reads.is_paired_end_reads; is_second_read ++)
{
int best_read_id, is_reversed_already = 0;
for(best_read_id = 0; best_read_id < global_context -> config.multi_best_reads; best_read_id++)
{
- alignment_result_t *current_result = _global_retrieve_alignment_ptr(global_context, current_read_number, is_second_read, best_read_id);
-
- // if(strcmp("a4", read_name_1) == 0)
- // printf("IDR=%u VOT=%d PAIR#=%u\n", current_result->selected_position, current_result->selected_votes, current_read_number);
-
-
+ mapping_result_t *current_result = _global_retrieve_alignment_ptr(global_context, current_read_number, is_second_read, best_read_id);
if(current_result -> selected_votes<1) break;
- if(!global_context->config.report_multi_mapping_reads)if(current_result -> result_flags & CORE_IS_BREAKEVEN) continue;
+ //if(!global_context->config.report_multi_mapping_reads)if(current_result -> result_flags & CORE_IS_BREAKEVEN) continue;
char * current_read = is_second_read?read_text_2 : read_text_1;
char * current_qual = is_second_read?qual_text_2 : qual_text_1;
@@ -2666,7 +2072,7 @@ int do_iteration_one(global_context_t * global_context, thread_context_t * threa
find_new_indels(global_context, thread_context, current_read_number, current_read_name, current_read, current_qual, current_rlen, is_second_read, best_read_id);
if(need_junction_step)
- find_new_junctions(global_context, thread_context, current_read_number, current_read, current_qual, current_rlen, is_second_read, best_read_id);
+ find_new_junctions(global_context, thread_context, current_read_number, current_read_name, current_read, current_qual, current_rlen, is_second_read, best_read_id);
}
}
@@ -2678,6 +2084,7 @@ int do_iteration_one(global_context_t * global_context, thread_context_t * threa
sqr_read_number = 0;
}
}
+ bigtable_release_result(global_context, thread_context, current_read_number, 1);
}
@@ -2694,10 +2101,9 @@ int finish_iteration_three(global_context_t * global_context, thread_context_t *
}
int do_iteration_three(global_context_t * global_context, thread_context_t * thread_context)
{
- unsigned int reads_to_be_done = 0, read_block_start = 0;
int ret;
gene_input_t * ginp1 = NULL , * ginp2 = NULL;
- unsigned int current_read_number;
+ subread_read_number_t current_read_number=0;
char read_text_1[MAX_READ_LENGTH+1], read_text_2[MAX_READ_LENGTH+1];
char qual_text_1[MAX_READ_LENGTH+1], qual_text_2[MAX_READ_LENGTH+1];
char read_name_1[MAX_READ_NAME_LEN+1], read_name_2[MAX_READ_NAME_LEN+1];
@@ -2707,17 +2113,16 @@ int do_iteration_three(global_context_t * global_context, thread_context_t * thr
//unsigned int high_index_border = global_context -> current_value_index -> start_base_offset + global_context -> current_value_index -> length;
print_in_box(80,0,0,"Prepare for long indel deleteion...");
- init_chunk_scanning_parameters(global_context,thread_context, & ginp1, & ginp2, & read_block_start, & reads_to_be_done);
- sqr_interval = global_context -> processed_reads_in_chunk/10/ global_context -> config.all_threads;
+ init_chunk_scanning_parameters(global_context,thread_context, & ginp1, & ginp2);
+ sqr_interval = max(5000,global_context -> processed_reads_in_chunk/10/ global_context -> config.all_threads);
- for(current_read_number = read_block_start; current_read_number < reads_to_be_done + read_block_start ; current_read_number++)
+ while(1)
{
int is_second_read;
sqr_read_number++;
- ret = fetch_next_read_pair(global_context, thread_context, ginp1, ginp2, &read_len_1, &read_len_2, read_name_1, read_name_2, read_text_1, read_text_2, qual_text_1, qual_text_2, 1);
- if(ret)
- break;
+ ret = fetch_next_read_pair(global_context, thread_context, ginp1, ginp2, &read_len_1, &read_len_2, read_name_1, read_name_2, read_text_1, read_text_2, qual_text_1, qual_text_2, 1, &current_read_number);
+ if(ret) break;
int best_read_id = 0;
for (is_second_read = 0; is_second_read < 1 + global_context -> input_reads.is_paired_end_reads; is_second_read ++)
@@ -2725,10 +2130,18 @@ int do_iteration_three(global_context_t * global_context, thread_context_t * thr
int is_reversed_already = 0;
for(best_read_id = 0; best_read_id < global_context -> config.multi_best_reads; best_read_id++)
{
- alignment_result_t * current_result = _global_retrieve_alignment_ptr(global_context, current_read_number, is_second_read, best_read_id);
- alignment_result_t * mate_result = _global_retrieve_alignment_ptr(global_context, current_read_number,!is_second_read, best_read_id);
+ mapping_result_t current_result_body;
+ mapping_result_t mate_result_body;
+
+ mapping_result_t * current_result = &current_result_body, *mate_result = NULL;
+ bigtable_readonly_result(global_context, NULL, current_read_number, best_read_id, is_second_read, current_result, NULL);
- if(best_read_id && (current_result->selected_votes <1)) break;
+ if(global_context -> input_reads.is_paired_end_reads){
+ mate_result = &mate_result_body;
+ bigtable_readonly_result(global_context, NULL, current_read_number, best_read_id,!is_second_read, mate_result, NULL);
+ }
+
+ //if(best_read_id && (current_result->selected_votes <1)) break;
char * current_read_name = is_second_read?read_name_2 : read_name_1;
char * current_read = is_second_read?read_text_2 : read_text_1;
@@ -2736,12 +2149,10 @@ int do_iteration_three(global_context_t * global_context, thread_context_t * thr
int current_rlen = is_second_read?read_len_2:read_len_1;
int mate_rlen = is_second_read?read_len_1:read_len_2;
- //if(!is_ambiguous_voting(global_context , current_read_number, is_second_read, current_result -> selected_votes, current_result -> confident_coverage_start, current_result -> confident_coverage_end, current_rlen, (current_result -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0))
+ if((current_result->result_flags & CORE_IS_FULLY_EXPLAINED) || (global_context -> input_reads.is_paired_end_reads &&(mate_result -> result_flags & CORE_IS_FULLY_EXPLAINED)))
{
- // do local reassambly
- // a potential long-indel read has to have minimum supporting subreads, but not as many as total_subread - 1
-
- if(current_result->selected_votes >= global_context->config.minimum_subread_for_first_read)
+ //SUBREADprintf("BUILD BLOCK FOR READ_%d '%s' BEST=%d\n", is_second_read + 1, current_read_name , best_read_id);
+ if(current_result->result_flags & CORE_IS_FULLY_EXPLAINED)
{
int is_negative_strand = ((current_result -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0);
@@ -2752,22 +2163,10 @@ int do_iteration_three(global_context_t * global_context, thread_context_t * thr
is_reversed_already=!is_reversed_already;
}
- build_local_reassembly(global_context , thread_context , current_read_number, current_read_name , current_read , current_qual , current_rlen, 0 , is_second_read, best_read_id, 0);
-
- }
- else if(global_context -> input_reads.is_paired_end_reads && is_result_in_PE(current_result) && current_result -> selected_votes >= global_context->config.minimum_subread_for_second_read)
- {
- int is_negative_strand = (current_result -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0;
- if(is_negative_strand + is_reversed_already==1)
- {
- reverse_read(current_read, current_rlen, global_context->config.space_type);
- reverse_quality(current_qual, current_rlen);
- is_reversed_already=!is_reversed_already;
- }
+ build_local_reassembly(global_context , thread_context , current_read_number, current_read_name , current_read , current_qual , current_rlen, 0 , is_second_read, best_read_id, 0, current_result, mate_result);
- build_local_reassembly(global_context , thread_context , current_read_number, current_read_name , current_read , current_qual , current_rlen , 0, is_second_read, best_read_id, 0);
}
- else if(global_context -> input_reads.is_paired_end_reads && mate_result -> selected_votes >= global_context->config.minimum_subread_for_first_read)
+ else if(global_context -> input_reads.is_paired_end_reads && (mate_result -> result_flags & CORE_IS_FULLY_EXPLAINED))
{
int is_negative_strand = ((mate_result -> result_flags & CORE_IS_NEGATIVE_STRAND)?1:0);
if(is_negative_strand+is_reversed_already==1)
@@ -2777,7 +2176,7 @@ int do_iteration_three(global_context_t * global_context, thread_context_t * thr
is_reversed_already=!is_reversed_already;
}
- build_local_reassembly(global_context , thread_context , current_read_number , current_read_name , current_read , current_qual , current_rlen, mate_rlen , is_second_read, best_read_id, 1);
+ build_local_reassembly(global_context , thread_context , current_read_number , current_read_name , current_read , current_qual , current_rlen, mate_rlen , is_second_read, best_read_id, 1, current_result, mate_result);
}
}
}
@@ -2798,61 +2197,251 @@ int do_iteration_three(global_context_t * global_context, thread_context_t * thr
}
+void add_realignment_event_support(global_context_t * global_context , realignment_result_t * res){
+ int xk1;
+
+ for(xk1 = 0; xk1 < MAX_EVENTS_IN_READ ; xk1++){
+ chromosome_event_t *sup = res -> supporting_chromosome_events[xk1];
+ if(!sup)break;
+ sup -> final_counted_reads ++;
+ sup -> junction_flanking_left = max(sup -> junction_flanking_left, res -> flanking_size_left[xk1]);
+ sup -> junction_flanking_right = max(sup -> junction_flanking_right, res -> flanking_size_right[xk1]);
+ }
+}
+
+void test_PE_and_same_chro_align(global_context_t * global_context , realignment_result_t * res1, realignment_result_t * res2, int * is_PE_distance, int * is_same_chromosome, int read_len_1, int read_len_2, char * rname);
+void write_realignments_for_fragment(global_context_t * global_context, thread_context_t * thread_context, subread_output_context_t * out_context, unsigned int read_number, realignment_result_t * res1, realignment_result_t * res2, char * read_name_1, char * read_name_2, char * read_text_1, char * read_text_2, char * qual_text_1, char * qual_text_2 , int rlen1 , int rlen2, int multi_mapping_number, int this_multi_mapping_i, int non_informative_subreads_r1, int non_informative_subreads_r2){
+
+ int is_2_OK = 0, is_1_OK = 0;
+
+
+ if(res1){
+ is_1_OK = convert_read_to_tmp(global_context , out_context, read_number, 0, rlen1, read_text_1, qual_text_1, res1, out_context -> r1, read_name_1);
+ if(is_1_OK) add_realignment_event_support(global_context, res1);
+ }
+ if(res2){
+ is_2_OK = convert_read_to_tmp(global_context , out_context, read_number, 1, rlen2, read_text_2, qual_text_2, res2, out_context -> r2, read_name_2);
+ if(is_2_OK) add_realignment_event_support(global_context, res2);
+ }
+
+ mapping_result_t * raw_r1 = NULL;
+ subread_output_tmp_t * r1_output = NULL;
+
+ mapping_result_t * raw_r2 = NULL;
+ subread_output_tmp_t * r2_output = NULL;
+
+ if(res1){
+ raw_r1 = res1 -> mapping_result;
+ r1_output = out_context -> r1;
+ }
+
+ if(res2){
+ raw_r2 = res2 -> mapping_result;
+ r2_output = out_context -> r2;
+ }
+
+ write_single_fragment(global_context, thread_context, r1_output, res1, r2_output, res2, multi_mapping_number , this_multi_mapping_i , read_name_1, read_name_2, rlen1, rlen2, read_text_1, read_text_2, qual_text_1, qual_text_2, read_number, non_informative_subreads_r1, non_informative_subreads_r2);
+
+}
+
+
+void clear_repeated_buffer(global_context_t * global_context, unsigned int * repeated_buffer_position, char ** repeated_buffer_cigar, int * repeated_count){
+ (*repeated_count) = 0;
+}
+int add_repeated_buffer(global_context_t * global_context, unsigned int * repeated_buffer_position, char ** repeated_buffer_cigar, int * repeated_count, realignment_result_t * res1, realignment_result_t * res2){
+ int x1, is_repeated = 0;
+ char * r1_cigar = "*";
+ unsigned int r1_pos = 0;
+
+ if(res1){
+ r1_cigar = res1 -> cigar_string;
+ r1_pos = res1 -> first_base_position;
+ }
+
+ char * r2_cigar = "*";
+ unsigned int r2_pos = 0;
+
+ if(res2){
+ r2_cigar = res2 -> cigar_string;
+ r2_pos = res2 -> first_base_position;
+ }
+
+ for(x1 = 0; x1 < (*repeated_count); x1 += 2){
+ //if(strcmp(r1_cigar, "48M2769N53M")==0)
+ // SUBREADprintf("KYKY %u=%u %s=%s\n" , r1_pos, repeated_buffer_position[x1], r1_cigar , repeated_buffer_cigar[x1]);
+ if(repeated_buffer_position[x1] == r1_pos && repeated_buffer_position[x1+1] == r2_pos)
+ if(strcmp(repeated_buffer_cigar[x1], r1_cigar) == 0 && strcmp(repeated_buffer_cigar[x1+1], r2_cigar) == 0)
+ {
+ is_repeated = 1;
+ break;
+ }
+ }
+
+ if( (!is_repeated) && (*repeated_count) < MAX_ALIGNMENT_PER_ANCHOR * 2 * global_context -> config.reported_multi_best_reads){
+
+
+ //if(strcmp(r1_cigar, "48M2769N53M")==0)
+ // SUBREADprintf("CPCP %u=%u %s=%s\n" , r1_pos, repeated_buffer_position[x1], r1_cigar , repeated_buffer_cigar[x1]);
+ repeated_buffer_position[*repeated_count] = r1_pos;
+ repeated_buffer_position[1 + *repeated_count] = r2_pos;
+ strcpy(repeated_buffer_cigar[*repeated_count], r1_cigar);
+ strcpy(repeated_buffer_cigar[1 + *repeated_count], r2_cigar);
+ (*repeated_count) +=2;
+ }
+
+ return is_repeated;
+}
int do_iteration_two(global_context_t * global_context, thread_context_t * thread_context)
{
- unsigned int reads_to_be_done = 0, read_block_start = 0;
int ret;
gene_input_t * ginp1 = NULL , * ginp2 = NULL;
- unsigned int current_read_number;
+ subread_read_number_t current_read_number=0;
char read_text_1[MAX_READ_LENGTH+1], read_text_2[MAX_READ_LENGTH+1];
char qual_text_1[MAX_READ_LENGTH+1], qual_text_2[MAX_READ_LENGTH+1];
+
+ char raw_read_text_1[MAX_READ_LENGTH+1], raw_read_text_2[MAX_READ_LENGTH+1];
+ char raw_qual_text_1[MAX_READ_LENGTH+1], raw_qual_text_2[MAX_READ_LENGTH+1];
char read_name_1[MAX_READ_NAME_LEN+1], read_name_2[MAX_READ_NAME_LEN+1];
+ char * repeated_buffer_cigars[MAX_ALIGNMENT_PER_ANCHOR * 2 * global_context -> config.reported_multi_best_reads];
+ unsigned int repeated_buffer_pos[MAX_ALIGNMENT_PER_ANCHOR * 2 * global_context -> config.reported_multi_best_reads];
+ int repeated_count;
int read_len_1, read_len_2=0;
int sqr_interval, sqr_read_number=0;
+ int * final_MATCH_buffer1, *final_MISMATCH_buffer1;
+ int * final_MATCH_buffer2, *final_MISMATCH_buffer2, non_informative_subreads_r1=0, non_informative_subreads_r2=0;
+ int * final_realignment_index1, *final_realignment_index2;
+ unsigned int *final_realignment_number;
+ unsigned long long * final_SCORE_buffer;
+ realignment_result_t * final_realignments;
+ subread_output_context_t out_context;
+
+ init_output_context(global_context, &out_context);
+
+ for(repeated_count = 0;repeated_count < MAX_ALIGNMENT_PER_ANCHOR * 2 * global_context -> config.reported_multi_best_reads ; repeated_count ++ ){
+ repeated_buffer_cigars[repeated_count] = malloc(2*CORE_MAX_CIGAR_STR_LEN);
+ }
+
+ final_MATCH_buffer1 = malloc(sizeof(int) * 2 * global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR);
+ final_MISMATCH_buffer1 = malloc(sizeof(int) * 2 * global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR);
+ final_realignment_index1 = malloc(sizeof(int) * global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR);
+
+ final_MATCH_buffer2 = malloc(sizeof(int) * 2 * global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR);
+ final_MISMATCH_buffer2 = malloc(sizeof(int) * 2 * global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR);
+ final_realignment_index2 = malloc(sizeof(int) * global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR);
+
+ final_SCORE_buffer = malloc(sizeof(long long) * global_context -> config.multi_best_reads * global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR*MAX_ALIGNMENT_PER_ANCHOR);
+ final_realignments = malloc(sizeof(realignment_result_t) * global_context -> config.multi_best_reads * 2 * MAX_ALIGNMENT_PER_ANCHOR);
+ final_realignment_number = malloc(sizeof(int) * global_context -> config.multi_best_reads * 2);
+
+ mapping_result_t * r1_align_result_buffer, * r2_align_result_buffer;
+ subjunc_result_t * r1_subjunc_result_buffer, * r2_subjunc_result_buffer;
+
+ r1_align_result_buffer = malloc(sizeof(mapping_result_t) * global_context -> config.multi_best_reads);
+ r2_align_result_buffer = malloc(sizeof(mapping_result_t) * global_context -> config.multi_best_reads);
+
+ r1_subjunc_result_buffer = malloc(sizeof(subjunc_result_t) * global_context -> config.multi_best_reads);
+ r2_subjunc_result_buffer = malloc(sizeof(subjunc_result_t) * global_context -> config.multi_best_reads);
+
//unsigned int low_index_border = global_context -> current_value_index -> start_base_offset;
//unsigned int high_index_border = global_context -> current_value_index -> start_base_offset + global_context -> current_value_index -> length;
- init_chunk_scanning_parameters(global_context,thread_context, & ginp1, & ginp2, & read_block_start, & reads_to_be_done);
- sqr_interval = global_context -> processed_reads_in_chunk/10/ global_context -> config.all_threads;
+ init_chunk_scanning_parameters(global_context,thread_context, & ginp1, & ginp2);
+ sqr_interval = max(5000,global_context -> processed_reads_in_chunk/10/ global_context -> config.all_threads);
+
- for(current_read_number = read_block_start; current_read_number < reads_to_be_done + read_block_start ; current_read_number++)
+ while(1)
{
int is_second_read;
+ int max_votes;
sqr_read_number++;
- ret = fetch_next_read_pair(global_context, thread_context, ginp1, ginp2, &read_len_1, &read_len_2, read_name_1, read_name_2, read_text_1, read_text_2, qual_text_1, qual_text_2,1);
+ ret = fetch_next_read_pair(global_context, thread_context, ginp1, ginp2, &read_len_1, &read_len_2, read_name_1, read_name_2, read_text_1, read_text_2, qual_text_1, qual_text_2, 0, &current_read_number);
+ strcpy(raw_read_text_1, read_text_1);
+ strcpy(raw_qual_text_1, qual_text_1);
+
+ if(global_context -> input_reads.is_paired_end_reads){
+ strcpy(raw_read_text_2, read_text_2);
+ strcpy(raw_qual_text_2, qual_text_2);
+ }
+
+ if(global_context->config.space_type == GENE_SPACE_COLOR){
+ if(isalpha(read_text_1[0])){
+ //SUBREADprintf("FIRST : %s\n\n", read_text_1);
+ int xk1;
+ for(xk1=2; read_text_1[xk1]; xk1++){
+ read_text_1[xk1-2] = read_text_1[xk1];
+ }
+ read_text_1[xk1-2] = 0;
+ }
+
+ if(global_context -> input_reads.is_paired_end_reads){
+ //SUBREADprintf("SECOND: %s\n\n", read_text_2);
+ if(isalpha(read_text_2[0])){
+ int xk1;
+ for(xk1=1; read_text_2[xk1]; xk1++){
+ read_text_2[xk1-1] = read_text_2[xk1];
+ }
+ read_text_2[xk1-1] = 0;
+ }
+ }
+ }
+
+ if(raw_qual_text_1[0] && global_context -> config.phred_score_format == FASTQ_PHRED64){
+ fastq_64_to_33(raw_qual_text_1);
+ if(global_context->input_reads.is_paired_end_reads)
+ fastq_64_to_33(raw_qual_text_2);
+ }
+
+ if(current_read_number < 0) break;
// if no more reads
- if(ret)
- break;
+ if( global_context -> input_reads.is_paired_end_reads)
+ max_votes = max(_global_retrieve_alignment_ptr(global_context, current_read_number, 0, 0)->selected_votes, _global_retrieve_alignment_ptr(global_context, current_read_number, 1, 0)->selected_votes);
+ else max_votes = _global_retrieve_alignment_ptr(global_context, current_read_number, 0, 0)->selected_votes;
+
+
int best_read_id=0;
+
+ //memset(final_MATCH_buffer, 0, sizeof(int) * 2 * global_context -> config.multi_best_reads);
+ //memset(final_MISMATCH_buffer, 0, sizeof(int) * 2 * global_context -> config.multi_best_reads);
+ int r1_candidate_locations = 0, r2_candidate_locations = 0;
+
+ clear_repeated_buffer(global_context, repeated_buffer_pos, repeated_buffer_cigars, &repeated_count);
for (is_second_read = 0; is_second_read < 1 + global_context -> input_reads.is_paired_end_reads; is_second_read ++)
{
- int is_reversed_already = 0;
+ char * current_read_name = is_second_read?read_name_2 : read_name_1;
+ int is_reversed_already = 0, realignment_i;
+ int * current_candidate_locations = is_second_read?&r2_candidate_locations:&r1_candidate_locations;
+
+ int * current_MATCH_buffer = is_second_read?final_MATCH_buffer2:final_MATCH_buffer1;
+ int * current_MISMATCH_buffer = is_second_read?final_MISMATCH_buffer2:final_MISMATCH_buffer1;
+ int * current_realignment_index = is_second_read?final_realignment_index2:final_realignment_index1;
+
for(best_read_id = 0; best_read_id < global_context -> config.multi_best_reads; best_read_id++)
{
- alignment_result_t *current_result = _global_retrieve_alignment_ptr(global_context, current_read_number, is_second_read, best_read_id);
- if(best_read_id > 0 && current_result -> selected_votes==0)break;
- if(!global_context->config.report_multi_mapping_reads)if(current_result -> result_flags & CORE_IS_BREAKEVEN) continue;
+ mapping_result_t *current_result = _global_retrieve_alignment_ptr(global_context, current_read_number, is_second_read, best_read_id);
+ if(best_read_id == 0){
+ if(is_second_read)
+ non_informative_subreads_r2 = current_result -> noninformative_subreads_in_vote;
+ else
+ non_informative_subreads_r1 = current_result -> noninformative_subreads_in_vote;
+ }
- char * current_read_name = is_second_read?read_name_2 : read_name_1;
char * current_read = is_second_read?read_text_2 : read_text_1;
char * current_qual = is_second_read?qual_text_2 : qual_text_1;
int current_rlen = is_second_read?read_len_2:read_len_1;
-
- if(current_result->selected_votes < global_context->config.minimum_subread_for_second_read)
+ if(current_result->selected_votes < global_context->config.minimum_subread_for_second_read || max_votes < global_context->config.minimum_subread_for_first_read)
{
- //SUBREADprintf("RESET0 %d %d\n", current_result->selected_votes);
- current_result->selected_votes = 0;
- current_result -> final_mismatched_bases = 0;
+ // if(current_read_number == 111 || current_read_number == 112)
+ // SUBREADprintf("RESET0 [%d] R_%d SEL=%d MAX=%d\n", current_read_number, is_second_read + 1, current_result->selected_votes, max_votes);
+ current_result -> selected_votes = 0;
continue;
}
if(locate_current_value_index(global_context, thread_context, current_result, current_rlen))
{
- current_result->selected_votes = 0;
- current_result -> final_mismatched_bases = 0;
+ current_result -> selected_votes = 0;
//SUBREADprintf("RESET1 Read position excesses index boundary.\n");
continue;
}
@@ -2866,9 +2455,245 @@ int do_iteration_two(global_context_t * global_context, thread_context_t * threa
is_reversed_already = !is_reversed_already;
}
- explain_read(global_context, thread_context, current_read_number, current_rlen, current_read_name, current_read, current_qual, is_second_read, best_read_id, is_negative_strand);
+ current_result -> result_flags &= ~CORE_IS_FULLY_EXPLAINED;
+
+ unsigned int final_alignments = explain_read(global_context, thread_context , final_realignments + (is_second_read + 2 * best_read_id) * MAX_ALIGNMENT_PER_ANCHOR,
+ current_read_number, current_rlen, current_read_name, current_read, current_qual, is_second_read, best_read_id, is_negative_strand);
+
+ if(0 && FIXLENstrcmp("R000000359", read_name_1)==0)
+ SUBREADprintf("Final alignments=%d, cand = %d , FLAG=%d, final_MATCH=%d\n", final_alignments , (* current_candidate_locations), current_result -> result_flags & CORE_IS_FULLY_EXPLAINED , final_realignments[0].final_matched_bases);
+
+ final_realignment_number[ best_read_id * 2 + is_second_read ] = final_alignments;
+
+ for(realignment_i = 0 ; realignment_i < final_alignments ; realignment_i ++){
+ if((* current_candidate_locations) >= global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR) break;
+
+ int realign_index = (is_second_read + 2 * best_read_id)* MAX_ALIGNMENT_PER_ANCHOR + realignment_i;
+ int final_MATCH = final_realignments[realign_index].final_matched_bases;
+
+ if((current_result -> result_flags & CORE_IS_FULLY_EXPLAINED) && final_MATCH>0) {
+ int final_MISMATCH = final_realignments[realign_index].final_mismatched_bases;
+
+ current_MATCH_buffer[*current_candidate_locations] = final_MATCH;
+ current_MISMATCH_buffer[*current_candidate_locations] = final_MISMATCH;
+ current_realignment_index[*current_candidate_locations] = realign_index;
+ (*current_candidate_locations) ++;
+ }
+ }
+ }
+ }
+
+ if(0 && FIXLENstrcmp("R000000359", read_name_1)==0)
+ SUBREADprintf("Candidate locations = %d (%d), %d (%d)\n", r1_candidate_locations, final_MATCH_buffer1[0] , r2_candidate_locations, final_MATCH_buffer2[0]);
+
+ //if(161430 <= current_read_number) SUBREADprintf("LOC1=%d, LOC2=%d\n", r1_candidate_locations, r2_candidate_locations);
+
+ int output_cursor = 0;
+ if(r2_candidate_locations == 0 || r1_candidate_locations == 0) {
+ int is_second_read, highest_score_occurence =0;
+ for(is_second_read = 0; is_second_read < 1+ global_context -> input_reads.is_paired_end_reads; is_second_read++)
+ {
+ int current_candidate_locations = is_second_read?r2_candidate_locations:r1_candidate_locations;
+ if(current_candidate_locations > 0){
+
+ int * current_MATCH_buffer = is_second_read?final_MATCH_buffer2:final_MATCH_buffer1;
+ int * current_MISMATCH_buffer = is_second_read?final_MISMATCH_buffer2:final_MISMATCH_buffer1;
+ int * current_realignment_index = is_second_read?final_realignment_index2:final_realignment_index1;
+
+ unsigned int best_score_highest = 0, read_record_i;
+ unsigned int scores_array [global_context -> config.multi_best_reads * MAX_ALIGNMENT_PER_ANCHOR];
+
+ for(read_record_i = 0; read_record_i < current_candidate_locations; read_record_i++){
+ realignment_result_t * current_realignment_result = final_realignments + current_realignment_index[read_record_i];
+ mapping_result_t *current_result = current_realignment_result -> mapping_result;
+ assert(current_result -> result_flags & CORE_IS_FULLY_EXPLAINED);
+
+ unsigned int this_MATCH = current_MATCH_buffer[read_record_i];
+ unsigned int this_MISMATCH = current_MISMATCH_buffer[read_record_i];
+ unsigned int this_SCORE;
+
+ if(global_context -> config.experiment_type == CORE_EXPERIMENT_DNASEQ){
+ this_SCORE = this_MATCH * 100000 + (10000 - this_MISMATCH);
+ }else{
+ this_SCORE = 100000 * (10000 - this_MISMATCH) + this_MATCH;
+ }
+
+
+ if(0 && FIXLENstrcmp("R:chrX:52790377:100M:J0", read_name_1)==0)
+ SUBREADprintf("%s, %d-th read [%d] : MAT=%d, MISMA=%d, SCORE=%u, BEST_SCORE=%u\n", read_name_1, is_second_read+1, read_record_i , this_MATCH, this_MISMATCH, this_SCORE, best_score_highest );
+
+
+ best_score_highest = max(best_score_highest, this_SCORE);
+ scores_array[read_record_i] = this_SCORE;
+ }
+
+ for(read_record_i = 0; read_record_i < current_candidate_locations ; read_record_i++){
+ realignment_result_t * current_realignment_result = final_realignments + current_realignment_index[read_record_i];
+ if( scores_array[read_record_i] >= best_score_highest && (current_realignment_result -> realign_flags & CORE_TOO_MANY_MISMATCHES)==0)
+ {
+ int is_repeated = 0;
+
+ is_repeated = add_repeated_buffer(global_context, repeated_buffer_pos, repeated_buffer_cigars, &repeated_count, is_second_read?NULL:current_realignment_result , is_second_read?current_realignment_result:NULL);
+
+ if(is_repeated)
+ scores_array[read_record_i] = 0;
+ else{
+ highest_score_occurence ++;
+ //if(161436 == current_read_number)SUBREADprintf("ADD_HIGH for %d (%p)\n",read_record_i, current_realignment_result);
+ }
+ }
+ }
+
+ if(highest_score_occurence<2 || global_context -> config.report_multi_mapping_reads){
+
+ highest_score_occurence = min(highest_score_occurence, global_context -> config.reported_multi_best_reads);
+ for(read_record_i = 0; read_record_i < current_candidate_locations ; read_record_i++){
+ realignment_result_t * current_realignment_result = final_realignments + current_realignment_index[read_record_i];
+
+ if( scores_array[read_record_i] >= best_score_highest && (current_realignment_result -> realign_flags & CORE_TOO_MANY_MISMATCHES)==0
+ && output_cursor < global_context -> config.reported_multi_best_reads){
+ strcpy(read_text_1, raw_read_text_1);
+ strcpy(read_text_2, raw_read_text_2);
+ strcpy(qual_text_1, raw_qual_text_1);
+ strcpy(qual_text_2, raw_qual_text_2);
+
+
+ //if(161430 <= current_read_number) SUBREADprintf("ALL_SE=%d, THIS_HIT=%d\n", highest_score_occurence, output_cursor);
+ //if(161436 == current_read_number)SUBREADprintf("DOUBLE_ADD_SE for %d (%p): %u %d/%d, BEST=%d\n", scores_array[read_record_i] , current_realignment_result, current_read_number, output_cursor , highest_score_occurence, best_score_highest);
+ if(is_second_read)
+ write_realignments_for_fragment(global_context, thread_context, &out_context, current_read_number, NULL, current_realignment_result, read_name_1, read_name_2, read_text_1, read_text_2, qual_text_1, qual_text_2, read_len_1, read_len_2, highest_score_occurence, output_cursor, non_informative_subreads_r1, non_informative_subreads_r2);
+ else write_realignments_for_fragment(global_context, thread_context, &out_context , current_read_number, current_realignment_result, NULL, read_name_1, read_name_2, read_text_1, read_text_2, qual_text_1, qual_text_2, read_len_1, read_len_2, highest_score_occurence, output_cursor, non_informative_subreads_r1, non_informative_subreads_r2);
+ output_cursor ++;
+ }
+ }
+ assert(output_cursor >= highest_score_occurence - 1);
+ }
+ }
+ }
+ } else {
+ int r1_best_id, r2_best_id, highest_score_occurence = 0;
+ unsigned long long highest_score = 0;
+ for(r1_best_id = 0; r1_best_id < r1_candidate_locations; r1_best_id ++) {
+ int r1_matched = final_MATCH_buffer1[r1_best_id];
+ if(r1_matched < 1) continue;
+ realignment_result_t * realignment_result_R1 = final_realignments + final_realignment_index1[r1_best_id];
+ if(0 && FIXLENstrcmp("FINALQUAL R:chrX:52790377:100M:J0", read_name_1)==0)
+ SUBREADprintf("R1 MA=%d, MISMA=%d %u %s \n", realignment_result_R1->final_matched_bases, realignment_result_R1 -> final_mismatched_bases, realignment_result_R1 -> first_base_position, realignment_result_R1 -> cigar_string);
+
+ for(r2_best_id = 0; r2_best_id < r2_candidate_locations; r2_best_id ++) {
+ int r2_matched = final_MATCH_buffer2[r2_best_id];
+ if(r2_matched < 1) continue;
+
+ realignment_result_t * realignment_result_R2 = final_realignments + final_realignment_index2[r2_best_id];
+ if(0 && FIXLENstrcmp("FINALQUAL R:chrX:52790377:100M:J0", read_name_1)==0)
+ SUBREADprintf("R2 MA=%d, MISMA=%d %u %s\n", realignment_result_R2->final_matched_bases, realignment_result_R2 -> final_mismatched_bases, realignment_result_R2 -> first_base_position, realignment_result_R2 -> cigar_string);
+ int is_PE = 0;
+ int is_same_chro = 0;
+ unsigned long long final_SCORE = 0;
+
+ test_PE_and_same_chro_align(global_context , realignment_result_R1 , realignment_result_R2, &is_PE, &is_same_chro , read_len_1, read_len_2, read_name_1);
+
+ if(global_context -> config.experiment_type == CORE_EXPERIMENT_DNASEQ){
+ int weight;
+
+ //#warning " ============ USE THE FIRST THREE WEIGHTS! ======== "
+
+ if(is_PE)
+ weight = global_context -> config.PE_predominant_weight?300:120;
+ //weight = 300;
+ else if(is_same_chro)
+ weight = global_context -> config.PE_predominant_weight?5:100;
+ else weight = global_context -> config.PE_predominant_weight?3:80;
+ //weight = 30;
+ final_SCORE = weight * (final_MATCH_buffer1[r1_best_id] + final_MATCH_buffer2[r2_best_id]);
+ //#warning "=========== ADD BY YANG LIAO FOR MORE MAPPED READS WITH '-u' OPTION ================"
+ final_SCORE = final_SCORE * 1000llu + (final_MISMATCH_buffer1[r1_best_id] + final_MISMATCH_buffer2[r2_best_id]);
+
+
+ } else if (global_context -> config.experiment_type == CORE_EXPERIMENT_RNASEQ) {
+ int weight;
+ if(is_PE)
+ weight = 3000;
+ else if(is_same_chro)
+ weight = global_context -> config.PE_predominant_weight?10:1000;
+ else weight = global_context -> config.PE_predominant_weight?3:300;
+
+ //#warning "=========== ADD BY YANG LIAO ' + 2' ===================="
+ final_SCORE = 100000llu * weight / (final_MISMATCH_buffer1[r1_best_id] + final_MISMATCH_buffer2[r2_best_id] + 1 + 2);
+
+ //#warning "=========== ADD BY YANG LIAO FOR MORE MAPPED READS WITH '-u' OPTION ================"
+ final_SCORE = final_SCORE * 1000llu + (final_MATCH_buffer1[r1_best_id] + final_MATCH_buffer2[r2_best_id]);
+ } else assert(0);
+
+ assert(final_SCORE > 0);
+ final_SCORE_buffer[r1_best_id * global_context -> config.multi_best_reads + r2_best_id] = final_SCORE;
+
+ if(0 && FIXLENstrcmp("FINALQUAL R:chrX:52790377:100M:J0", read_name_1)==0){
+ SUBREADprintf("Highest=%llu, This=%llu, Occurance=%d\n", highest_score , final_SCORE , highest_score_occurence);
+ }
+
+ if(final_SCORE > highest_score) {
+ highest_score_occurence = 1;
+ highest_score = final_SCORE;
+ clear_repeated_buffer(global_context, repeated_buffer_pos, repeated_buffer_cigars, &repeated_count);
+ add_repeated_buffer(global_context, repeated_buffer_pos, repeated_buffer_cigars, &repeated_count, realignment_result_R1, realignment_result_R2);
+ } else if(final_SCORE == highest_score) {
+ int is_repeat = 0;
+
+ if(global_context -> config.reported_multi_best_reads)
+ is_repeat = add_repeated_buffer(global_context, repeated_buffer_pos, repeated_buffer_cigars, &repeated_count, realignment_result_R1, realignment_result_R2);
+
+ if(is_repeat)
+ final_SCORE_buffer[r1_best_id * global_context -> config.multi_best_reads + r2_best_id] = 0;
+ else
+ highest_score_occurence ++;
+ if(0 && FIXLENstrcmp("R000001161", read_name_1)==0)
+ SUBREADprintf("REPEAT OF %s: %d OCCURANCE AFT=%d\n", read_name_1, is_repeat, highest_score_occurence);
+ }
+ }
+ }
+
+ //SUBREADprintf("Highest score = %llu, Occurance = %d\n", highest_score , highest_score_occurence);
+ // Then, copy the (R1, R2) that have the highest score into the align_res buffer.
+ if(highest_score_occurence <= 1 || global_context -> config.report_multi_mapping_reads){
+ highest_score_occurence = min(highest_score_occurence, global_context -> config.reported_multi_best_reads);
+ for(r1_best_id = 0; r1_best_id < r1_candidate_locations; r1_best_id ++)
+ {
+ int r1_matched = final_MATCH_buffer1[r1_best_id];
+ if(r1_matched < 1) continue;
+
+ for(r2_best_id = 0; r2_best_id < r2_candidate_locations; r2_best_id ++)
+ {
+ int r2_matched = final_MATCH_buffer2[r2_best_id];
+ if(r2_matched < 1) continue;
+
+ if(final_SCORE_buffer[r1_best_id * global_context -> config.multi_best_reads + r2_best_id] == highest_score &&
+ output_cursor < global_context -> config.reported_multi_best_reads){
+ realignment_result_t * r1_realign = final_realignments + final_realignment_index1[r1_best_id];
+ realignment_result_t * r2_realign = final_realignments + final_realignment_index2[r2_best_id];
+
+ strcpy(read_text_1, raw_read_text_1);
+ strcpy(read_text_2, raw_read_text_2);
+ strcpy(qual_text_1, raw_qual_text_1);
+ strcpy(qual_text_2, raw_qual_text_2);
+
+ //if(161436 == current_read_number)SUBREADprintf("DOUBLE_ADD_PE: %u %d/%d\n", current_read_number, output_cursor , highest_score_occurence);
+ write_realignments_for_fragment(global_context, thread_context, &out_context, current_read_number, r1_realign, r2_realign, read_name_1, read_name_2, read_text_1, read_text_2, qual_text_1, qual_text_2, read_len_1, read_len_2, highest_score_occurence, output_cursor, non_informative_subreads_r1, non_informative_subreads_r2);
+ output_cursor ++;
+ }
+ }
+ }
+ assert(output_cursor >= highest_score_occurence - 1);
}
}
+
+ if(output_cursor<1) {
+ strcpy(read_text_1, raw_read_text_1);
+ strcpy(read_text_2, raw_read_text_2);
+ strcpy(qual_text_1, raw_qual_text_1);
+ strcpy(qual_text_2, raw_qual_text_2);
+ write_realignments_for_fragment(global_context, thread_context, &out_context, current_read_number, NULL, NULL, read_name_1, read_name_2, read_text_1, read_text_2, raw_qual_text_1, raw_qual_text_2, read_len_1, read_len_2, 0, 0, non_informative_subreads_r1, non_informative_subreads_r2);
+ }
if(!thread_context || thread_context->thread_id == 0)
{
@@ -2878,8 +2703,39 @@ int do_iteration_two(global_context_t * global_context, thread_context_t * threa
sqr_read_number=0;
}
}
+ bigtable_release_result(global_context, thread_context, current_read_number, 1);
+ }
+
+ free(final_realignments);
+ free(final_MATCH_buffer1);
+ free(final_MISMATCH_buffer1);
+ free(final_realignment_index1);
+
+ free(final_MATCH_buffer2);
+ free(final_MISMATCH_buffer2);
+ free(final_realignment_index2);
+
+ free(final_SCORE_buffer);
+
+ free(r1_align_result_buffer);
+ free(r1_subjunc_result_buffer);
+ free(r2_align_result_buffer);
+ free(r2_subjunc_result_buffer);
+
+ for(repeated_count = 0;repeated_count < MAX_ALIGNMENT_PER_ANCHOR * 2 * global_context -> config.reported_multi_best_reads ; repeated_count ++ ){
+ free(repeated_buffer_cigars[repeated_count]);
}
+ destroy_output_context(global_context, &out_context);
+
+ if(thread_context && 0 == thread_context -> thread_id){
+ while(1){
+ int all_finished = 0;
+ merge_buffered_output_file(global_context, 1, 0, &all_finished);
+ if(all_finished) break;
+ usleep(100);
+ }
+ }
return 0;
}
@@ -2900,15 +2756,13 @@ int core_get_subread_quality(global_context_t * global_context, thread_context_t
int do_voting(global_context_t * global_context, thread_context_t * thread_context)
{
- unsigned int reads_to_be_done = 0, read_block_start = 0;
int ret, xk1;
gene_input_t * ginp1 = NULL , * ginp2 = NULL;
- unsigned int current_read_number;
+ subread_read_number_t current_read_number=0;
char * read_text_1, * read_text_2;
char * qual_text_1, * qual_text_2;
char read_name_1[MAX_READ_NAME_LEN+1], read_name_2[MAX_READ_NAME_LEN+1];
int read_len_1, read_len_2=0;
- unsigned int processed_reads=0;
int min_first_read_votes = global_context -> config.minimum_subread_for_first_read;
int voting_max_indel_length = min(16, global_context->config.max_indel_length);
int sqr_interval=10000, sqr_read_number = 0;
@@ -2930,29 +2784,30 @@ int do_voting(global_context_t * global_context, thread_context_t * thread_conte
return 1;
}
- init_chunk_scanning_parameters(global_context,thread_context, & ginp1, & ginp2, & read_block_start, & reads_to_be_done);
+ init_chunk_scanning_parameters(global_context,thread_context, & ginp1, & ginp2);
unsigned int low_index_border = global_context -> current_value_index -> start_base_offset;
unsigned int high_index_border = global_context -> current_value_index -> start_base_offset + global_context -> current_value_index -> length;
int has_second_read = 1 + global_context -> input_reads.is_paired_end_reads;
- //int need_junction_step = global_context -> config.is_rna_seq_reads || global_context -> config.do_fusion_detection;
+ //int need_junction_step = global_context -> config.do_breakpoint_detection || global_context -> config.do_fusion_detection;
if(thread_context)
thread_context -> current_value_index = global_context -> current_value_index;
int GENE_SLIDING_STEP = global_context -> current_index -> index_gap;
+ int need_junction_step = global_context -> config.do_breakpoint_detection || global_context -> config.do_fusion_detection;
- for(current_read_number = read_block_start; current_read_number < reads_to_be_done + read_block_start ; current_read_number++)
+ while(1)
{
int is_second_read;
int subread_no;
int is_reversed, applied_subreads = 0, v1_all_subreads=0, v2_all_subreads=0;
- ret = fetch_next_read_pair(global_context, thread_context, ginp1, ginp2, &read_len_1, &read_len_2, read_name_1, read_name_2, read_text_1, read_text_2, qual_text_1, qual_text_2,1);
- if(ret)
- break;
+ ret = fetch_next_read_pair(global_context, thread_context, ginp1, ginp2, &read_len_1, &read_len_2, read_name_1, read_name_2, read_text_1, read_text_2, qual_text_1, qual_text_2,1, &current_read_number);
+ //SUBREADprintf("DO_VOTE:%llu\n", current_read_number);
+ if(current_read_number < 0) break;
- //printf("%s\t%d\n%s\t%d\n", read_name_1, thread_context -> thread_id, read_name_2, thread_context -> thread_id);
+ //SUBREADprintf("RL=%d,%d\n", read_len_1, read_len_2);
for(is_reversed = 0; is_reversed<2; is_reversed++)
{
@@ -2992,8 +2847,8 @@ int do_voting(global_context_t * global_context, thread_context_t * thread_conte
unsigned int current_high_border = high_index_border - current_rlen;
- if(global_context->config.is_rna_seq_reads && current_rlen > EXON_LONG_READ_LENGTH && global_context->config.all_threads<2)
- core_fragile_junction_voting(global_context, thread_context, current_read, current_qual, current_rlen, is_reversed, global_context->config.space_type, low_index_border, current_high_border, vote_fg);
+ if(global_context->config.do_breakpoint_detection && current_rlen > EXON_LONG_READ_LENGTH)//&& global_context->config.all_threads<2)
+ core_fragile_junction_voting(global_context, thread_context, read_name_1, current_read, current_qual, current_rlen, is_reversed, global_context->config.space_type, low_index_border, current_high_border, vote_fg);
if(global_context->config.SAM_extra_columns)
{
@@ -3022,6 +2877,7 @@ int do_voting(global_context_t * global_context, thread_context_t * thread_conte
char * subread_string = current_read + subread_offset;
gehash_key_t subread_integer = genekey2int(subread_string, global_context->config.space_type);
+ //SUBREADprintf("The %d-th subread=%s\n", subread_no, subread_string);
if(global_context -> config.use_quality_score_break_ties)
@@ -3031,8 +2887,7 @@ int do_voting(global_context_t * global_context, thread_context_t * thread_conte
}
-
- //SUBREADprintf("%d ", subread_offset);
+ //SUBREADprintf("%d=%u %s\n", subread_offset, subread_integer, subread_string);
if(global_context->config.is_methylation_reads)
gehash_go_q_CtoT(global_context->current_index, subread_integer , subread_offset, current_rlen, is_reversed, current_vote, 1, subread_quality, 0xffffff, voting_max_indel_length, subread_no, 1, low_index_border, high_index_border - current_rlen);
else
@@ -3058,10 +2913,13 @@ int do_voting(global_context_t * global_context, thread_context_t * thread_conte
short max_noninformative_subreads = -1;
for(xk1=0;xk1<GENE_SLIDING_STEP;xk1++)
+ {
+ //SUBREADprintf("NON-INF [%d] = %d\n", xk1, noninformative_subreads_for_each_gap[xk1]);
if(noninformative_subreads_for_each_gap[xk1] > max_noninformative_subreads)
{
max_noninformative_subreads = noninformative_subreads_for_each_gap[xk1];
}
+ }
current_vote -> noninformative_subreads = max_noninformative_subreads;
}
@@ -3071,16 +2929,14 @@ int do_voting(global_context_t * global_context, thread_context_t * thread_conte
if(is_reversed==1 || !global_context->config.do_fusion_detection)
{
- //if(strcmp(read_name_1,"b1")==0)
-
- //if(current_read_number == 119) {
- // SUBREADprintf("NOINF=%d\n", vote_1 -> noninformative_subreads );
- // #warning =============== COMMENT THIS LINE!!!! ======================
- // print_votes(vote_1, global_context -> config.index_prefix);
- //}
- // print_votes(vote_2, global_context -> config.index_prefix);
- //}
+ if(0 && FIXLENstrcmp("R001135677", read_name_1) ==0 )
+ {
+ SUBREADprintf(">>>%llu<<<\n%s [%d] %s\n%s [%d] %s\n", current_read_number, read_name_1, read_len_1, read_text_1, read_name_2, read_len_2, read_text_2);
+ SUBREADprintf(" ======= PAIR %s = %llu ; NON_INFORMATIVE = %d, %d =======\n", read_name_1, current_read_number, vote_1 -> noninformative_subreads, vote_2 -> noninformative_subreads);
+ print_votes(vote_1, global_context -> config.index_prefix);
+ print_votes(vote_2, global_context -> config.index_prefix);
+ }
//finalise_vote(vote_1);
/*
@@ -3091,15 +2947,21 @@ int do_voting(global_context_t * global_context, thread_context_t * thread_conte
}*/
//if(global_context -> input_reads.is_paired_end_reads) finalise_vote(vote_2);
+ //SUBREADprintf("NON-INFORMARTIVE=%d, %d\n", vote_1 -> noninformative_subreads, vote_2 -> noninformative_subreads);
+
if(global_context -> input_reads.is_paired_end_reads)
process_voting_junction(global_context, thread_context, current_read_number, vote_1, vote_2, read_name_1, read_name_2, read_text_1, read_text_2, read_len_1, read_len_2, is_reversed, v1_all_subreads, v2_all_subreads);
else{
if(vote_1->max_vote >= min_first_read_votes)
process_voting_junction(global_context, thread_context, current_read_number, vote_1, vote_2, read_name_1, NULL , read_text_1, NULL, read_len_1, 0, is_reversed, v1_all_subreads, 0);
- else if(_global_retrieve_alignment(global_context, current_read_number, 0,0).selected_votes < 1)
+ else if(_global_retrieve_alignment_ptr(global_context, current_read_number, 0,0) -> selected_votes < 1)
{
- _global_retrieve_alignment_ptr(global_context, current_read_number, 0,0)->used_subreads_in_vote = max(_global_retrieve_alignment(global_context, current_read_number, 0,0).used_subreads_in_vote, applied_subreads);
- _global_retrieve_alignment_ptr(global_context, current_read_number, 0,0)->noninformative_subreads_in_vote = max(_global_retrieve_alignment(global_context, current_read_number, 0,0).noninformative_subreads_in_vote, vote_1 -> noninformative_subreads);
+ mapping_result_t * allll = _global_retrieve_alignment_ptr(global_context, current_read_number, 0,0);
+ allll -> noninformative_subreads_in_vote = 0;
+
+
+ _global_retrieve_alignment_ptr(global_context, current_read_number, 0,0)->used_subreads_in_vote = max(_global_retrieve_alignment_ptr(global_context, current_read_number, 0,0)->used_subreads_in_vote, applied_subreads);
+ _global_retrieve_alignment_ptr(global_context, current_read_number, 0,0)->noninformative_subreads_in_vote = max(_global_retrieve_alignment_ptr(global_context, current_read_number, 0,0)->noninformative_subreads_in_vote, vote_1 -> noninformative_subreads);
}
}
@@ -3119,14 +2981,67 @@ int do_voting(global_context_t * global_context, thread_context_t * thread_conte
reverse_quality(qual_text_2, read_len_2);
}
}
+ else if(0)
+ {
+ mapping_result_t * current_result_1 = _global_retrieve_alignment_ptr (global_context, current_read_number, 0,0);
+ mapping_result_t * current_result_2 = _global_retrieve_alignment_ptr (global_context, current_read_number, 1,0);
+
+ SUBREADprintf("FIN R %s : V=%d , %d",read_name_1, current_result_1 -> selected_votes, current_result_2 -> selected_votes);
+ }
+
+ if(is_reversed==1)
+ bigtable_release_result(global_context, thread_context, current_read_number, 1);
+
+ }
+
+ int read_1_reversed = 1;
+ int read_2_reversed = 1;
+ int best_read_id;
+
+ if(global_context -> is_final_voting_run){
+ for(is_second_read = 0; is_second_read < 1 + global_context -> input_reads.is_paired_end_reads; is_second_read ++)
+ {
+ int * this_read_has_reversed = is_second_read ? &read_2_reversed:&read_1_reversed;
+ char * current_read_name = is_second_read?read_name_2 : read_name_1;
+ char * current_read = is_second_read?read_text_2 : read_text_1;
+ char * current_qual = is_second_read?qual_text_2 : qual_text_1;
+ int current_rlen = is_second_read?read_len_2:read_len_1;
+
+ for(best_read_id = 0; best_read_id < global_context -> config.multi_best_reads; best_read_id++)
+ {
+ mapping_result_t * current_r = _global_retrieve_alignment_ptr(global_context, current_read_number, is_second_read,best_read_id);
+ if(current_r -> selected_votes < 1) continue;
+
+ int this_read_should_be_reversed = (current_r -> result_flags & CORE_IS_NEGATIVE_STRAND) ? 1:0;
+
+ //SUBREADprintf("DETECT INDEL: should_reverse = %d, this_has_reversed = %d\n", this_read_should_be_reversed, *this_read_has_reversed);
+
+ if(this_read_should_be_reversed != (*this_read_has_reversed))
+ {
+ (*this_read_has_reversed) = !(*this_read_has_reversed);
+ reverse_read(current_read, current_rlen, global_context->config.space_type);
+ if(current_qual)
+ reverse_quality(current_qual , current_rlen);
+ }
+
+ gene_value_index_t * curr_val_index = thread_context? thread_context -> current_value_index: global_context -> current_value_index;
+ locate_current_value_index(global_context, thread_context, current_r, current_rlen);
+ //#warning "==== UNCOMMENT THE NEXT THREE LINES ===="
+ find_new_indels(global_context, thread_context, current_read_number, current_read_name, current_read, current_qual, current_rlen, is_second_read, best_read_id);
+ if(need_junction_step)
+ find_new_junctions(global_context, thread_context, current_read_number, current_read_name, current_read, current_qual, current_rlen, is_second_read, best_read_id);
+ if(thread_context) thread_context -> current_value_index = curr_val_index;
+ else global_context -> current_value_index = curr_val_index;
+ }
+ }
}
if(!thread_context || thread_context->thread_id == 0)
{
if(sqr_read_number > sqr_interval)
{
- show_progress(global_context, thread_context, processed_reads, STEP_VOTING);
+ show_progress(global_context, thread_context, current_read_number, STEP_VOTING);
sqr_read_number = 0;
unsigned long long total_file_size = global_context -> input_reads.first_read_file_size;
unsigned long long guessed_all_reads = total_file_size / global_context -> input_reads . avg_read_length;// / (1+global_context -> config.is_SAM_file_input);
@@ -3135,18 +3050,10 @@ int do_voting(global_context_t * global_context, thread_context_t * thread_conte
}
-
-
sqr_read_number++;
- processed_reads++;
}
- if(thread_context)
- thread_context -> processed_reads_in_chunk = processed_reads;
- else
- global_context -> processed_reads_in_chunk = processed_reads;
-
free(vote_1);
free(vote_2);
free(vote_fg);
@@ -3186,9 +3093,16 @@ void * run_in_thread(void * pthread_param)
//sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_DETAILS, "finished running %d", task);
+ if(thread_context)
+ thread_context -> is_finished = 1;
+
return NULL;
}
+void finalise_buffered_output_file(global_context_t *global_context){
+// merge_buffered_output_file(global_context, 0 , 0);
+}
+
int run_maybe_threads(global_context_t *global_context, int task)
{
void * thr_parameters [5];
@@ -3197,9 +3111,9 @@ int run_maybe_threads(global_context_t *global_context, int task)
if(task==STEP_VOTING)
print_in_box(80,0,0, "Map %s...", global_context->input_reads.is_paired_end_reads?"fragments":"reads");
else if(task == STEP_ITERATION_ONE)
- print_in_box(80,0,0, "Detect indels%s...", global_context->config.is_rna_seq_reads?" and junctions":"");
+ print_in_box(80,0,0, "Detect indels%s...", global_context->config.do_breakpoint_detection?" and junctions":"");
else if(task == STEP_ITERATION_TWO)
- print_in_box(80,0,0, "Realign %s...", global_context->input_reads.is_paired_end_reads?"fragments":"reads");
+ print_in_box(80,0,0, "Finish the %'llu %s...", global_context -> processed_reads_in_chunk, global_context->input_reads.is_paired_end_reads?"fragments":"reads");
if(global_context->config.all_threads<2)
{
@@ -3217,15 +3131,18 @@ int run_maybe_threads(global_context_t *global_context, int task)
thread_context_t thread_contexts[64];
int ret_values[64];
+ memset(thread_contexts, 0, sizeof(thread_context_t)*64);
+ global_context -> all_thread_contexts = thread_contexts;
+ if(task == STEP_ITERATION_TWO)
+ global_context -> last_written_fragment_number = 0;
+
for(current_thread_no = 0 ; current_thread_no < global_context->config.all_threads ; current_thread_no ++)
{
thread_contexts[current_thread_no].thread_id = current_thread_no;
init_indel_thread_contexts(global_context, thread_contexts+current_thread_no, task);
- if(global_context->config.is_rna_seq_reads || global_context->config.do_fusion_detection)
+ if(global_context->config.do_breakpoint_detection || global_context->config.do_fusion_detection)
init_junction_thread_contexts(global_context, thread_contexts+current_thread_no, task);
- relocate_geinputs(global_context, thread_contexts+current_thread_no);
-
subread_lock_occupy(&global_context -> thread_initial_lock);
thr_parameters[0] = global_context;
thr_parameters[1] = thread_contexts+current_thread_no;
@@ -3236,32 +3153,20 @@ int run_maybe_threads(global_context_t *global_context, int task)
pthread_create(&thread_contexts[current_thread_no].thread, NULL, run_in_thread, &thr_parameters);
}
- if(task == STEP_VOTING)
- {
- global_context -> processed_reads_in_chunk=0;
- }
for(current_thread_no = 0 ; current_thread_no < global_context->config.all_threads ; current_thread_no ++)
{
pthread_join(thread_contexts[current_thread_no].thread, NULL);
- geinput_close(thread_contexts[current_thread_no].ginp1);
- free(thread_contexts[current_thread_no].ginp1);
-
- if(global_context->input_reads.is_paired_end_reads)
- {
- geinput_close(thread_contexts[current_thread_no].ginp2);
- free(thread_contexts[current_thread_no].ginp2);
- }
-
ret_value += *(ret_values + current_thread_no);
if(ret_value)break;
+ }
+ if(STEP_ITERATION_TWO == task)
+ finalise_buffered_output_file(global_context);
- if(task == STEP_VOTING)
- {
- //sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_DEBUG, "The %d-th thread processed %u reads.", current_thread_no , thread_contexts[current_thread_no].processed_reads_in_chunk);
- global_context -> processed_reads_in_chunk += thread_contexts[current_thread_no].processed_reads_in_chunk;
- }
-
+ for(current_thread_no = 0 ; current_thread_no < global_context->config.all_threads ; current_thread_no ++)
+ {
+ if(thread_contexts[current_thread_no].output_buffer_item > 0)
+ SUBREADprintf("ERROR: UNFINISHED OUTPUT!\n");
finalise_indel_thread(global_context, thread_contexts+current_thread_no, task);
finalise_junction_thread(global_context, thread_contexts+current_thread_no, task);
}
@@ -3274,139 +3179,37 @@ int run_maybe_threads(global_context_t *global_context, int task)
void clean_context_after_chunk(global_context_t * context)
{
- memset(context -> chunk_alignment_records , 0 , sizeof(alignment_result_t) * context ->config.reads_per_chunk * (context->input_reads.is_paired_end_reads?2:1) * context->config.multi_best_reads);
- memset(context -> big_margin_record , 0 , sizeof(*context -> big_margin_record) * context ->config.reads_per_chunk * (context->input_reads.is_paired_end_reads?2:1) * context->config.big_margin_record_size);
- if(context ->chunk_subjunc_records)
- memset(context ->chunk_subjunc_records , 0 , sizeof(subjunc_result_t) * context ->config.reads_per_chunk * (context->input_reads.is_paired_end_reads?2:1) * context->config.multi_best_reads);
+ context -> running_processed_reads_in_chunk = 0;
+ context -> processed_reads_in_chunk = 0;
+ init_bigtable_results(context, 1);
+
+ indel_context_t * indel_context = (indel_context_t *)context -> module_contexts[MODULE_INDEL_ID];
+ chromosome_event_t * event_space = indel_context -> event_space_dynamic;
+
+ int event_id;
+ //memset(context -> big_margin_record , 0 , sizeof(*context -> big_margin_record) * context ->config.reads_per_chunk * (context->input_reads.is_paired_end_reads?2:1) * context->config.big_margin_record_size);
+ for(event_id = 0; event_id < indel_context->total_events; event_id++){
+ chromosome_event_t * event_body = event_space + event_id;
+ event_body -> critical_read_id = 0xffffffffffffffffllu;
+ }
}
#define SKIP_CORE_NOEMPTY(fp_loc, buf_loc) { while(1){char *ret_loc = fgets(buf_loc, 3000, fp_loc); if(buf_loc[0]!='\n' || !ret_loc) break; } }
-unsigned int split_read_files(global_context_t * global_context)
-{
- unsigned int chunk_reads = global_context->config.reads_per_chunk;
- unsigned int processed_reads = 0;
- unsigned long long * read_position_1;
- unsigned long long * read_position_2 = NULL;
- char * read_line_buf = malloc(3002);
- char * read_line_buf2 = malloc(3002);
- read_position_1 = (unsigned long long*)malloc(global_context->config.reads_per_chunk * sizeof(long long));
- if(global_context->input_reads.is_paired_end_reads)
- read_position_2 = (unsigned long long*)malloc(global_context->config.reads_per_chunk * sizeof(long long));
-
- print_in_box(80,0,0, "Scan read files for multi-threaded alignment...");
-
- if(global_context->config.is_SAM_file_input)
- {
- unsigned long long fhead_pos1;
- unsigned long long fhead_pos2=0;
-
- while(1)
- {
- char * tok_tmp = NULL, * flag, *flag2;
- if(processed_reads >= chunk_reads || feof(global_context->input_reads.first_read_file.input_fp))
- break;
-
- fhead_pos1 = ftello(global_context->input_reads.first_read_file.input_fp);
- if(global_context->input_reads.is_paired_end_reads)
- fhead_pos2 = ftello(global_context->input_reads.second_read_file.input_fp);
-
- fgets(read_line_buf, 3000, global_context->input_reads.first_read_file.input_fp);
- if(global_context->input_reads.is_paired_end_reads)
- fgets(read_line_buf2, 3000, global_context->input_reads.second_read_file.input_fp);
-
- flag = strtok_r(read_line_buf,"\t",&tok_tmp);
- if(!flag) break;
-
- flag = strtok_r(NULL,"\t",&tok_tmp);
- if(!flag) break;
-
-
- int flagi2 = 0;
- if(global_context->input_reads.is_paired_end_reads)
- {
- flag2 = strtok_r(read_line_buf2,"\t",&tok_tmp);
- flag2 = strtok_r(NULL,"\t",&tok_tmp);
- if(!flag2) break;
- flagi2 = atoi(flag2);
- }
-
- int flagi1 = atoi(flag);
-
- if((flagi1 & 0x100) == 0 && (flagi2&0x100) == 0)
- {
- read_position_1[processed_reads] = fhead_pos1;
- if(global_context->input_reads.is_paired_end_reads)
- read_position_2[processed_reads] = fhead_pos2;
- processed_reads++;
- }
- if(global_context->input_reads.is_paired_end_reads)
- {
- fgets(read_line_buf, 3000, global_context->input_reads.first_read_file.input_fp);
- fgets(read_line_buf2, 3000, global_context->input_reads.second_read_file.input_fp);
- }
- }
- //printf("PPPP=%llu\n", processed_reads);
-
- }
- else{
- while(1)
- {
- if(processed_reads >= chunk_reads || feof(global_context->input_reads.first_read_file.input_fp))
- break;
-
- read_position_1[processed_reads] = ftello(global_context->input_reads.first_read_file.input_fp);
- if(global_context->input_reads.is_paired_end_reads)
- read_position_2[processed_reads] = ftello(global_context->input_reads.second_read_file.input_fp);
-
- processed_reads++;
-
- geinput_jump_read(&global_context->input_reads.first_read_file);
- if(global_context->input_reads.is_paired_end_reads)
- geinput_jump_read(&global_context->input_reads.second_read_file);
- }
- }
-
- free(read_line_buf);
- free(read_line_buf2);
-
- int thread_no;
- for(thread_no = 0; thread_no < global_context->config.all_threads; thread_no++)
- {
- unsigned int my_start_read_no = processed_reads / global_context->config.all_threads * thread_no;
- unsigned int my_reads = (thread_no == global_context->config.all_threads-1)?(processed_reads - my_start_read_no):(processed_reads / global_context->config.all_threads);
- unsigned long long my_first_file_start = read_position_1[my_start_read_no];
- unsigned long long my_second_file_start = 0;
- if(global_context->input_reads.is_paired_end_reads)
- my_second_file_start = read_position_2[my_start_read_no];
-
- global_context -> input_reads.first_file_blocks[thread_no] = my_first_file_start;
- global_context -> input_reads.reads_in_blocks[thread_no] = my_reads;
- global_context -> input_reads.start_read_number_blocks[thread_no] = my_start_read_no;
-
- if(global_context->input_reads.is_paired_end_reads)
- global_context -> input_reads.second_file_blocks[thread_no] = my_second_file_start;
- }
-
- free(read_position_1);
- if(read_position_2)
- free(read_position_2);
- return processed_reads;
-}
-
void locate_read_files(global_context_t * global_context, int type)
{
if(type==SEEK_SET)
{
- global_context -> current_circle_start_position_file1 = ftello(global_context -> input_reads.first_read_file.input_fp);
+ global_context -> current_circle_start_abs_offset_file1 = geinput_file_offset(&(global_context -> input_reads.first_read_file));
+ geinput_tell(&global_context -> input_reads.first_read_file, &global_context -> current_circle_start_position_file1);
if(global_context ->input_reads.is_paired_end_reads)
- global_context -> current_circle_start_position_file2 = ftello(global_context -> input_reads.second_read_file.input_fp);
+ geinput_tell(&global_context -> input_reads.second_read_file, &global_context -> current_circle_start_position_file2);
}
else
{
- global_context -> current_circle_end_position_file1 = ftello(global_context -> input_reads.first_read_file.input_fp);
+ geinput_tell(&global_context -> input_reads.first_read_file, &global_context -> current_circle_end_position_file1);
if(global_context ->input_reads.is_paired_end_reads)
- global_context -> current_circle_end_position_file2 = ftello(global_context -> input_reads.second_read_file.input_fp);
+ geinput_tell(&global_context -> input_reads.second_read_file, &global_context -> current_circle_end_position_file2);
}
}
@@ -3414,17 +3217,18 @@ void reward_read_files(global_context_t * global_context, int type)
{
if(type==SEEK_SET)
{
- fseeko(global_context -> input_reads.first_read_file.input_fp, global_context -> current_circle_start_position_file1, SEEK_SET);
+ geinput_seek(&global_context -> input_reads.first_read_file, & global_context -> current_circle_start_position_file1);
if(global_context ->input_reads.is_paired_end_reads)
- fseeko(global_context -> input_reads.second_read_file.input_fp, global_context -> current_circle_start_position_file2, SEEK_SET);
+ geinput_seek(&global_context -> input_reads.second_read_file, & global_context -> current_circle_start_position_file2);
}
else
{
- fseeko(global_context -> input_reads.first_read_file.input_fp, global_context -> current_circle_end_position_file1, SEEK_SET);
+ geinput_seek(&global_context -> input_reads.first_read_file, & global_context -> current_circle_end_position_file1);
if(global_context ->input_reads.is_paired_end_reads)
- fseeko(global_context -> input_reads.second_read_file.input_fp, global_context -> current_circle_end_position_file2, SEEK_SET);
+ geinput_seek(&global_context -> input_reads.second_read_file, & global_context -> current_circle_end_position_file2);
}
+ global_context -> running_processed_reads_in_chunk=0;
}
@@ -3434,104 +3238,103 @@ int read_chunk_circles(global_context_t *global_context)
// printf("GINP1 AT %llu\n", ftello(global_context -> input_reads.first_read_file.input_fp));
+ unsigned int chunk_no = 0;
+
+ global_context -> current_index = (gehash_t*) malloc(sizeof(gehash_t));
+ global_context -> current_value_index = global_context -> all_value_indexes;
+ global_context -> running_processed_reads_in_chunk=0;
+ global_context -> processed_reads_in_chunk=0;
+
+ double time_load_index = miltime();
+ for(block_no = 0; block_no< global_context->index_block_number; block_no++)
+ {
+ char tmp_fname[MAX_FILE_NAME_LENGTH];
+ sprintf(tmp_fname, "%s.%02d.%c.array", global_context->config.index_prefix, block_no, global_context->config.space_type == GENE_SPACE_COLOR?'c':'b');
+ if(gvindex_load(&global_context -> all_value_indexes[block_no], tmp_fname)) return -1;
+ }
+ double period_load_index = miltime() - time_load_index;
+ global_context -> timecost_load_index += period_load_index;
+
while(1)
{
int ret;
locate_read_files(global_context, SEEK_SET);
- if(global_context -> config.all_threads>1)
- {
- split_read_files(global_context);
- locate_read_files(global_context, SEEK_END);
- reward_read_files(global_context, SEEK_SET);
- }
-
- global_context -> current_index = (gehash_t*) malloc(sizeof(gehash_t));
- global_context -> current_value_index = (gene_value_index_t*) malloc(sizeof(gene_value_index_t));
for(global_context->current_index_block_number = 0; global_context->current_index_block_number < global_context->index_block_number; global_context->current_index_block_number++)
{
char tmp_fname[MAX_FILE_NAME_LENGTH];
- sprintf(tmp_fname, "%s.%02d.%c.tab", global_context->config.index_prefix, global_context->current_index_block_number, global_context->config.space_type == GENE_SPACE_COLOR?'c':'b');
- print_in_box(80,0,0, "Load the %d-th index block...",1+ global_context->current_index_block_number);
-
-
- if(gehash_load(global_context -> current_index, tmp_fname)) return -1;
+ time_load_index = miltime();
+ if(global_context->index_block_number > 1 || chunk_no == 0)
+ {
+ sprintf(tmp_fname, "%s.%02d.%c.tab", global_context->config.index_prefix, global_context->current_index_block_number, global_context->config.space_type == GENE_SPACE_COLOR?'c':'b');
+ print_in_box(80,0,0, "Load the %d-th index block...",1+ global_context->current_index_block_number);
- sprintf(tmp_fname, "%s.%02d.%c.array", global_context->config.index_prefix, global_context->current_index_block_number, global_context->config.space_type == GENE_SPACE_COLOR?'c':'b');
- if(gvindex_load(global_context -> current_value_index, tmp_fname)) return -1;
+ if(gehash_load(global_context -> current_index, tmp_fname)) return -1;
+ sprintf(tmp_fname, "%s.%02d.%c.array", global_context->config.index_prefix, global_context->current_index_block_number, global_context->config.space_type == GENE_SPACE_COLOR?'c':'b');
+ }
+ period_load_index = miltime() - time_load_index;
+ global_context -> timecost_load_index += period_load_index;
+ global_context -> current_value_index = global_context -> all_value_indexes + global_context->current_index_block_number;
if(global_context->current_index_block_number ==0 && global_context -> all_processed_reads==0)
global_context->align_start_time = miltime();
+ if(global_context->index_block_number == global_context->current_index_block_number + 1)
+ global_context -> is_final_voting_run = 1;
+ else global_context -> is_final_voting_run = 0;
+
+ double time_start_voting = miltime();
ret = run_maybe_threads(global_context, STEP_VOTING);
+ double period_voting = miltime() - time_start_voting;
+ global_context -> timecost_voting += period_voting;
- if(global_context -> config.all_threads<2 && global_context->current_index_block_number ==0)
+ if(0 == global_context->current_index_block_number){
locate_read_files(global_context, SEEK_END);
+ global_context -> processed_reads_in_chunk = global_context -> running_processed_reads_in_chunk;
+ }
+
if(global_context->current_index_block_number < global_context->index_block_number -1)
reward_read_files(global_context, SEEK_SET);
- gehash_destory_fast(global_context -> current_index);
- gvindex_destory(global_context -> current_value_index);
+ int is_last_chunk = global_context -> processed_reads_in_chunk < global_context->config.reads_per_chunk;
+ //SUBREADprintf("LAST_CHUNK=%d, INDEX_BLOCKS=%d\n", is_last_chunk, global_context->index_block_number );
+
+ if(global_context->index_block_number > 1 || is_last_chunk)
+ gehash_destory_fast(global_context -> current_index);
+
if(ret) break;
if(!global_context -> processed_reads_in_chunk) break;
}
- free(global_context -> current_index);
- free(global_context -> current_value_index);
-
//sublog_printf(SUBLOG_STAGE_DEV1, SUBLOG_LEVEL_DEBUG, "%d reads have been processed in this chunk.", global_context -> processed_reads_in_chunk);
// after the voting step, all subread index blocks are released and all base index blocks are loaded at once.
- for(block_no = 0; block_no< global_context->index_block_number; block_no++)
- {
- char tmp_fname[MAX_FILE_NAME_LENGTH];
- sprintf(tmp_fname, "%s.%02d.%c.array", global_context->config.index_prefix, block_no, global_context->config.space_type == GENE_SPACE_COLOR?'c':'b');
- if(gvindex_load(&global_context -> all_value_indexes[block_no], tmp_fname)) return -1;
- }
- if(!global_context -> processed_reads_in_chunk)
- // base value indexes loaded in the last circle are not destroyed and are used in writting the indel VCF.
- // the indexes will be destroyed in destroy_global_context
- break;
-
- reward_read_files(global_context, SEEK_SET);
- ret = run_maybe_threads(global_context, STEP_ITERATION_ONE);
+ //reward_read_files(global_context, SEEK_SET);
+ //ret = run_maybe_threads(global_context, STEP_ITERATION_ONE);
+ double time_before_realign = miltime();
ret = anti_supporting_read_scan(global_context);
-
- //HashTable * event_table = ((indel_context_t *)global_context -> module_contexts[MODULE_INDEL_ID])->event_entry_table;
- //sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_INFO, "There are %ld elements in the indel table before filtering.", event_table ->numOfElements);
-
remove_neighbour(global_context);
- //sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_INFO, "There are only %ld elements in the indel table after filtering.", event_table ->numOfElements);
-
reward_read_files(global_context, SEEK_SET);
+ double period_before_realign = miltime() - time_before_realign;
+ global_context -> timecost_before_realign += period_before_realign;
+
+ double time_realign = miltime();
ret = ret || run_maybe_threads(global_context, STEP_ITERATION_TWO);
+ double period_realign = miltime() - time_realign;
- // printf("IBytes=%d+%d = %d\n", global_context -> all_value_indexes[0].start_base_offset, global_context -> all_value_indexes[0].values_bytes, global_context -> all_value_indexes[0].values [global_context -> all_value_indexes[0].values_bytes-1]);
+ global_context -> timecost_for_realign += period_realign;
- //gene_value_index_t * value_index = &global_context->all_value_indexes[0] ;
-
- //printf("=== I=%016llX B=%016llX\n", (long long)value_index , (long long)value_index -> values);
if(global_context -> config.is_third_iteration_running)
{
reward_read_files(global_context, SEEK_SET);
ret = ret || do_iteration_three(global_context, NULL);
}
- if(global_context -> config.report_sam_file)
- {
- reward_read_files(global_context, SEEK_SET);
- print_in_box(80, 0, 0, "%u %s were processed. Save the mapping results for them...", global_context ->processed_reads_in_chunk, global_context -> input_reads.is_paired_end_reads?"fragments":"reads");
- ret = ret || write_chunk_results(global_context);
- if('\r' == CORE_SOFT_BR_CHAR)
- sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_INFO,"");
-
- }
-
reward_read_files(global_context, SEEK_END);
global_context -> all_processed_reads+= global_context ->processed_reads_in_chunk;
@@ -3539,17 +3342,16 @@ int read_chunk_circles(global_context_t *global_context)
if(ret) return ret;
if(global_context -> processed_reads_in_chunk < global_context->config.reads_per_chunk)
- break;
- else
// base value indexes loaded in the last circle are not destroyed and are used in writting the indel VCF.
// the indexes will be destroyed in destroy_global_context
- for(block_no = 0; block_no< global_context->index_block_number; block_no++)
- gvindex_destory(&global_context -> all_value_indexes[block_no]);
-
+ break;
clean_context_after_chunk(global_context);
+ chunk_no++;
}
+ free(global_context -> current_index);
+
// load all array index blocks at once.
if(global_context -> config.is_third_iteration_running)
{
@@ -3604,23 +3406,24 @@ void print_subread_logo()
int print_configuration(global_context_t * context)
{
+ setlocale(LC_NUMERIC, "");
sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_ERROR,"");
print_subread_logo();
sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_ERROR,"");
print_in_box(80, 1, 1, context->config.entry_program_name == CORE_PROGRAM_SUBJUNC?"subjunc setting":"subread-align setting");
print_in_box(80, 0, 1, "");
- if(context->config.is_rna_seq_reads)
+ if(context->config.do_breakpoint_detection)
{
if(context->config.do_fusion_detection)
{
- print_in_box(80, 0, 0, " Function : Read alignment + Junction/Fusion detection%s", context->config.prefer_donor_receptor_junctions?" (RNA-Seq)":" (DNA-Seq)");
+ print_in_box(80, 0, 0, " Function : Read alignment + Junction/Fusion detection%s", context->config.experiment_type == CORE_EXPERIMENT_DNASEQ?" (DNA-Seq)":" (RNA-Seq)");
}
else
- print_in_box(80, 0, 0, " Function : Read alignment + Junction detection (RNA-Seq)");
+ print_in_box(80, 0, 0, " Function : Read alignment + Junction detection (%s)", context->config.experiment_type == CORE_EXPERIMENT_DNASEQ?"DNA-Seq":"RNA-Seq");
}
else
- print_in_box(80, 0, 0, " Function : Read alignment");
+ print_in_box(80, 0, 0, " Function : Read alignment%s", context->config.experiment_type == CORE_EXPERIMENT_DNASEQ?" (DNA-Seq)":" (RNA-Seq)");
print_in_box(80, 0, 0, " Threads : %d", context->config.all_threads);
if( context->config.second_read_file[0])
{
@@ -3641,6 +3444,7 @@ int print_configuration(global_context_t * context)
print_in_box(80, 0, 1, "");
if( context->config.second_read_file[0])
{
+ print_in_box(80, 0, 0, " All subreads : %d", context->config.total_subreads);
print_in_box(80, 0, 0, " Min read1 votes : %d", context->config.minimum_subread_for_first_read);
print_in_box(80, 0, 0, " Min read2 votes : %d", context->config.minimum_subread_for_second_read);
print_in_box(80, 0, 0, " Max fragment size : %d", context->config.maximum_pair_distance);
@@ -3648,8 +3452,9 @@ int print_configuration(global_context_t * context)
print_in_box(80, 0, 1, "");
}
else
- print_in_box(80, 0, 0, " Min votes : %d", context->config.minimum_subread_for_first_read);
+ print_in_box(80, 0, 0, " Min votes : %d / %d", context->config.minimum_subread_for_first_read, context->config.total_subreads);
+ print_in_box(80, 0, 0, " Allowed mismatch : %d bases", context->config.max_mismatch_exonic_reads);
print_in_box(80, 0, 0, " Max indels : %d", context->config.max_indel_length);
print_in_box(80, 0, 0, " # of Best mapping : %d", context->config.multi_best_reads);
print_in_box(80, 0, 0, " Unique mapping : %s", context->config.report_multi_mapping_reads?"no":"yes");
@@ -3666,6 +3471,12 @@ int print_configuration(global_context_t * context)
print_in_box(80, 2, 1, "http://subread.sourceforge.net/");
sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_ERROR,"");
+
+ if(!context->config.experiment_type){
+ sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_ERROR,"You have to specify the experiment type by using the '-t' option.\n");
+ return -1;
+ }
+
if(!context->config.first_read_file[0])
{
sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_ERROR,"You have to specify at least one input file in the FASTQ/FASTA/PLAIN format using the '-r' option.\n");
@@ -3698,29 +3509,15 @@ int print_configuration(global_context_t * context)
int init_paired_votes(global_context_t *context)
{
- if(context -> config.is_rna_seq_reads)
- context -> chunk_subjunc_records = malloc(sizeof(subjunc_result_t) * context ->config.reads_per_chunk * (context->input_reads.is_paired_end_reads?2:1) * context->config.multi_best_reads);
- else context -> chunk_subjunc_records = NULL;
- context -> chunk_alignment_records = malloc(sizeof(alignment_result_t) * context ->config.reads_per_chunk * (context->input_reads.is_paired_end_reads?2:1) * context->config.multi_best_reads);
-
+ init_bigtable_results(context, 0);
- if(!context -> chunk_alignment_records)
- {
- return 1;
- }
+ //context -> big_margin_record = malloc( sizeof(*context -> big_margin_record) * (context->input_reads.is_paired_end_reads?2:1) * context -> config.big_margin_record_size * context ->config.reads_per_chunk);
- context -> big_margin_record = malloc( sizeof(*context -> big_margin_record) * (context->input_reads.is_paired_end_reads?2:1) * context -> config.big_margin_record_size * context ->config.reads_per_chunk);
+ //memset(context ->big_margin_record , 0 , sizeof(*context -> big_margin_record) *context ->config.reads_per_chunk * (context->input_reads.is_paired_end_reads?2:1) * context -> config.big_margin_record_size);
- memset(context ->big_margin_record , 0 , sizeof(*context -> big_margin_record) *context ->config.reads_per_chunk * (context->input_reads.is_paired_end_reads?2:1) * context -> config.big_margin_record_size);
- memset(context ->chunk_alignment_records , 0 , sizeof(alignment_result_t) * context ->config.reads_per_chunk * (context->input_reads.is_paired_end_reads?2:1) * context->config.multi_best_reads);
-
- //fprintf(stderr, "MALLOC=%llu = %d * %d * %d \n", sizeof(alignment_result_t) * context ->config.reads_per_chunk * (context->input_reads.is_paired_end_reads?2:1) * context->config.multi_best_reads, sizeof(alignment_result_t), context ->config.reads_per_chunk, context->config.multi_best_reads);
+ //fprintf(stderr, "MALLOC=%llu = %d * %d * %d \n", sizeof(mapping_result_t) * context ->config.reads_per_chunk * (context->input_reads.is_paired_end_reads?2:1) * context->config.multi_best_reads, sizeof(mapping_result_t), context ->config.reads_per_chunk, context->config.multi_best_reads);
//sleep(10000);
- if(context -> chunk_subjunc_records)
- memset(context ->chunk_subjunc_records , 0 , sizeof(subjunc_result_t) * context ->config.reads_per_chunk * (context->input_reads.is_paired_end_reads?2:1) * context->config.multi_best_reads);
-
-
return 0;
}
@@ -3772,6 +3569,7 @@ int load_global_context(global_context_t * context)
context -> is_phred_warning = 0;
+ subread_init_lock(&context->input_reads.input_lock);
if(core_geinput_open(context, &context->input_reads.first_read_file, 1,1))
{
//sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_ERROR,"Unable to open '%s' as input. Please check if it exists, you have the permission to read it, and it is in the correct format.\n", context->config.first_read_file);
@@ -3798,33 +3596,66 @@ int load_global_context(global_context_t * context)
//sublog_printf(SUBLOG_STAGE_RELEASED, SUBLOG_LEVEL_ERROR,"Unable to open '%s' as input. Please check if it exists, you have the permission to read it, and it is in the correct format.\n", context->config.second_read_file);
return -1;
}
+
+ context -> config.max_vote_combinations = 3;
+ context -> config.multi_best_reads = 3;
+ context -> config.max_vote_simples = 64;
+ context -> config.max_vote_number_cutoff = 2;
+ }else{
+ context -> config.max_vote_combinations = 3;
+ context -> config.multi_best_reads = 3;
+ context -> config.max_vote_simples = 3;
+ context -> config.max_vote_number_cutoff = 2;
}
+ context -> config.multi_best_reads = max(context -> config.multi_best_reads , context -> config.reported_multi_best_reads);
+ context -> config.max_vote_simples = max(context -> config.max_vote_simples , context -> config.reported_multi_best_reads);
+ context -> config.max_vote_combinations = max(context -> config.max_vote_combinations , context -> config.reported_multi_best_reads);
- if(context->input_reads.is_paired_end_reads)
- context->config.reads_per_chunk = 7*1024*1024*min(40,max(0.01,context->config.memory_use_multiplex));
- else
- context->config.reads_per_chunk = 14*1024*1024*min(40,max(0.01,context->config.memory_use_multiplex));
+ if(context->config.reads_per_chunk > 384*1024*1024){
+ if(context->input_reads.is_paired_end_reads)
+ context->config.reads_per_chunk = 512*1024*1024*min(40,max(0.01,context->config.memory_use_multiplex));
+ else
+ context->config.reads_per_chunk = 1024*1024*1024*min(40,max(0.01,context->config.memory_use_multiplex));
+ }else{
+ if(context->input_reads.is_paired_end_reads) context->config.reads_per_chunk /= 2;
+ if(context->config.multi_best_reads>1) context->config.reads_per_chunk /= context->config.multi_best_reads;
+ //#warning "COMMENT NEXT LINE!!!!!!"
+ // context->config.reads_per_chunk /= 16;
+ }
struct stat ginp1_stat;
+ int guess_tested_reads = 0;
stat(context->config.first_read_file , &ginp1_stat);
context->input_reads.first_read_file_size = ginp1_stat.st_size;
- context -> input_reads.avg_read_length = guess_reads_density_format(context->config.first_read_file , context->config.is_SAM_file_input?1:0, &min_phred_score, &max_phred_score);
+ context -> input_reads.avg_read_length = guess_reads_density_format(context->config.first_read_file , context->config.is_SAM_file_input?1:0, &min_phred_score, &max_phred_score , &guess_tested_reads);
if(context -> input_reads.avg_read_length<0 )context -> input_reads.avg_read_length = 250;
// SUBREADprintf("QR=[%d,%d]; ALEN=%f\n", min_phred_score, max_phred_score, context -> input_reads.avg_read_length);
if(max_phred_score>=0)
{
- if((context->config.phred_score_format == FASTQ_PHRED64 && min_phred_score < 65) || (context->config.phred_score_format == FASTQ_PHRED33 && max_phred_score > 33+50))
+ int inferred_offset;
+
+ if(abs(min_phred_score - 33) < abs(min_phred_score - 64)) inferred_offset = 33;
+ else inferred_offset = 64;
+
+ if((context->config.phred_score_format == FASTQ_PHRED64 && inferred_offset == 33) ||
+ (context->config.phred_score_format == FASTQ_PHRED33 && inferred_offset == 64))
{
- print_in_box(80,0,0, "WARNING The specified phred-score offset (%d) seems to be incorrect.", context->config.phred_score_format == FASTQ_PHRED33?33:64);
- print_in_box(80,0,0, " The observed phred-score range is [%d,%d].", min_phred_score, max_phred_score);
+ print_in_box(80,0,0, "WARNING - The specified Phred score offset (%d) seems incorrect.", context->config.phred_score_format == FASTQ_PHRED33?33:64);
+ print_in_box(80,0,0, " ASCII values of the quality scores of read bases included in");
+ print_in_box(80,0,0, " the first %d reads were found to be within the range of",guess_tested_reads);
+ print_in_box(80,0,0, " [%d,%d].", min_phred_score, max_phred_score);
print_in_box(80,0,0, "");
context -> is_phred_warning = 1;
}
+ else{
+ print_in_box(80,0,0, "The range of Phred scores observed in the data is [%d,%d]", min_phred_score - inferred_offset, max_phred_score - inferred_offset);
+ }
}
+ subread_init_lock(&context -> output_lock);
if(context->config.report_sam_file && context -> config.output_prefix[0])
{
@@ -3841,6 +3672,8 @@ int load_global_context(global_context_t * context)
else
{
context -> output_sam_fp = f_subr_open(tmp_fname,"wb");
+ //context -> output_sam_inner_buffer = malloc(OUTPUT_BUFFER_SIZE);
+ //setvbuf (context -> output_sam_fp, context -> output_sam_inner_buffer, _IOFBF, OUTPUT_BUFFER_SIZE);
context -> output_bam_writer = NULL;
}
if((!context -> output_bam_writer) && (!context->output_sam_fp))
@@ -3895,6 +3728,7 @@ int load_global_context(global_context_t * context)
context->current_index_block_number = 0;
load_offsets(&context->chromosome_table, context->config.index_prefix);
+
if(context->config.report_sam_file)
write_sam_headers(context);
@@ -3924,7 +3758,7 @@ int init_modules(global_context_t * context)
{
sublog_printf(SUBLOG_STAGE_DEV1, SUBLOG_LEVEL_DEBUG, "init_modules: begin");
int ret = init_indel_tables(context);
- if(context->config.is_rna_seq_reads || context->config.do_fusion_detection)
+ if(context->config.do_breakpoint_detection || context->config.do_fusion_detection)
ret = ret || init_junction_tables(context);
sublog_printf(SUBLOG_STAGE_DEV1, SUBLOG_LEVEL_DEBUG, "init_modules: finished: %d",ret);
@@ -3934,7 +3768,7 @@ int init_modules(global_context_t * context)
int destroy_modules(global_context_t * context)
{
destroy_indel_module(context);
- if(context->config.is_rna_seq_reads)
+ if(context->config.do_breakpoint_detection || context->config.do_fusion_detection)
destroy_junction_tables(context);
return 0;
}
@@ -3947,25 +3781,25 @@ int destroy_global_context(global_context_t * context)
gvindex_destory(&context -> all_value_indexes[block_no]);
if(context->output_sam_fp)
- fclose(context->output_sam_fp);
+ {
+ fclose(context -> output_sam_fp);
+ // free(context -> output_sam_inner_buffer);
+ }
if(context->output_bam_writer)
{
SamBam_writer_close(context->output_bam_writer);
free(context->output_bam_writer);
context->output_bam_writer=NULL;
}
- free(context->chunk_alignment_records);
- free(context->big_margin_record);
- if(context->chunk_subjunc_records)
- free(context->chunk_subjunc_records);
+ //free(context->big_margin_record);
for(xk1=0; xk1<5; xk1++)
if(context->module_contexts[xk1])free(context->module_contexts[xk1]);
geinput_close(&context -> input_reads.first_read_file);
if(context->input_reads.is_paired_end_reads) geinput_close(&context -> input_reads.second_read_file);
destroy_offsets(&context->chromosome_table);
+ finalise_bigtable_results(context);
-
if((context -> will_remove_input_file & 1) && (memcmp(context ->config.first_read_file, "./core-temp", 11) == 0)) unlink(context ->config.first_read_file);
if((context -> will_remove_input_file & 2) && (memcmp(context ->config.second_read_file, "./core-temp", 11) == 0)) unlink(context ->config.second_read_file);
@@ -4029,7 +3863,7 @@ int write_bincigar_part(char * bincigar, int chropt, unsigned int optlen, int bi
}
// function returns the actual length of bincigar, or -1 if anything is wrong, e.g., bincigar_len is too short or unrecognized operations.
-int cigar2bincigar(char *cigar, char *bincigar, int bincigar_len)
+int OLD_cigar2bincigar(char *cigar, char *bincigar, int bincigar_len)
{
int xk1=0;
unsigned int tmpv=0, bincigar_cursor=0;
@@ -4114,9 +3948,11 @@ int write_cigar_part(char *bincigar, char *cigar, int cigar_len , int * bincigar
return added_len;
}
-int bincigar2cigar(char * cigar, int cigar_len, char * bincigar, int bincigar_max_len, int read_len)
+int OLD_bincigar2cigar(char * cigar, int cigar_len, char * bincigar, int bincigar_max_len, int read_len)
{
+
int cigar_cursor = 0, bincigar_cursor = 0;
+
while(1)
{
int bincigar_move = 0;
@@ -4152,80 +3988,13 @@ int term_strncpy(char * dst, char * src, int max_dst_mem)
return 0;
}
+void absoffset_to_posstr(global_context_t * global_context, unsigned int pos, char * res){
+ char * ch;
+ unsigned int off;
+ locate_gene_position(pos, &global_context -> chromosome_table, & ch, &off);
-// This assumes the first part of Cigar has differet strandness to the main part of the cigar.
-// Pos is the LAST WANTED BASE location before the first strand jump (split by 'b' or 'n').
-// The first base in the read actually has a larger coordinate than Pos.
-// new_cigar has to be at least 100 bytes.
-unsigned int reverse_cigar(unsigned int pos, char * cigar, char * new_cigar)
-{
- int cigar_cursor = 0;
- new_cigar[0]=0;
- unsigned int tmpi=0;
- int last_piece_end = 0;
- int last_sec_start = 0;
- unsigned int chro_pos = pos, this_section_start = pos, ret = pos;
- int is_positive_dir = 0;
- int read_cursor = 0;
- int section_no = 0;
-
- for(cigar_cursor = 0 ; ; cigar_cursor++)
- {
- if( cigar [cigar_cursor] == 'n' || cigar [cigar_cursor] == 'b' || cigar [cigar_cursor] == 0)
- {
- int xk1, jmlen=0, nclen=strlen(new_cigar);
- char jump_mode [13];
-
- if(cigar [cigar_cursor] !=0)
- {
- sprintf(jump_mode, "%u%c", tmpi, cigar [cigar_cursor] == 'b'?'n':'b');
- jmlen = strlen(jump_mode);
- }
-
- for(xk1=nclen-1;xk1>=0; xk1--)
- new_cigar[ xk1 + last_piece_end + jmlen - last_sec_start ] = new_cigar[ xk1 ];
- new_cigar [nclen + jmlen + last_piece_end - last_sec_start ] = 0;
-
- memcpy(new_cigar , jump_mode, jmlen);
- memcpy(new_cigar + jmlen , cigar + last_sec_start, last_piece_end - last_sec_start);
-
- last_sec_start = cigar_cursor+1;
-
- if(is_positive_dir && cigar [cigar_cursor] !=0)
- {
- if(cigar [cigar_cursor] == 'b') chro_pos -= tmpi - read_cursor - 1;
- else chro_pos += tmpi - read_cursor - 1;
- }
- if((!is_positive_dir) && cigar [cigar_cursor] !=0)
- {
- if(cigar [cigar_cursor] == 'b') chro_pos = this_section_start - tmpi - read_cursor - 1;
- else chro_pos = this_section_start + tmpi - read_cursor - 1;
- }
-
- this_section_start = chro_pos;
-
- if(section_no == 0)
- ret = chro_pos;
-
- is_positive_dir = ! is_positive_dir;
- section_no++;
- tmpi=0;
- }
- else if(isalpha(cigar [cigar_cursor]))
- {
- if(cigar [cigar_cursor]=='M' || cigar [cigar_cursor] == 'S')
- read_cursor += tmpi;
- tmpi=0;
- last_piece_end = cigar_cursor+1;
- }
- else tmpi = tmpi*10 + (cigar [cigar_cursor] - '0');
-
- if(cigar [cigar_cursor] == 0)break;
- }
-
- //printf("REV CIGAR: %s => %s\n", cigar, new_cigar);
- return ret;
+ sprintf(res, "%s:%u", ch, off);
}
int chimeric_cigar_parts(global_context_t * global_context, unsigned int sel_pos, int is_first_section_negative_strand, int is_first_section_reversed, char * in_cigar, unsigned int * out_poses, char ** out_cigars, char * out_strands, int read_len, short * perfect_lens)
@@ -4243,7 +4012,7 @@ int chimeric_cigar_parts(global_context_t * global_context, unsigned int sel_pos
int cigar_cursor;
- out_poses[0] = current_perfect_map_start;
+ out_poses[0] = current_perfect_map_start - (is_reversed?1:0);
out_strands[0] = is_negative?'-':'+';
char main_piece_strand = (is_first_section_negative_strand == is_first_section_reversed)?'+':'-';
@@ -4292,12 +4061,13 @@ int chimeric_cigar_parts(global_context_t * global_context, unsigned int sel_pos
long long int dist = current_perfect_cursor;
dist -= jummped_location;
- if(abs(dist) >= 134217728)
+ if(abs(dist) >= global_context -> config.maximum_intron_length)
is_long_jump = 1;
// A long jump is the jump longer than 2^27.
// Picard does not like it!!
}
+ // is_long_jump is true only if the two sections are on different chromosomes.
if(is_chro_jump || islower(ncch) || ncch == 'B' || is_long_jump)
{
current_perfect_cursor = jummped_location;
@@ -4305,6 +4075,9 @@ int chimeric_cigar_parts(global_context_t * global_context, unsigned int sel_pos
if(islower(ncch)){
is_reversed = !is_reversed;
is_negative = !is_negative;
+
+ if(is_reversed)
+ current_perfect_cursor --;
}
current_perfect_map_start = current_perfect_cursor;
@@ -4316,7 +4089,7 @@ int chimeric_cigar_parts(global_context_t * global_context, unsigned int sel_pos
perfect_len = 0;
current_perfect_section_no++;
- if(current_perfect_section_no>CIGAR_PERFECT_SECTIONS)break;
+ if(current_perfect_section_no>=CIGAR_PERFECT_SECTIONS)break;
out_poses[current_perfect_section_no] = current_perfect_map_start - read_cursor;
out_strands[current_perfect_section_no] = is_negative?'-':'+';
@@ -4336,13 +4109,17 @@ int chimeric_cigar_parts(global_context_t * global_context, unsigned int sel_pos
read_cursor += tmpi;
if(ncch == 'M')
perfect_len += tmpi;
- if(!is_reversed)
+ if(is_reversed)
+ out_poses[current_perfect_section_no] += tmpi;
+ else
current_perfect_cursor += tmpi;
tmpi = 0;
}
else if(ncch == 'D' || ncch == 'N')
{
- if(!is_reversed)
+ if(is_reversed)
+ out_poses[current_perfect_section_no] += tmpi;
+ else
current_perfect_cursor += tmpi;
tmpi = 0;
}
@@ -4461,3 +4238,90 @@ void merge_sort(void * arr, int arr_size, int compare (void * arr, int l, int r)
{
merge_sort_run(arr, 0, arr_size, compare, exchange, merge);
}
+
+unsigned int calc_end_pos(unsigned int p, char * cigar, unsigned int * all_skipped_len){
+ unsigned int cursor = p, tmpi=0;
+ int nch, cigar_cursor;
+ for(cigar_cursor = 0; 0!=(nch = cigar[cigar_cursor]); cigar_cursor++){
+ if(isdigit(nch)){
+ tmpi = tmpi * 10 + (nch - '0');
+ }else{
+ if(nch == 'M' || nch == 'N' || nch == 'D'){
+ cursor += tmpi;
+ if(nch == 'N' || nch == 'D') *all_skipped_len += tmpi;
+ }
+ tmpi = 0;
+ }
+ }
+ return cursor;
+
+}
+
+void test_PE_and_same_chro_cigars(global_context_t * global_context , unsigned int pos1, unsigned int pos2, int * is_PE_distance, int * is_same_chromosome, int read_len_1, int read_len_2, char * cigar1, char * cigar2, char *read_name){
+ char * r1_chr, * r2_chr;
+ unsigned int r1_pos, r2_pos;
+
+ (*is_same_chromosome) = 0;
+ (*is_PE_distance) = 0;
+
+ locate_gene_position(pos1, &global_context -> chromosome_table, & r1_chr, & r1_pos);
+ locate_gene_position(pos2, &global_context -> chromosome_table, & r2_chr, & r2_pos);
+
+
+ if(r1_chr == r2_chr){
+ unsigned int skip_1 = 0;
+ unsigned int skip_2 = 0;
+ unsigned int r1_end_pos = calc_end_pos(pos1, cigar1, &skip_1);
+ unsigned int r2_end_pos = calc_end_pos(pos2, cigar2, &skip_2);
+
+ unsigned int tlen = max(r1_end_pos, r2_end_pos) - min(pos1, pos2);
+ if(tlen > skip_1) tlen -= skip_1;
+ if(tlen > skip_2) tlen -= skip_2;
+
+ (*is_same_chromosome) = 1;
+
+ if(tlen >= global_context -> config.minimum_pair_distance && tlen <= global_context -> config.maximum_pair_distance)
+ (* is_PE_distance) = 1;
+ }
+}
+
+void test_PE_and_same_chro_align(global_context_t * global_context , realignment_result_t * res1, realignment_result_t * res2, int * is_PE_distance, int * is_same_chromosome, int read_len_1, int read_len_2, char * read_name){
+ return test_PE_and_same_chro_cigars(global_context, res1 -> first_base_position, res2 -> first_base_position, is_PE_distance, is_same_chromosome , read_len_1 , read_len_2, res1 -> cigar_string, res2 -> cigar_string, read_name);
+}
+
+
+
+void test_PE_and_same_chro(global_context_t * global_context , unsigned int pos1, unsigned int pos2, int * is_PE_distance, int * is_same_chromosome, int read_len_1, int read_len_2)
+{
+ char * r1_chr, * r2_chr;
+ unsigned int r1_pos, r2_pos;
+
+ locate_gene_position(pos1, &global_context -> chromosome_table, & r1_chr, & r1_pos);
+ locate_gene_position(pos2, &global_context -> chromosome_table, & r2_chr, & r2_pos);
+
+ (*is_same_chromosome) = 0;
+ (*is_PE_distance) = 0;
+
+ long long tlen = r1_pos;
+ tlen -= r2_pos;
+ tlen = abs(tlen);
+ tlen += (r1_pos > r2_pos)?read_len_1:read_len_2;
+ unsigned int tlenI = (unsigned int) tlen;
+
+ //SUBREADprintf("TEST PE: %p == %p , TLEN=%u\n", r1_chr, r2_chr, tlenI);
+
+ if(r1_chr == r2_chr){
+ (*is_same_chromosome) = 1;
+ if(tlenI >= global_context -> config.minimum_pair_distance && tlenI <= global_context -> config.maximum_pair_distance)
+ (* is_PE_distance) = 1;
+ }
+
+}
+
+int FIXLENstrcmp(char * fixed_len, char * rname){
+ int x=0;
+ for(; fixed_len[x]; x++){
+ if(rname[x]!=fixed_len[x]) return 1;
+ }
+ return 0;
+}
diff --git a/src/core.h b/src/core.h
index 016435b..dd819b6 100644
--- a/src/core.h
+++ b/src/core.h
@@ -52,30 +52,77 @@
//#define _global_retrieve_voting_context(global_context, pair_number) (global_context->chunk_vote_records[pair_number])
-#define _global_retrieve_alignment(global_context, pair_number, is_second_read, best_read_id) ((global_context->input_reads.is_paired_end_reads?(global_context -> chunk_alignment_records[(2*pair_number+is_second_read)* global_context->config.multi_best_reads + best_read_id]):(global_context -> chunk_alignment_records[pair_number*global_context->config.multi_best_reads + best_read_id])))
+//#define _global_retrieve_alignment_ptr(global_context, pair_number, is_second_read, best_read_id) ((global_context->input_reads.is_paired_end_reads?(global_context -> chunk_alignment_records + (2*pair_number+is_second_read)* global_context->config.multi_best_reads + best_read_id):(global_context -> chunk_alignment_records+pair_number*global_context->config.multi_best_reads + best_read_id)))
-#define _global_retrieve_alignment_ptr(global_context, pair_number, is_second_read, best_read_id) ((global_context->input_reads.is_paired_end_reads?(global_context -> chunk_alignment_records + (2*pair_number+is_second_read)* global_context->config.multi_best_reads + best_read_id):(global_context -> chunk_alignment_records+pair_number*global_context->config.multi_best_reads + best_read_id)))
+//#define _global_retrieve_subjunc_ptr(global_context, pair_number, is_second_read, best_read_id) ((global_context->input_reads.is_paired_end_reads?(global_context -> chunk_subjunc_records + (2*pair_number+is_second_read)* global_context->config.multi_best_reads + best_read_id):(global_context -> chunk_subjunc_records+pair_number*global_context->config.multi_best_reads + best_read_id)))
-#define _global_retrieve_subjunc_ptr(global_context, pair_number, is_second_read, best_read_id) ((global_context->input_reads.is_paired_end_reads?(global_context -> chunk_subjunc_records + (2*pair_number+is_second_read)* global_context->config.multi_best_reads + best_read_id):(global_context -> chunk_subjunc_records+pair_number*global_context->config.multi_best_reads + best_read_id)))
+//#define _global_retrieve_big_margin_ptr(global_context,pair_number, is_second_read) (is_second_read?(global_context->big_margin_record + (2*pair_number+is_second_read)* global_context->config.big_margin_record_size):(global_context->big_margin_record + pair_number * global_context->config.big_margin_record_size))
+
+#define mark_gapped_read(res) (res)-> result_flags|= CORE_IS_GAPPED_READ;
+
+#define CORE_MAX_CIGAR_STR_LEN 110
+#define CORE_ADDITIONAL_INFO_LENGTH 400
+
+
+typedef struct{
+ subread_read_number_t fragments;
+ unsigned int maximum_interval_length;
+ unsigned int expected_items;
+ HashTable * entry_table;
+} bucketed_table_t;
+
+typedef struct{
+ int capacity;
+ int items;
+ unsigned int keyed_bucket;
+ unsigned int maximum_interval_length;
+
+ unsigned int * positions;
+ void ** details;
+} bucketed_table_bucket_t;
+
+
+typedef struct{
+ unsigned long long capacity;
+ subread_read_number_t fragments;
+ subread_read_number_t * fragment_numbers;
+} fragment_list_t;
-#define _global_retrieve_big_margin_ptr(global_context,pair_number, is_second_read) (global_context->input_reads.is_paired_end_reads?(global_context->big_margin_record + (2*pair_number+is_second_read)* global_context->config.big_margin_record_size):(global_context->big_margin_record + pair_number * global_context->config.big_margin_record_size))
-#define mark_gapped_read(res) (res)-> result_flags|= CORE_IS_GAPPED_READ;
typedef struct{
int is_paired_end_reads;
gene_input_t first_read_file;
gene_input_t second_read_file;
unsigned long long first_read_file_size;
- unsigned long long first_file_blocks[64];
- unsigned long long second_file_blocks[64];
- unsigned int reads_in_blocks[64];
- unsigned int start_read_number_blocks[64];
+ subread_lock_t input_lock;
double avg_read_length;
} read_input_t;
+typedef struct{
+ char read_name[MAX_READ_NAME_LEN];
+ unsigned short flags;
+ char chro_name[MAX_CHROMOSOME_NAME_LEN];
+ unsigned int location;
+ unsigned short map_quality;
+ char cigar[CORE_MAX_CIGAR_STR_LEN];
+ char other_chro_name[MAX_CHROMOSOME_NAME_LEN];
+ unsigned int other_location;
+ long long int tlen;
+ int rlen;
+ char read_text[MAX_READ_LENGTH];
+ char qual_text[MAX_READ_LENGTH];
+ char additional_columns[CORE_ADDITIONAL_INFO_LENGTH];
+} output_read_buffer_t;
+typedef struct{
+ int fragment_number_in_chunk;
+ int multi_mapping_locations;
+ int this_mapping_location;
+ output_read_buffer_t r1;
+ output_read_buffer_t r2;
+} output_fragment_buffer_t;
typedef struct{
// running_scheme
@@ -83,9 +130,11 @@ typedef struct{
int is_first_iteration_running;
int is_second_iteration_running;
int is_third_iteration_running;
+ int use_memory_buffer;
float memory_use_multiplex;
char temp_file_prefix[MAX_FILE_NAME_LENGTH];
- unsigned int reads_per_chunk;
+ subread_read_number_t reads_per_chunk;
+ int fast_run;
// input_scheme
char first_read_file[MAX_FILE_NAME_LENGTH];
@@ -123,9 +172,14 @@ typedef struct{
int SAM_extra_columns;
int report_multiple_best_in_pairs;
unsigned int multi_best_reads;
+ unsigned int reported_multi_best_reads;
// basic voting
char index_prefix[MAX_FILE_NAME_LENGTH];
+ int top_scores;
+ int max_vote_combinations;
+ int max_vote_simples;
+ int max_vote_number_cutoff;
int total_subreads;
int minimum_subread_for_first_read;
int minimum_subread_for_second_read;
@@ -140,23 +194,28 @@ typedef struct{
int use_hamming_distance_break_ties;
int use_quality_score_break_ties;
int big_margin_record_size;
+ int PE_predominant_weight;
// subjunc
int entry_program_name;
- char is_rna_seq_reads;
- char do_big_margin_filtering_for_junctions;
- char do_big_margin_filtering_for_reads;
- char limited_tree_scan;
- char use_hamming_distance_in_exon;
+ int experiment_type;
+ int do_breakpoint_detection;
+ int do_big_margin_filtering_for_junctions;
+ int do_big_margin_filtering_for_reads;
+ int limited_tree_scan;
+ int use_hamming_distance_in_exon;
unsigned int maximum_intron_length;
int high_quality_base_threshold;
- char max_insertion_at_junctions;
- char check_donor_at_junctions;
+ int max_insertion_at_junctions;
+ int check_donor_at_junctions;
// subfusion
int do_fusion_detection;
+ int do_structural_variance_detection;
int prefer_donor_receptor_junctions;
int more_accurate_fusions;
+ int maximum_translocation_length;
+ int maximum_colocating_distance;
// indel
char do_superlong_indel_detection;
@@ -172,6 +231,7 @@ typedef struct{
int init_max_event_number;
int use_dynamic_programming_indel;
int use_bitmap_event_table;
+ int maximise_sensitivity_indel;
int flanking_subread_indel_mismatch;
int DP_penalty_create_gap;
int DP_penalty_extend_gap;
@@ -186,7 +246,9 @@ typedef struct{
#define CORE_IS_NEGATIVE_STRAND 8
#define CORE_IS_FULLY_EXPLAINED 16
#define CORE_IS_BREAKEVEN 32
-#define CORE_IS_GAPPED_READ 64
+#define CORE_IS_GAPPED_READ 64
+#define CORE_IS_PAIRED_END 128
+#define CORE_TOO_MANY_MISMATCHES 256
#define CORE_CIGAR_OPT_M 0
#define CORE_CIGAR_OPT_S 1
@@ -201,46 +263,121 @@ typedef struct{
#define CORE_PROGRAM_SUBJUNC 200
#define CORE_PROGRAM_SUBINDEL 1000
+#define CORE_EXPERIMENT_DNASEQ 1000
+#define CORE_EXPERIMENT_RNASEQ 2000
+
+#define PRINT_BOX_NOCOLOR_FOR_COLON 2
+#define PRINT_BOX_CENTER 1
+
+typedef struct{ // One chromosomal event record; the comments below describe exon-exon junctions, indels and fusions.
+ unsigned int event_small_side; // the last base before the event
+ unsigned int event_large_side; // the first base after the event
+ // for exon-exon junctions, these two numbers are the last and first base in the exons. (ZERO-BASED INDEX)
+ // for indels, the smaller side is the base before the inserted/deleted bases; the larger side is the base after them.
+ // for a fusion, the two sides are the bases next to the fusion point (the fusion point is not a base, but a vertical bar between two bases -- two sides.)
+ //
+ //
+ // The 'V' marked bases are the two sides.
+ // V V
+ // Exon-exon junction: 100 .......s|gt........ag|s....... 300 chrX
+ // ^ ^ These two bars are the junction point.
+ //
+ //
+ //
+ // DNA-fusions:
+ //
+ // small_side ===> .|. <=== large_side normal arrangement (N/Y)
+ //
+ // small_side ===> .|
+ // large_side ===> .| strand jumped (N/N)
+ //
+ // |. <=== small_side strand jumped (Y/Y)
+ // |. <=== large_side
+ //
+ // ^ This is the fusion point. It is not a base, but a vertical bar between two sides.
+ //
+ //
+ //
+ // note: the fusion point is larger than event_small_side if small_side_increasing_coordinate == N; it is smaller than event_small_side if small_side_increasing_coordinate == Y;
+ // the fusion point is smaller than event_large_side if large_side_increasing_coordinate == Y; it is larger than event_large_side if large_side_increasing_coordinate == N.
+
+ short indel_length;
+ short junction_flanking_left;
+ short junction_flanking_right;
+
+ unsigned char event_type;
+ char indel_at_junction;
+ char is_negative_strand; // only meaningful for junction detection, decided by the 'GT/AG' or 'CT/AC' donors. This only applies to junctions.
+ char is_strand_jumped; // "strand jumped" means that the left and right sides are on different strands. This only applies to fusions.
+ char is_donor_found; // only for junctions: GT/AG is found at the location.
+ // Also, if "is_strand_jumped" is true, all coordinates (e.g., splicing points, cover_start, cover_end, etc) are on "reversed read" view.
+
+ char small_side_increasing_coordinate;
+ char large_side_increasing_coordinate; // normal exon-exon junctions must have N and Y on small/large increasing coordinates.
+
+ //char is_ambiguous;
+ char connected_next_event_distance; // the distance (negative or positive) to the next event in the table. For example, if the cigar string is 10M3I1M1I10M, event "3I" will have 1 here .
+ char connected_previous_event_distance; // the distance (negative or positive) to the previous event in the table. For example, if the cigar string is 10M3I1M1I10M, event "1I" will have 1 here.
+
+ //char inserted_bases[(1+MAX_INSERTION_LENGTH) / 4 + 1];
+ char * inserted_bases;
+ unsigned short supporting_reads;
+ unsigned short anti_supporting_reads;
+ unsigned short final_counted_reads;
+ unsigned short final_reads_mismatches;
+ unsigned int global_event_id;
+ float event_quality;
+
+ unsigned long long critical_read_id;
+ int critical_supporting_reads;
+} chromosome_event_t;
+
+
+
typedef struct
{
unsigned int selected_position;
+ short result_flags;
+ short read_length;
// 4 bytes
gene_vote_number_t selected_votes;
gene_vote_number_t used_subreads_in_vote;
unsigned char noninformative_subreads_in_vote;
- unsigned char final_quality;
// this coverage is the range on reads, in point of view of "main piece" strand (i.e., "is_negative_strand")
char indels_in_confident_coverage;
- char result_flags;
- // 12 bytes
- union{
- struct{
- gene_vote_number_t selected_indel_record [MAX_INDEL_SECTIONS*3 + 1];
- unsigned short confident_coverage_start;
- unsigned short confident_coverage_end;
- };
- char cigar_string[MAX_INDEL_SECTIONS * 3+5];
- };
- // 4x bytes
-
- union
- {
- unsigned long long Score_L;
- struct{
- short final_mismatched_bases;
- short best_second_diff_bases;
- };
- };
- // 48 bytes
- unsigned long long int Score_H;
- // 56 butes
-
-} alignment_result_t;
-
-#define CORE_MAX_CIGAR_LEN (MAX_INDEL_SECTIONS * 3+5)
-#define CORE_MAX_CIGAR_STR_LEN 110
-#define CORE_ADDITIONAL_INFO_LENGTH 400
+ char is_fully_covered;
+
+ gene_vote_number_t selected_indel_record [MAX_INDEL_SECTIONS*3 + 1];
+ unsigned short confident_coverage_start;
+ unsigned short confident_coverage_end;
+
+ short subread_quality;
+
+} mapping_result_t;
+
+typedef struct{
+ mapping_result_t * mapping_result;
+ unsigned int first_base_position;
+ char cigar_string[CORE_MAX_CIGAR_STR_LEN];
+ chromosome_event_t * supporting_chromosome_events[MAX_EVENTS_IN_READ];
+ short flanking_size_left[MAX_EVENTS_IN_READ];
+ short flanking_size_right[MAX_EVENTS_IN_READ];
+ char crirical_support[MAX_EVENTS_IN_READ];
+
+ char first_base_is_jumpped;
+ short final_mismatched_bases;
+ short final_matched_bases;
+ short best_second_diff_bases;
+ short realign_flags;
+ short final_quality;
+ short hamming_matched;
+
+} realignment_result_t;
+
+#define BUCKETED_TABLE_INIT_ITEMS 3
+#define FRAGMENT_LIST_INIT_ITEMS 3
+
typedef struct
{
@@ -248,6 +385,8 @@ typedef struct
gene_vote_number_t minor_votes;
char double_indel_offset;
char indel_at_junction;
+ char small_side_increasing_coordinate;
+ char large_side_increasing_coordinate;
unsigned int minor_position;
// this coverage is the range on reads, in point of view of "main piece" strand
unsigned short minor_coverage_start;
@@ -255,6 +394,50 @@ typedef struct
} subjunc_result_t;
+// THREE_TOP_UPDATE dictates the number of top-votes.
+
+typedef struct {
+ int is_vote_t_item;
+ int item_index_i;
+ int item_index_j;
+ unsigned int mapping_position;
+ int major_half_votes;
+}simple_mapping_t;
+
+typedef struct{
+ simple_mapping_t * r1_loc;
+ simple_mapping_t * r2_loc;
+
+ unsigned long long score_adj;
+} vote_combination_t;
+
+typedef struct{
+ unsigned long long maxinum_read_number;
+ int result_chunk_fd;
+ int junction_chunk_fd;
+ int big_margin_chunk_fd;
+
+ mapping_result_t * result_chunk_addr;
+ subjunc_result_t * junction_chunk_addr;
+ unsigned short * big_margin_chunk_addr;
+
+ subread_lock_t resize_lock;
+} bigtable_t;
+
+
+// this cached item is for an entire read (not read pair, not one best result)
+typedef struct {
+ int status;
+ subread_read_number_t pair_number;
+ int is_second_read;
+
+ unsigned short big_margin_data[9*3];
+ mapping_result_t * alignment_res;
+ subjunc_result_t * subjunc_res;
+
+} bigtable_cached_result_t;
+
+
typedef struct{
int thread_id;
pthread_t thread;
@@ -263,13 +446,12 @@ typedef struct{
// modules functions may deside to use objects in which context.
void * module_thread_contexts[5];
gene_value_index_t * current_value_index;
- unsigned int processed_reads_in_chunk;
- // per chunk parameters
- gene_input_t * ginp1;
- gene_input_t * ginp2;
- unsigned int reads_to_be_done;
- unsigned int read_block_start;
+ output_fragment_buffer_t * output_buffer;
+ int output_buffer_item;
+ int output_buffer_pointer;
+ int is_finished;
+ subread_lock_t output_lock;
} thread_context_t;
@@ -285,6 +467,7 @@ typedef struct{
int current_index_block_number;
int will_remove_input_file;
int is_phred_warning;
+ int is_final_voting_run;
// global locks
subread_lock_t thread_initial_lock;
@@ -293,18 +476,34 @@ typedef struct{
SamBam_Writer * output_bam_writer;
FILE * output_sam_fp;
FILE * long_insertion_FASTA_fp;
+ char * output_sam_inner_buffer;
// running contexts
void * module_contexts[5];
+ thread_context_t * all_thread_contexts;
+ int last_written_fragment_number;
+ int need_merge_buffer_now;
read_input_t input_reads;
- alignment_result_t * chunk_alignment_records; // arrangement: PE:: array_offset = ( read_pair_no * 2 + is_second_read ) * best_read_number + best_read_id
- // arrangement: SE:: array_offset = read_pair_no * best_read_number + best_read_id
- subjunc_result_t * chunk_subjunc_records; // arrangement: PE:: array_offset = ( read_pair_no * 2 + is_second_read ) * best_read_number + best_read_id
- // arrangement: SE:: array_offset = read_pair_no * best_read_number + best_read_id
- unsigned char * big_margin_record;
+ bigtable_t bigtable;
+
+
+ subread_lock_t bigtable_lock;
+ subread_lock_t output_lock;
+ int bigtable_cache_size;
+ FILE * bigtable_cache_file_fp;
+ long long bigtable_cache_file_loaded_fragments_begin;
+ long long bigtable_cache_file_fragments;
+ bigtable_cached_result_t * bigtable_cache;
+ unsigned int bigtable_chunked_fragments;
+ int bigtable_dirty_data;
+
gene_offset_t chromosome_table;
double start_time;
double align_start_time;
+ double timecost_load_index;
+ double timecost_voting;
+ double timecost_before_realign;
+ double timecost_for_realign;
unsigned long long all_processed_reads;
unsigned long long all_mapped_reads;
@@ -313,15 +512,29 @@ typedef struct{
unsigned int all_fusions;
unsigned int all_indels;
- unsigned long long current_circle_start_position_file1;
- unsigned long long current_circle_start_position_file2;
- unsigned long long current_circle_end_position_file1;
- unsigned long long current_circle_end_position_file2;
- unsigned int processed_reads_in_chunk;
+ unsigned long long current_circle_start_abs_offset_file1;
+ gene_inputfile_position_t current_circle_start_position_file1;
+ gene_inputfile_position_t current_circle_start_position_file2;
+ gene_inputfile_position_t current_circle_end_position_file1;
+ gene_inputfile_position_t current_circle_end_position_file2;
+ subread_read_number_t processed_reads_in_chunk;
+ subread_read_number_t running_processed_reads_in_chunk;
+
+ // subfusion structural variance
+ bucketed_table_t funky_table_BC;
+ bucketed_table_t funky_table_DE;
+ fragment_list_t funky_list_A;
+ fragment_list_t funky_list_DE;
+
+ bucketed_table_t breakpoint_table_P;
+ bucketed_table_t breakpoint_table_QR;
+ bucketed_table_t breakpoint_table_YZ;
+
+ bucketed_table_t translocation_result_table;
+ bucketed_table_t inversion_result_table;
// per chunk parameters
- unsigned int reads_to_be_done;
- unsigned int read_block_start;
+ subread_read_number_t read_block_start;
} global_context_t;
@@ -381,7 +594,7 @@ int cigar2bincigar(char *cigar, char *bincigar, int bincigar_len);
void print_subread_logo();
// print a line in the box
-void print_in_box(int line_width, int is_boundary, int is_center, char * pattern,...);
+void print_in_box(int line_width, int is_boundary, int options, char * pattern,...);
// find the value index covering this read
// it returns NULL if no index is found.
@@ -392,10 +605,13 @@ void char_strftime(char * tbuf);
int term_strncpy(char * dst, char * src, int max_dst_memory);
-int is_result_in_PE(alignment_result_t * aln);
+int is_result_in_PE(mapping_result_t * aln);
void core_version_number(char * program);
+mapping_result_t * _global_retrieve_alignment_ptr(global_context_t * global_context, subread_read_number_t pair_number, int is_second_read, int best_read_id);
+subjunc_result_t * _global_retrieve_subjunc_ptr(global_context_t * global_context, subread_read_number_t pair_number, int is_second_read, int best_read_id);
+unsigned short * _global_retrieve_big_margin_ptr(global_context_t * global_context, subread_read_number_t pair_number, subread_read_number_t is_second_read);
// This assumes the first part of Cigar has differet strandness to the main part of the cigar.
// Pos is the LAST WANTED BASE location before the first strand jump (split by 'b' or 'n').
@@ -410,4 +626,10 @@ void quick_sort(void * arr,int arr_size, int compare (void * arr, int l, int r),
// L_Minus_R should return -1, 0 or 1 when L<R, L==R or L>R.
// The result is from Small to Large.
void merge_sort(void * arr, int arr_size, int L_Minus_R (void * arr, int l, int r), void exchange(void * arr, int l, int r), void merge_SmallFirst(void * arr, int start, int items, int items2));
+
+void absoffset_to_posstr(global_context_t * global_context, unsigned int pos, char * res);
+
+void test_PE_and_same_chro(global_context_t * global_context , unsigned int pos1, unsigned int pos2, int * is_PE_distance, int * is_same_chromosome , int rlen1, int rlen2);
+
+int FIXLENstrcmp(char * fixed_len, char * rname);
#endif
diff --git a/src/coverage_calc.c b/src/coverage_calc.c
index fbee345..6d47cb4 100644
--- a/src/coverage_calc.c
+++ b/src/coverage_calc.c
@@ -28,9 +28,21 @@ static struct option cov_calc_long_options[] =
void calcCount_usage()
{
- SUBREADprintf("\ncoverageCount v%s\n\n", SUBREAD_VERSION);
- SUBREADprintf("This utility program counts the coverage of mapped reads at each location on the entire reference genome. It generates a number of binary files, each corresponding to a chromosome that is listed on the header of the input SAM or BAM file. Each of the binary file consists of many 4-byte integers (little-endian order), indicating the number of reads spanning each location on the corresponded chromosome; the file offset in bytes is calculated by the chromosomal location (zer [...]
- SUBREADprintf("./coverageCount -i <sam or bam file> -o <output_prefix>\n\n");
+ SUBREADprintf("\ncoverageCount Version %s\n\n", SUBREAD_VERSION);
+ SUBREADputs(" This program calculates the coverage of mapped reads at each location on");
+ SUBREADputs("the reference genome. It generates a binary file for each chromosome by concate-");
+ SUBREADputs("nating the coverage levels as 4-bytes integer numbers.");
+ SUBREADputs("");
+ SUBREADputs("Usage");
+ SUBREADputs("");
+ SUBREADputs(" ./coverageCount -i <input_file> -o <output_prefix>");
+ SUBREADputs("");
+ SUBREADputs("Required arguments:");
+ SUBREADputs("");
+ SUBREADputs(" -i <string> Name of input file in SAM or BAM format.");
+ SUBREADputs("");
+ SUBREADputs(" -o <string> Prefix of the output files. Each output file contains Four-byte");
+ SUBREADputs(" integer numbers");
SUBREADputs("");
}
@@ -127,7 +139,8 @@ int covCalc()
}
else
{
- int Staring_Points[FC_CIGAR_PARSER_ITEMS];
+ char * Chros[FC_CIGAR_PARSER_ITEMS];
+ unsigned int Staring_Points[FC_CIGAR_PARSER_ITEMS];
unsigned short Staring_Read_Points[FC_CIGAR_PARSER_ITEMS];
unsigned short Section_Lengths[FC_CIGAR_PARSER_ITEMS];
@@ -150,7 +163,7 @@ int covCalc()
coverage_bin_entry_t * chrbin = (coverage_bin_entry_t*) bin_entry[0];
unsigned int chrlen = (void *)( bin_entry[0]) - NULL;
- int cigar_sections = RSubread_parse_CIGAR_string(cigar_str, Staring_Points, Staring_Read_Points, Section_Lengths, &is_junc);
+ int cigar_sections = RSubread_parse_CIGAR_string(chro, pos, cigar_str, Chros, Staring_Points, Staring_Read_Points, Section_Lengths, &is_junc);
for(x1 = 0; x1 < cigar_sections; x1++)
{
unsigned int x2;
@@ -159,7 +172,7 @@ int covCalc()
{
if(pos+x2 < chrlen)
{
- if(chrbin[x2 + pos] <= COVERAGE_MAX_INT)chrbin[x2 + pos] ++;
+ if(chrbin[x2] <= COVERAGE_MAX_INT)chrbin[x2] ++;
all_counted ++;
if(all_counted % 10000000 == 0)
{
diff --git a/src/del4-mmap-test.c b/src/del4-mmap-test.c
new file mode 100644
index 0000000..18cab20
--- /dev/null
+++ b/src/del4-mmap-test.c
@@ -0,0 +1,44 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+
+#define MEGA 1024llu*1024
+
+int main(){
+	// Scratch test: fill a file with 100*MEGA 4-byte counters, map it MAP_PRIVATE, touch it by reading and writing, then sleep.
+	int fd = open("/usr/local/work/liao/arena/del4.mem", O_TRUNC | O_CREAT|O_WRONLY , 0600);
+	long long int x;
+	if(fd < 0){ perror("open for writing"); return 1; }
+	for(x=0; x<100*MEGA; x++){
+		int word = (int)x;	// write the 4 bytes of an int; the first 4 bytes of a long long depend on endianness
+		if(write(fd, &word, 4) != 4){ perror("write"); close(fd); return 1; }
+	}
+	close(fd);
+
+	fd = open("/usr/local/work/liao/arena/del4.mem", O_RDWR);
+	if(fd < 0){ perror("open for mapping"); return 1; }
+	void * fd_ptr = mmap(NULL, 400*MEGA, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
+	assert(fd_ptr != MAP_FAILED);
+	printf("MEMPTR = %p\n", fd_ptr);	// %p: casting the pointer to unsigned int truncates it on 64-bit systems
+
+	int * int_ptr = (int *)fd_ptr;
+	volatile int sink = 0;	// volatile so the read-only loop below is not optimised away
+	for(x=0; x<100*MEGA; x+=456){
+		sink = int_ptr[x];
+	}
+	for(x=0; x<100*MEGA; x+=2){
+		int_ptr[x]=0x0a;
+	}
+	printf("MEMORY PREPARIED\n");
+	sleep(100);
+	munmap(fd_ptr, 400*MEGA);
+	close(fd);
+	return 0;
+}
diff --git a/src/filterJunctionTable.c b/src/filterJunctionTable.c
index 95a32b7..78f6552 100644
--- a/src/filterJunctionTable.c
+++ b/src/filterJunctionTable.c
@@ -143,11 +143,12 @@ int main(int argc, char ** argv)
char chro[20];
unsigned int pos=0;
char cigar[50];
- unsigned int start_points[6];
- unsigned short section_lengths[6];
+ unsigned int start_points[FC_CIGAR_PARSER_ITEMS];
+ unsigned int start_read_points[FC_CIGAR_PARSER_ITEMS];
+ unsigned short section_lengths[FC_CIGAR_PARSER_ITEMS];
parse_line(ii?new_line2:new_line, &flag, chro, &pos, cigar);
- int sections = RSubread_parse_CIGAR_string(cigar, start_points, section_lengths);
+ int sections = RSubread_parse_CIGAR_string(cigar, start_points, start_read_points, section_lengths);
for(jj=0; jj<sections-1; jj++)
{
unsigned int edge1 = start_points[jj] + pos + section_lengths[jj];
diff --git a/src/fullscan.c b/src/fullscan.c
index 82882e1..1c0c9ce 100644
--- a/src/fullscan.c
+++ b/src/fullscan.c
@@ -30,14 +30,30 @@
float MIN_REPORTING_RATIO = 0.8;
gene_offset_t _global_offsets;
unsigned int SCAN_TOTAL_BASES=0;
+char * only_chro = NULL;
void fullscan_usage()
{
- SUBREADprintf("\nVersion %s\n\n", SUBREAD_VERSION);
- SUBREADputs("This program scans the entire genome and reports all matches of a specified sequence.\n");
- SUBREADputs("Usage:\n");
- SUBREADputs(" ./subread-fullscan -i <index_name> {-m <report_threshold>} <ATGC_read_string>\n");
+ SUBREADprintf("\nsubread-fullscan Version %s\n\n", SUBREAD_VERSION);
+ SUBREADputs(" This program scans the entire genome to find all high-similarity locations to");
+ SUBREADputs("a specified read.");
+ SUBREADputs("");
+ SUBREADputs("Usage:");
+ SUBREADputs("");
+ SUBREADputs(" ./subread-fullscan [options] -i <index_name> <read_string>");
+ SUBREADputs("");
+ SUBREADputs("Required arguments:");
+ SUBREADputs("");
+ SUBREADputs(" -i <string> Base name of the index.");
+ SUBREADputs("");
+ SUBREADputs(" read_string The read bases.");
+ SUBREADputs("");
+ SUBREADputs("Optional arguments:");
+ SUBREADputs("");
+ SUBREADputs(" -m <float> The minimum fraction of matched bases in the read, 0.8 by default");
+ SUBREADputs("");
+
}
void report_pos(unsigned int pos)
@@ -115,6 +131,12 @@ void full_scan_read(char * index_name, char * read_str)
for(; current_pos + read_len < index.start_point + index.length ; current_pos++)
{
+ if(only_chro){
+ char * chro_name;
+ unsigned int chro_pos;
+ locate_gene_position(current_pos, &_global_offsets, &chro_name, &chro_pos);
+ if(strcmp(chro_name, only_chro)!=0)continue;
+ }
scan_test_match(read_str, read_rev_str, chro_str, read_len, current_pos);
char nch = gvindex_get(&index, current_pos + read_len);
int i;
@@ -140,7 +162,7 @@ int main (int argc , char ** argv)
index_name[0]=0;
- while ((c = getopt (argc, argv, "i:m:?")) != -1)
+ while ((c = getopt (argc, argv, "i:m:c:?")) != -1)
switch(c)
{
case 'i':
@@ -149,6 +171,9 @@ int main (int argc , char ** argv)
case 'm':
MIN_REPORTING_RATIO = atof(optarg);
break;
+ case 'c':
+ only_chro = optarg;
+ break;
case '?':
return -1 ;
}
diff --git a/src/gene-algorithms.c b/src/gene-algorithms.c
index dbe351b..633d84a 100644
--- a/src/gene-algorithms.c
+++ b/src/gene-algorithms.c
@@ -60,6 +60,14 @@ void subread_lock_occupy(subread_lock_t * lock)
#endif
}
+void subread_destroy_lock(subread_lock_t * lock) { // Destroys the lock object; the return code of the pthread call is ignored.
+ #ifdef MACOS
+ pthread_mutex_destroy(lock); // MACOS builds destroy the lock with the pthread mutex API
+ #else
+ pthread_spin_destroy(lock); // all other builds destroy it with the pthread spinlock API
+ #endif
+}
+
void subread_init_lock(subread_lock_t * lock)
{
#ifdef MACOS
@@ -1162,13 +1170,14 @@ int match_chro_indel(char * read, gene_value_index_t * index, unsigned int pos,
{
int section_last_subread = indel_recorder[tali*3+1] - 1;
int section_offset = indel_recorder[tali*3+2];
- int this_section_end = find_subread_end(test_len , total_subreads, section_last_subread)+6;
+ int this_section_end = find_subread_end(test_len , total_subreads, section_last_subread);
if(!indel_recorder[tali*3+3]) this_section_end = test_len;
this_section_end = min(test_len, this_section_end);
this_section_end = max(this_section_end, last_section_end);
- ret += match_chro(read + last_section_end - min(0, section_offset), index, pos + last_section_end + max(0, section_offset), this_section_end - last_section_end + min(0, section_offset), is_negative_strand, space_type);
+ ret += match_chro(read + last_section_end - min(0, section_offset), index, pos + last_section_end + max(0, section_offset), this_section_end - last_section_end + min(0, section_offset), is_negative_strand * 0, space_type);
+
last_section_end = this_section_end;
last_section_indels = section_offset;
}
@@ -2126,7 +2135,7 @@ int extend_covered_region(gene_value_index_t *array_index, unsigned int read_sta
}
-float match_base_quality_cs(gene_value_index_t *array_index, char * read_txt, unsigned int pos, char * qual_txt, int read_len, int phred_version, int * high_qual_unmatch, int * all_mismatched, int ql_kill)
+float match_base_quality_cs(gene_value_index_t *array_index, char * read_txt, unsigned int pos, char * qual_txt, int read_len, int phred_version, int * high_qual_unmatch, int * all_mismatched, int ql_kill, int head_clipped, int tail_clipped)
{
int i;
int ret =0;
@@ -2134,10 +2143,10 @@ float match_base_quality_cs(gene_value_index_t *array_index, char * read_txt, u
if(pos < array_index -> start_base_offset || pos + read_len >= array_index -> start_base_offset + array_index -> length){
//SUBREADprintf("WARNING: BASE INDEX OUT OF LIMIT: %u < %u < %u\n%s\n", array_index -> start_base_offset , pos, array_index -> start_base_offset + array_index -> length, read_txt);
// exit(-1);
- return 100;
+ return (read_len - tail_clipped - head_clipped);
}
lastch = gvindex_get(array_index, pos);
- for(i=0; i<read_len; i++)
+ for(i=head_clipped; i<read_len - tail_clipped; i++)
{
char nch = gvindex_get(array_index, pos+i+1);
int is_matched = read_txt[i] == '0'+chars2color(lastch, nch);
@@ -2155,16 +2164,16 @@ float match_base_quality_cs(gene_value_index_t *array_index, char * read_txt, u
return ret*1.;
}
-float match_base_quality(gene_value_index_t *array_index, char * read_txt, unsigned int pos, char * qual_txt, int read_len, int is_negative, int phred_version, int * high_qual_unmatch, int * all_mismatched, int ql_kill)
+float match_base_quality(gene_value_index_t *array_index, char * read_txt, unsigned int pos, char * qual_txt, int read_len, int is_negative, int phred_version, int * high_qual_unmatch, int * all_mismatched, int ql_kill, int head_clipped, int tail_clipped)
{
int i;
int ret =0;
if(pos < array_index -> start_base_offset || pos + read_len >= array_index -> start_base_offset + array_index -> length){
//SUBREADprintf("WARNING: BASE INDEX OUT OF LIMIT: %u < %u < %u\n%s\n", array_index -> start_base_offset , pos, array_index -> start_base_offset + array_index -> length, read_txt);
// exit(-1);
- return 100;
+ return (read_len - tail_clipped - head_clipped);
}
- for(i=0; i<read_len; i++)
+ for(i=head_clipped; i<read_len - tail_clipped; i++)
{
char true_chr;
if(is_negative)
@@ -2178,7 +2187,7 @@ float match_base_quality(gene_value_index_t *array_index, char * read_txt, unsi
else
true_chr = gvindex_get(array_index, pos + i);
- //printf("%c vs %c\n", true_chr , read_txt[i]);
+ //SUBREADprintf("%c vs %c\n", true_chr , read_txt[i]);
if (true_chr == read_txt[i])
{
@@ -2249,7 +2258,7 @@ float final_mapping_quality(gene_value_index_t *array_index, unsigned int pos, c
if(cigar_txt[cigar_cursor] == 'M' || cigar_txt[cigar_cursor] == 'S')
{
int all_MM=0;
- float nret = match_base_quality(array_index, read_txt + read_cursor, chromosome_cursor , (qual_txt && qual_txt[0])?qual_txt + read_cursor:NULL, x, 0, phred_version, mismatch, &all_MM, 200000);
+ float nret = match_base_quality(array_index, read_txt + read_cursor, chromosome_cursor , (qual_txt && qual_txt[0])?qual_txt + read_cursor:NULL, x, 0, phred_version, mismatch, &all_MM, 200000,0,0);
//printf ("%s: Q=%.6f; L=%d ; POS=%u\n", read_txt + read_cursor, nret, x, chromosome_cursor);
ret += (int)(nret*1000000);
@@ -2461,13 +2470,10 @@ void print_votes(gene_vote_t * vote, char *index_prefix)
int i,j;
char * chrname = NULL;
unsigned int chrpos = 0;
-
-
load_offsets (&offsets, index_prefix);
+ //locate_gene_position(vote -> max_position, &offsets, &chrname, &chrpos);
- locate_gene_position(vote -> max_position, &offsets, &chrname, &chrpos);
-
- SUBREADprintf("Max votes = %d , Position is %s,%u\n", vote->max_vote, chrname, chrpos );
+ SUBREADprintf(" ========== Max votes = %d ==========\n", vote->max_vote);// , Position is %s,%u\n", vote->max_vote, chrname, chrpos );
for (i=0; i<GENE_VOTE_TABLE_SIZE; i++)
for(j=0; j< vote->items[i]; j++)
{
diff --git a/src/gene-algorithms.h b/src/gene-algorithms.h
index 4b5dfa3..ffc238e 100644
--- a/src/gene-algorithms.h
+++ b/src/gene-algorithms.h
@@ -131,14 +131,16 @@ void bad_reverse_cigar(char * cigar);
void subread_lock_occupy(subread_lock_t * lock);
void subread_init_lock(subread_lock_t * lock);
+void subread_destroy_lock(subread_lock_t * lock);
void subread_lock_release(subread_lock_t * lock);
void remove_indel_neighbours(HashTable * indel_table);
-float match_base_quality(gene_value_index_t *array_index, char * read_txt, unsigned int pos, char * qual_txt, int read_len, int is_negative, int phred_version, int * high_qual_unmatch, int * all_MM, int ql_kill);
+float match_base_quality(gene_value_index_t *array_index, char * read_txt, unsigned int pos, char * qual_txt, int read_len, int is_negative, int phred_version, int * high_qual_unmatch, int * all_MM, int ql_kill, int head_clipped, int tail_clipped);
int match_chro_indel(char * read, gene_value_index_t * index, unsigned int pos, int test_len, int is_negative_strand, int space_type, int indel_size, gene_vote_number_t * indel_recorder, int total_subreads);
-float match_base_quality_cs(gene_value_index_t *array_index, char * read_txt, unsigned int pos, char * qual_txt, int read_len, int phred_version, int * high_qual_unmatch, int * all_MM, int ql_kill);
+float match_base_quality_cs(gene_value_index_t *array_index, char * read_txt, unsigned int pos, char * qual_txt, int read_len, int phred_version, int * high_qual_unmatch, int * all_MM, int ql_kill, int head_clipped, int tail_clipped);
void print_version_info();
int fc_strcmp_chro(const void * s1, const void * s2);
unsigned long fc_chro_hash(const void *key) ;
+
#endif
diff --git a/src/global-reassembly.c b/src/global-reassembly.c
index a96a434..2a429a3 100644
--- a/src/global-reassembly.c
+++ b/src/global-reassembly.c
@@ -64,6 +64,7 @@ static struct option GRA_long_options[] =
{"hugeMemory", no_argument, 0, 'H'},
{"tmpDir",required_argument, 0, 't'},
{"trimQuality", required_argument, 0, 'Q'},
+ {"ignorePairedNames", no_argument, 0, 'C'},
{0, 0, 0, 0}
};
@@ -111,6 +112,7 @@ typedef struct {
int maximum_mismatch_in20bp;
int reverse_unmapped_reads;
int phred_offset;
+ int check_read_names;
unsigned int maximum_contig_length;
unsigned int minimum_contig_length;
@@ -276,18 +278,22 @@ void GRA_init_context(GRA_global_context_t * global_context)
global_context -> total_threads = 1;
- global_context -> maximum_mismatch_in20bp = 0;
+ //#warning "================ REMOVE ' + 1' FROM THE NEXT LINE ===================="
+ global_context -> maximum_mismatch_in20bp = 0 + 1;
global_context -> phred_offset = 33;
- global_context -> read_trim_base_quality = 30 + 33;
+ global_context -> read_trim_base_quality = 30 + global_context -> phred_offset;
- global_context -> subread_length = 16;
+ //#warning "================ REMOVE ' - 3' FROM THE NEXT LINE ===================="
+ global_context -> subread_length = 16 - 3;
global_context -> subread_extract_step = 5;
- global_context -> min_overlap_votes = 4;
+ //#warning "================ REMOVE ' - 2' FROM THE NEXT LINE ===================="
+ global_context -> min_overlap_votes = 4 - 2;
- global_context -> min_extension_votes = 3;
+ //#warning "================ REMOVE ' - 1' FROM THE NEXT LINE ===================="
+ global_context -> min_extension_votes = 3 - 1;
global_context -> contig_str = NULL;
global_context -> start_time = miltime();
global_context -> maximum_contig_length = 50000000;
@@ -295,13 +301,13 @@ void GRA_init_context(GRA_global_context_t * global_context)
global_context -> minimum_contig_length = 251;
global_context -> reverse_unmapped_reads = 0;
+ global_context -> check_read_names = 1;
global_context -> debug_flags = 0*GRA_DEBUG_ASSEMBLING;
}
-
int GRA_scan_best_overlap(GRA_global_context_t * global_context, char * piece, char * read, int rlen)
{
int xk1;
@@ -318,6 +324,8 @@ int GRA_scan_best_overlap(GRA_global_context_t * global_context, char * piece, c
return best_loc;
}
+
+
// r1 is a zero-terminating string
// r2 is not a zero-terminating string.
// r1 and r2 are on opposite strands.
@@ -458,7 +466,10 @@ int GRA_trim_reads(GRA_global_context_t * global_context)
if(in_flags & SAM_FLAG_SECOND_READ_IN_PAIR)
- assert(readname_hash == fc_chro_hash(in_readname));
+ {
+ if(global_context -> check_read_names)
+ assert(readname_hash == fc_chro_hash(in_readname));
+ }
else{
unsigned long new_readname_hash = fc_chro_hash(in_readname);
readname_hash = new_readname_hash;
@@ -1743,7 +1754,7 @@ int main(int argc, char ** argv)
}
char * tmptmp = malloc(320);
- while ((c = getopt_long (argc, argv, "2:t:Q:V:L:i:I:o:bHR6", GRA_long_options, &option_index)) != -1)
+ while ((c = getopt_long (argc, argv, "2:t:Q:V:L:i:I:o:bHCR6", GRA_long_options, &option_index)) != -1)
switch(c)
{
case '2':
@@ -1791,6 +1802,9 @@ int main(int argc, char ** argv)
case 'Q':
tmp_trim_qual = atoi(optarg);
break;
+ case 'C':
+ global_context -> check_read_names = 0;
+ break;
default:
GRA_print_usage();
diff --git a/src/hashtable.c b/src/hashtable.c
index 53cbd9f..d1573d6 100644
--- a/src/hashtable.c
+++ b/src/hashtable.c
@@ -6,7 +6,7 @@
* Released to the public domain.
*
*--------------------------------------------------------------------------
- * $Id: hashtable.c,v 9999.10 2014/03/04 23:53:25 cvs Exp $
+ * $Id: hashtable.c,v 9999.11 2015/01/25 21:32:56 cvs Exp $
\*--------------------------------------------------------------------------*/
#include <stdio.h>
@@ -742,3 +742,16 @@ void free_values_destroy(HashTable * tab)
HashTableDestroy(tab);
}
+
+void HashTableIteration(HashTable * tab, void process_item(void * hashed_obj, HashTable * tab) )
+{
+ int i;
+ for (i=0; i< tab ->numOfBuckets; i++) {
+ KeyValuePair *pair = tab ->bucketArray[i];
+ while (pair != NULL) {
+ process_item(pair -> value, tab);
+ KeyValuePair *nextPair = pair->next;
+ pair = nextPair;
+ }
+ }
+}
diff --git a/src/hashtable.h b/src/hashtable.h
index 8d66035..7bb475d 100644
--- a/src/hashtable.h
+++ b/src/hashtable.h
@@ -6,7 +6,7 @@
* Released to the public domain.
*
*--------------------------------------------------------------------------
- * $Id: hashtable.h,v 9999.6 2013/06/20 07:26:26 cvs Exp $
+ * $Id: hashtable.h,v 9999.7 2015/01/25 21:32:56 cvs Exp $
\*--------------------------------------------------------------------------*/
#ifndef _HASHTABLE_H
@@ -40,6 +40,8 @@ typedef struct {
long long int counter3;
} HashTable;
+void HashTableIteration(HashTable * tab, void process_item(void * hashed_obj, HashTable * tab) );
+
/*--------------------------------------------------------------------------*\
* NAME:
* HashTableCreate() - creates a new HashTable
diff --git a/src/index-builder.c b/src/index-builder.c
index d21eb7e..2fd6fdf 100644
--- a/src/index-builder.c
+++ b/src/index-builder.c
@@ -41,7 +41,6 @@
int GENE_SLIDING_STEP = 3;
int IS_COLOR_SPACE = 0;
int VALUE_ARRAY_INDEX = 1;
-int QUICK_BUILD = 0;
int MARK_NONINFORMATIVE_SUBREADS = 0;
int IS_FORCED_ONE_BLOCK = 0;
@@ -958,7 +957,7 @@ void SIGINT_hook(int param)
if(tmp_file_for_signal[0])
{
unlink(tmp_file_for_signal);
- SUBREADprintf("\n\nReceived a terminal signal. The temporary file was removed. The index was NOT built sucessfully. Please DO NOT use the new index until they are rebuilt.\n\n");
+ SUBREADprintf("\n\nReceived a terminal signal. The temporary file was removed. The index was NOT built successfully. Please DO NOT use the new index until they are rebuilt.\n\n");
}
exit(param);
@@ -975,7 +974,7 @@ int main(int argc,char ** argv)
int main_buildindex(int argc,char ** argv)
#endif
{
- int threshold = 24, optindex=0;
+ int threshold = 100, optindex=0;
int memory_limit; // 8000 MBytes
char output_file[300], c, tmp_fa_file[300], log_file_name[300];
char *ptr_tmp_fa_file[1];
@@ -997,11 +996,11 @@ int main_buildindex(int argc,char ** argv)
optind = 0;
- while ((c = getopt_long (argc, argv, "kvcqBFM:o:f:D?", ib_long_options, &optindex)) != -1)
+ while ((c = getopt_long (argc, argv, "kvcBFM:o:f:D?", ib_long_options, &optindex)) != -1)
switch(c)
{
case 'B':
- IS_FORCED_ONE_BLOCK = 1;
+ IS_FORCED_ONE_BLOCK =1;
break;
case 'F':
GENE_SLIDING_STEP =1;
@@ -1009,13 +1008,11 @@ int main_buildindex(int argc,char ** argv)
case 'v':
core_version_number("Subread-buildindex");
return 0;
- case 'q':
- QUICK_BUILD = 1;
- break;
case 'c':
IS_COLOR_SPACE = 1;
break;
case 'M':
+ IS_FORCED_ONE_BLOCK = 0;
memory_limit = atoi(optarg);
break;
case 'f':
@@ -1040,6 +1037,7 @@ int main_buildindex(int argc,char ** argv)
{
SUBREADprintf("Version %s\n\n", SUBREAD_VERSION);
+ /*
SUBREADputs("Usage:");
SUBREADputs("");
SUBREADputs(" ./subread-buildindex [options] -o <basename> {FASTA file1} [FASTA file2] ...");
@@ -1050,18 +1048,9 @@ int main_buildindex(int argc,char ** argv)
SUBREADputs("");
SUBREADputs("Optional arguments:");
SUBREADputs("");
- SUBREADputs(" -F build a full index for the reference genome. 16bp subreads");
- SUBREADputs(" will be extracted from every position of the reference");
- SUBREADputs(" genome. Size of the index is typically 3 times the size of");
- SUBREADputs(" index built from using the default setting.");
- SUBREADputs("");
- SUBREADputs(" -B create one block of index. The built index will not be split");
- SUBREADputs(" into multiple pieces. This makes the largest amount of");
- SUBREADputs(" memory be requested when running alignments, but it enables");
- SUBREADputs(" the maximum mapping speed to be achieved. This option");
- SUBREADputs(" overrides -M when it is provided as well.");
+ SUBREADputs(" -G build a gapped index for the reference genome. 16bp subreads");
SUBREADputs("");
- SUBREADputs(" -M <int> size of requested memory(RAM) in megabytes, 8000 by default.");
+ SUBREADputs(" -M <int> size of requested memory(RAM) in megabytes. Index is split into blocks if necessary.");
SUBREADputs("");
SUBREADputs(" -f <int> specify the threshold for removing uninformative subreads");
SUBREADputs(" (highly repetitive 16mers in the reference). 24 by default.");
@@ -1071,6 +1060,39 @@ int main_buildindex(int argc,char ** argv)
SUBREADputs(" -v output version of the program.");
SUBREADputs("");
SUBREADputs("For more information about these arguments, please refer to the User Manual.\n");
+ */
+ SUBREADputs("Usage:");
+ SUBREADputs("");
+ SUBREADputs(" ./subread-buildindex [options] -o <basename> {FASTA file1} [FASTA file2] ...");
+ SUBREADputs("");
+ SUBREADputs("Required arguments:");
+ SUBREADputs("");
+ SUBREADputs(" -o <basename> base name of the index to be created");
+ SUBREADputs("");
+ SUBREADputs("Optional arguments:");
+ SUBREADputs("");
+ SUBREADputs(" -F build a full index for the reference genome. 16bp subreads");
+ SUBREADputs(" will be extracted from every position of the reference");
+ SUBREADputs(" genome. Size of the index is typically 3 times the size of");
+ SUBREADputs(" index built from using the default setting.");
+ SUBREADputs("");
+ SUBREADputs(" -B create one block of index. The built index will not be split");
+ SUBREADputs(" into multiple pieces. This makes the largest amount of");
+ SUBREADputs(" memory be requested when running alignments, but it enables");
+ SUBREADputs(" the maximum mapping speed to be achieved. This option");
+ SUBREADputs(" overrides -M when it is provided as well.");
+ SUBREADputs("");
+ SUBREADputs(" -M <int> size of requested memory(RAM) in megabytes, 8000 by default.");
+ SUBREADputs("");
+ SUBREADputs(" -f <int> specify the threshold for removing uninformative subreads");
+ SUBREADputs(" (highly repetitive 16mers in the reference). 100 by default.");
+ SUBREADputs("");
+ SUBREADputs(" -c build a color-space index.");
+ SUBREADputs("");
+ SUBREADputs(" -v output version of the program.");
+ SUBREADputs("");
+ SUBREADputs("For more information about these arguments, please refer to the User Manual.\n");
+
return -1 ;
}
@@ -1108,7 +1130,15 @@ int main_buildindex(int argc,char ** argv)
int x1;
for(x1=0;x1< argc - optind; x1++)
- print_in_box(94, 0, 0, " %c[32mo%c[36m %s%c[0m", CHAR_ESC, CHAR_ESC, *(argv+optind+x1) , CHAR_ESC);
+ {
+ char * fasta_fn = *(argv+optind+x1);
+ int f_type = probe_file_type_fast(fasta_fn);
+ char o_char = 'o';
+ if(f_type != FILE_TYPE_FASTA){
+ o_char = '?';
+ }
+ print_in_box(94, 0, 0, " %c[32m%c%c[36m %s%c[0m", CHAR_ESC, o_char, CHAR_ESC, fasta_fn , CHAR_ESC);
+ }
print_in_box(80, 0, 0, "");
print_in_box(80, 2, 1, "http://subread.sourceforge.net/");
SUBREADputs("");
@@ -1116,6 +1146,19 @@ int main_buildindex(int argc,char ** argv)
print_in_box(80, 1, 1, "Running");
print_in_box(80, 0, 0, "");
+ for(x1=0;x1< argc - optind; x1++)
+ {
+ char * fasta_fn = *(argv+optind+x1);
+ int f_type = probe_file_type_fast(fasta_fn);
+ if(f_type != FILE_TYPE_FASTA && f_type != FILE_TYPE_NONEXIST){
+ SUBREADprintf("WARNING: '%s' is not a FASTA file.\n", fasta_fn);
+ if(f_type == FILE_TYPE_GZIP_FASTA){
+ SUBREADprintf(" The index builder does not accept gzipped files. The outcome is undefined.\n");
+ }
+ }
+ }
+
+
begin_ftime = miltime();
diff --git a/src/input-files.c b/src/input-files.c
index 43e9ef7..695bc23 100644
--- a/src/input-files.c
+++ b/src/input-files.c
@@ -27,12 +27,18 @@
#include <sys/resource.h>
#include <sys/stat.h>
#include <unistd.h>
+#include <zlib.h>
#include <stdio.h>
#include <assert.h>
#include "input-files.h"
+#include "sambam-file.h"
+#include "hashtable.h"
+#include "seek-zlib.h"
#include "gene-algorithms.h"
#include "sublog.h"
+#define FAST_PICARD_BAM_PROCESSING 0
+
unsigned int BASE_BLOCK_LENGTH = 15000000;
int is_R_warnned = 0;
@@ -56,56 +62,74 @@ void fastq_64_to_33(char * qs)
double guess_reads_density(char * fname, int is_sam)
{
- return guess_reads_density_format(fname, is_sam, NULL, NULL);
+ return guess_reads_density_format(fname, is_sam, NULL, NULL, NULL);
+}
+
+unsigned long long geinput_file_offset( gene_input_t * input){
+ if(input -> file_type == GENE_INPUT_GZIP_FASTQ){
+
+ return ((seekable_zfile_t*)input -> input_fp) -> block_start_in_file_offset + ((seekable_zfile_t*)input -> input_fp) ->in_block_offset * 5/16; // compressed text ~= plain text * 28%
+ }else{
+ return ftello((FILE*)input ->input_fp);
+ }
}
-double guess_reads_density_format(char * fname, int is_sam, int * min_phred_score, int * max_phred_score)
+
+double guess_reads_density_format(char * fname, int is_sam, int * min_phred_score, int * max_phred_score, int * tested_reads)
{
- gene_input_t ginp;
+ gene_input_t *ginp = malloc(sizeof(gene_input_t));
long long int fpos =0, fpos2 = 0;
int i;
int max_qual_chr = -1, min_qual_chr = 127;
char buff[MAX_READ_LENGTH] , qbuf[MAX_READ_LENGTH];
+ float retv = 0;
+
if(is_sam == 0)
{
- if(geinput_open(fname, &ginp))return -1.0;
+ if(geinput_open(fname, ginp))retv= -1.0;
}else if(is_sam == 1)
{
- if(geinput_open_sam(fname, &ginp,0))return -1.0;
+ if(geinput_open_sam(fname, ginp,0))retv= -1.0;
}else if(is_sam == 2)
{
- if(geinput_open_sam(fname, &ginp,1))return -1.0;
+ if(geinput_open_sam(fname, ginp,1))retv= -1.0;
}
- geinput_next_read(&ginp, NULL, buff, NULL);
-
- fpos = ftello(ginp.input_fp);
+ if(retv > -0.1){
+ geinput_next_read(ginp, NULL, buff, NULL);
- for(i=0; i<1000; i++)
- {
- if(geinput_next_read(&ginp, NULL, buff, qbuf)<0) break;
- if(qbuf[0])
+ fpos = geinput_file_offset(ginp);
+ for(i=0; i<3000; i++)
{
- int xk=0;
- while(qbuf[xk])
+ if(geinput_next_read(ginp, NULL, buff, qbuf)<0) break;
+ if(qbuf[0])
{
- min_qual_chr = min(min_qual_chr,qbuf[xk]);
- max_qual_chr = max(max_qual_chr,qbuf[xk++]);
+ int xk=0;
+ while(qbuf[xk])
+ {
+ min_qual_chr = min(min_qual_chr,qbuf[xk]);
+ max_qual_chr = max(max_qual_chr,qbuf[xk++]);
+ }
}
+ if(tested_reads)
+ (*tested_reads) ++;
+
}
-
- }
- if(min_phred_score)
- {
- (*min_phred_score) = min_qual_chr;
- (*max_phred_score) = max_qual_chr;
+ if(min_phred_score)
+ {
+ (*min_phred_score) = min_qual_chr;
+ (*max_phred_score) = max_qual_chr;
- }
- fpos2 = ftello(ginp.input_fp) - fpos;
- geinput_close(&ginp);
+ }
+ fpos2 = geinput_file_offset(ginp) - fpos;
+ geinput_close(ginp);
+
+ retv= fpos2*1.0/i;
+ }
- return fpos2*1.0/i;
+ free(ginp);
+ return retv;
}
int is_gene_char(char c)
@@ -143,15 +167,23 @@ long long int guess_gene_bases(char ** files, int file_number)
return ret * 70 / 71;
}
+int geinput_getc(gene_input_t * input){
+ if(input -> file_type == GENE_INPUT_GZIP_FASTQ){
+ return seekgz_next_char((seekable_zfile_t*)input -> input_fp);
+ }else{
+ return fgetc((FILE*)input -> input_fp);
+ }
+}
+
-int read_line_noempty(int max_read_len, FILE * fp, char * buff, int must_upper)
+int read_line_noempty(int max_read_len, gene_input_t * input, char * buff, int must_upper)
{
int ret =0;
if(must_upper)
{
while(1)
{
- char ch = fgetc(fp);
+ char ch = geinput_getc(input);
#ifdef WINDOWS
if(ch == '\r') continue;
#endif
@@ -168,7 +200,7 @@ int read_line_noempty(int max_read_len, FILE * fp, char * buff, int must_upper)
{
while(1)
{
- char ch = fgetc(fp);
+ char ch = geinput_getc(input);
#ifdef WINDOWS
if(ch == '\r') continue;
#endif
@@ -177,7 +209,7 @@ int read_line_noempty(int max_read_len, FILE * fp, char * buff, int must_upper)
if(ret)
break;
}
- else buff[ret++] = ch;
+ else if(ret < max_read_len-1) buff[ret++] = ch;
}
}
@@ -269,7 +301,7 @@ int read_line_back(int max_read_len, FILE * fp, char * buff, int must_upper)
int geinput_readline(gene_input_t * input, char * buff, int conv_to_upper)
{
- return read_line(1200, input -> input_fp, buff, conv_to_upper);
+ return read_line(MAX_READ_LENGTH, input -> input_fp, buff, conv_to_upper);
}
int is_read(char * in_buff)
@@ -279,11 +311,13 @@ int is_read(char * in_buff)
int space_type = GENE_SPACE_BASE;
while((c=in_buff[p++])!='\0')
{
- int x = is_gene_char(c);
- if (x == GENE_SPACE_COLOR)
- space_type = GENE_SPACE_COLOR;
- else if(!x)
- return 0;
+ if(c!='\r' && c!='\n'){
+ int x = is_gene_char(c);
+ if (x == GENE_SPACE_COLOR)
+ space_type = GENE_SPACE_COLOR;
+ else if(!x)
+ return 0;
+ }
}
return space_type;
}
@@ -330,57 +364,88 @@ int geinput_open_sam(const char * filename, gene_input_t * input, int half_numbe
int geinput_open(const char * filename, gene_input_t * input)
{
- char in_buff[1201];
- int line_no = 0;
+ char in_buff[MAX_READ_LENGTH];
+ int line_no = 0, ret = 0;
if(strlen(filename)>298)
return 1;
strcpy(input->filename, filename);
- input->input_fp = f_subr_open(filename, "rb");
+ FILE * TMP_FP = f_subr_open(filename, "rb");
- if(input->input_fp == NULL)
+ if(TMP_FP == NULL)
return 1;
- while (1){
- long long int last_pos = ftello(input->input_fp);
- int rlen = read_line_noempty(1200, input->input_fp, in_buff, 0);
- if (rlen<=0)
- return 1;
-
- if(line_no==0 && is_read(in_buff))
- {
- input->file_type = GENE_INPUT_PLAIN;
- input->space_type = is_read(in_buff);
- fseek(input->input_fp,last_pos,SEEK_SET);
- break;
- }
- if(in_buff[0]=='>')
- {
- input->file_type = GENE_INPUT_FASTA;
- // printf("FILE %s OPENED AS FATSA.\n", filename);
- rlen += read_line(1200, input->input_fp, in_buff, 0);
- input->space_type = is_read(in_buff);
-
- fseek(input->input_fp,last_pos,SEEK_SET);
- break;
+ int id1, id2;
+ id1 = fgetc(TMP_FP);
+ id2 = fgetc(TMP_FP);
+
+ if(id1 == 31 && id2 == 139) {
+ fclose(TMP_FP);
+ input->input_fp = malloc(sizeof(seekable_zfile_t));
+ input->file_type = GENE_INPUT_GZIP_FASTQ;
+ ret = seekgz_open(filename, input->input_fp);
+ if(ret == 0){
+ int fq_stat = 0;
+ for(line_no = 0; line_no < 1000; line_no++){
+ int fl = seekgz_gets(input->input_fp, in_buff, 1000);
+ if(fl < 1)break; // EOF
+ else if(fl == 1)continue; // empty line
+ else{ // text line
+ if(fq_stat%4 == 1) // read text
+ {
+ input->space_type = is_read(in_buff);
+ break;
+ }
+ fq_stat ++;
+ }
+ }
+ seekgz_close(input->input_fp);
+ seekgz_open(filename, input->input_fp);
+ }
+ }else{
+ input->file_type = GENE_INPUT_FASTQ;
+ input->input_fp = TMP_FP;
+ fseek(input->input_fp, 0, SEEK_SET);
+ while (1){
+ long long int last_pos = ftello(input->input_fp);
+ int rlen = read_line_noempty(MAX_READ_LENGTH, input, in_buff, 0);
+ if (rlen<=0){
+ ret = 1;
+ break;
+ }else{
+ if(line_no==0 && is_read(in_buff))
+ {
+ input->file_type = GENE_INPUT_PLAIN;
+ input->space_type = is_read(in_buff);
+ fseek(input->input_fp,last_pos,SEEK_SET);
+ break;
+ }
+ if(in_buff[0]=='>')
+ {
+ input->file_type = GENE_INPUT_FASTA;
+ // printf("FILE %s OPENED AS FATSA.\n", filename);
+ rlen += read_line(MAX_READ_LENGTH, input->input_fp, in_buff, 0);
+ input->space_type = is_read(in_buff);
+
+ fseek(input->input_fp,last_pos,SEEK_SET);
+ break;
+ }
+ if(in_buff[0]=='@')
+ {
+ input->file_type = GENE_INPUT_FASTQ;
+ rlen += read_line_noempty(MAX_READ_LENGTH, input, in_buff, 0);
+ input->space_type = is_read(in_buff);
+ fseek(input->input_fp, last_pos,SEEK_SET);
+ break;
+ }
+ line_no++;
+ }
}
- if(in_buff[0]=='@')
- {
- input->file_type = GENE_INPUT_FASTQ;
- // printf("FILE %s OPENED AS FATSQ.\n", filename);
-
- rlen += read_line_noempty(1200, input->input_fp, in_buff, 0);
- input->space_type = is_read(in_buff);
-
-
- fseek(input->input_fp, last_pos,SEEK_SET);
- break;
- }
- line_no++;
}
+ input -> read_chunk_start = geinput_file_offset(input);
- input -> read_chunk_start = 0;
- return 0;
+ if(0 == input->space_type)input->space_type = GENE_SPACE_BASE;
+ return ret;
}
@@ -483,8 +548,8 @@ int geinput_readline_back(gene_input_t * input, char * linebuffer_3000)
return ret;
}
-#define SKIP_LINE { nch=' '; while(nch != EOF && nch != '\n') nch = fgetc(input->input_fp); }
-#define SKIP_LINE_NOEMPTY {int content_line_l = 0; nch=' '; while(nch != EOF && (nch != '\n' ||! content_line_l)){nch = fgetc(input->input_fp); content_line_l += (nch != '\n');} }
+#define SKIP_LINE { nch=' '; while(nch != EOF && nch != '\n') nch = geinput_getc(input); }
+#define SKIP_LINE_NOEMPTY {int content_line_l = 0; nch=' '; while(nch != EOF && (nch != '\n' ||! content_line_l)){nch = geinput_getc(input); content_line_l += (nch != '\n');} }
void geinput_jump_read(gene_input_t * input)
{
@@ -558,94 +623,20 @@ unsigned int read_numbers(gene_input_t * input)
return ret;
}
-int geinput_next_read_sam(gene_input_t * input, char * read_name, char * read_string, char * quality_string, gene_offset_t* offsets, unsigned int *pos, int * quality, int * flags, int need_reversed)
-{
- char in_buff [3001];
- int tabs ;
- int current_str_pos = 0;
- int i;
- int ret = -1;
- int in_sam_reverse = 0;
- int mapping_flags = 0;
- int mapping_quality = 0;
- char chro[MAX_CHROMOSOME_NAME_LEN];
- unsigned int chro_pos = 0;
-
-
- while(1)
- {
- int linelen = read_line(3000, input->input_fp, in_buff, 0);
- if(linelen <1)return -1;
- if(read_name)
- *read_name = 0;
- if(quality_string)
- *quality_string = 0;
- *read_string = 0;
- mapping_flags = 0;
- tabs=0;
- mapping_quality = 0;
- for(i=0; i<linelen+1; i++)
- {
- if(in_buff[i]=='\t'|| i ==linelen)
- {
- if(tabs == 0 && read_name)read_name[current_str_pos] = 0;
- if(tabs == 2)
- {
- chro[current_str_pos] = 0;
- }
- if(tabs == 1)
- {
- in_sam_reverse = (mapping_flags & 16 )?1:0;
- }
- if(tabs == 9){
- read_string[current_str_pos] = 0;
- ret = current_str_pos;
- }
- if(tabs == 10 && quality_string){
- quality_string[current_str_pos] = 0;
- break;
- }
-
- current_str_pos = 0;
- tabs +=1;
- }
- else
- {
- if(tabs == 9)// read
- read_string[current_str_pos++] = in_buff[i];
- else if(tabs == 10 && quality_string)// quality string
- quality_string[current_str_pos++] = in_buff[i];
- else if(tabs == 0 && read_name)// name
- read_name[current_str_pos++] = in_buff[i];
- else if(tabs == 1)
- mapping_flags = mapping_flags*10+(in_buff[i]-'0');
- else if(tabs == 2)
- chro[current_str_pos++] = in_buff[i];
- else if(tabs == 3)
- chro_pos = chro_pos*10+(in_buff[i]-'0');
- else if(tabs == 4)
- mapping_quality = mapping_quality*10+(in_buff[i]-'0');
- else if(tabs == 5)
- if(in_buff[i]=='S') mapping_quality = 0;
- }
- }
- if(0==(mapping_flags & SAM_FLAG_SECONDARY_MAPPING))
- break;
+void geinput_tell(gene_input_t * input, gene_inputfile_position_t * pos){
+ if(input -> file_type == GENE_INPUT_GZIP_FASTQ){
+ seekgz_tell(( seekable_zfile_t *)input -> input_fp, &pos -> seekable_gzip_position);
+ }else{
+ pos -> simple_file_position = ftello((FILE *)input -> input_fp);
}
- *quality = mapping_quality;
- *flags=mapping_flags;
- if(offsets)
- *pos= linear_gene_position(offsets , chro, chro_pos-1);
-
+}
- if(in_sam_reverse + need_reversed == 1)
- {
- if(quality_string)
- reverse_quality(quality_string, ret);
- reverse_read(read_string, ret, input->space_type);
+void geinput_seek(gene_input_t * input, gene_inputfile_position_t * pos){
+ if(input -> file_type == GENE_INPUT_GZIP_FASTQ){
+ seekgz_seek(( seekable_zfile_t *)input -> input_fp, &pos -> seekable_gzip_position);
+ }else{
+ fseeko((FILE *)input -> input_fp, pos -> simple_file_position, SEEK_SET);
}
- return ret;
-
}
int trim_read_inner(char * read_text, char * qual_text, int rlen, short t_5, short t_3)
@@ -683,24 +674,38 @@ int trim_read_inner(char * read_text, char * qual_text, int rlen, short t_5, sho
return max(0, rlen - t_5 - t_3);
}
+long long int tell_current_line_no(gene_input_t * input){
+ long long int fpos = ftello(input->input_fp);
+ fseeko(input->input_fp,0,SEEK_SET);
+ long long ret = 0, fscanpos = 0;
+ while(1)
+ {
+ char nch = fgetc(input->input_fp);
+ if(nch == EOF) return -1;
+ if(nch == '\n') ret ++;
+ fscanpos ++;
+ if(fscanpos >= fpos){
+ fseeko(input->input_fp, fpos, SEEK_SET);
+ return ret;
+ }
+ }
+}
+
int geinput_next_read(gene_input_t * input, char * read_name, char * read_string, char * quality_string)
{
return geinput_next_read_trim( input, read_name, read_string, quality_string, 0, 0, NULL);
}
int geinput_next_read_trim(gene_input_t * input, char * read_name, char * read_string, char * quality_string, short trim_5, short trim_3, int * is_secondary)
{
- if(input->file_type == GENE_INPUT_PLAIN)
- {
- int ret = read_line(1200, input->input_fp, read_string, 0);
+ if(input -> file_type == GENE_INPUT_PLAIN) {
+ int ret = read_line(MAX_READ_LENGTH, input->input_fp, read_string, 0);
if(quality_string) *quality_string=0;
if(ret <3)return -1;
if(trim_5 || trim_3) ret = trim_read_inner(read_string, NULL, ret, trim_5, trim_3);
return ret;
- }
- else if(input->file_type >= GENE_INPUT_SAM_SINGLE)
- {
+ } else if(input->file_type >= GENE_INPUT_SAM_SINGLE) {
char in_buff [3001];
int tabs;
int current_str_pos;
@@ -782,15 +787,13 @@ int geinput_next_read_trim(gene_input_t * input, char * read_name, char * read_s
}
if(trim_5 || trim_3) ret = trim_read_inner(read_string, quality_string, ret, trim_5, trim_3);
return ret;
- }
- else if(input->file_type == GENE_INPUT_FASTA)
- {
+ } else if(input->file_type == GENE_INPUT_FASTA) {
int ret;
if(quality_string) (*quality_string)=0;
while(1) // fetch read name
{
- ret = read_line(1200, input->input_fp, read_string, 0);
+ ret = read_line(MAX_READ_LENGTH, input->input_fp, read_string, 0);
if(ret <1)
{
sublog_printf(SUBLOG_STAGE_RELEASED,SUBLOG_LEVEL_DEBUG, "The input file normally exhausted.");
@@ -820,7 +823,7 @@ int geinput_next_read_trim(gene_input_t * input, char * read_name, char * read_s
while(1) // fetch read text
{
char nch;
- ret += read_line(1200-ret, input->input_fp, read_string+ret, 1);
+ ret += read_line(MAX_READ_LENGTH-ret, input->input_fp, read_string+ret, 1);
nch = fgetc(input->input_fp);
@@ -835,9 +838,7 @@ int geinput_next_read_trim(gene_input_t * input, char * read_name, char * read_s
if(trim_5 || trim_3) ret = trim_read_inner(read_string, quality_string, ret, trim_5, trim_3);
return ret;
- }
- else if(input->file_type == GENE_INPUT_FASTQ)
- {
+ } else if(input->file_type == GENE_INPUT_FASTQ || input->file_type == GENE_INPUT_GZIP_FASTQ) {
char nch;
int ret;
@@ -849,25 +850,19 @@ int geinput_next_read_trim(gene_input_t * input, char * read_name, char * read_s
}
else
{
- while(1)
- {
- nch = fgetc(input->input_fp);
- if(nch==EOF) return -1;
- #ifdef WINDOWS
- if(nch=='\r')
- {
- nch = fgetc(input->input_fp);
- if(nch==EOF) return -1;
- }
- #endif
- if(nch == '@') break;
- if(nch != '\n' && nch != '\r')
- {
- SKIP_LINE_NOEMPTY;
- }
+ do{
+ nch = geinput_getc(input);
+ } while (nch == '\n');
+ if(nch==EOF) return -1;
+
+ if(nch != '@') {
+ long long int lineno = tell_current_line_no(input);
+ SUBREADprintf("ERROR: a format issue %c is found on the %lld-th line in input file '%s'!\nProgram aborted!\n", nch, lineno, input -> filename);
+ return -1;
}
- read_line_noempty(MAX_READ_NAME_LEN, input->input_fp, read_name, 0);
+ read_line_noempty(MAX_READ_NAME_LEN, input, read_name, 0);
+
int cursor = 1;
while(read_name[cursor])
{
@@ -880,17 +875,27 @@ int geinput_next_read_trim(gene_input_t * input, char * read_name, char * read_s
}
}
// READ LINE
- ret = read_line_noempty(1200, input->input_fp, read_string, 1);
+ ret = read_line_noempty(MAX_READ_LENGTH, input, read_string, 1);
// SKIP "+"
- SKIP_LINE_NOEMPTY;
+ do{
+ nch = geinput_getc(input);
+ } while( nch == '\n' );
+ if(nch != '+'){
+ long long int lineno = tell_current_line_no(input);
+ SUBREADprintf("ERROR: a format issue %c is found on the %lld-th line in input file '%s'!\nProgram aborted!\n", nch, lineno, input -> filename);
+ return -1;
+ }
+ SKIP_LINE;
// QUAL LINE
if (quality_string)
- read_line_noempty(1200, input->input_fp, quality_string, 0);
+ read_line_noempty(MAX_READ_LENGTH, input, quality_string, 0);
else
SKIP_LINE_NOEMPTY;
+
+
#ifdef MODIFIED_READ_LEN
{
int modified_start = 0;
@@ -920,7 +925,10 @@ int geinput_next_read_trim(gene_input_t * input, char * read_name, char * read_s
}
void geinput_close(gene_input_t * input)
{
- fclose(input->input_fp);
+ if(input -> file_type == GENE_INPUT_GZIP_FASTQ)
+ seekgz_close((seekable_zfile_t * ) input->input_fp);
+ else
+ fclose((FILE*)input->input_fp);
}
char * __converting_char_table = "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTNGNNNCNNNNNNNNNNNNAANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTNGNNNCNNNNNNNNNNNNAANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN ";
@@ -1053,6 +1061,7 @@ int genekey2int(char key [],int space_type)
if(space_type == GENE_SPACE_BASE)
for (i=0; i<16; i++)
{
+ //SUBREADprintf("KV=%c\n", key[i]);
//ret = ret << 2;
ret |= (base2int(key[i]))<<(2*(15-i));
}
@@ -1063,7 +1072,7 @@ int genekey2int(char key [],int space_type)
ret |= color2int (key[i]);
}
-// printf("RET=%u\n",ret);
+ //SUBREADprintf("RET=%u\n",ret);
return ret;
}
@@ -1376,7 +1385,7 @@ void write_read_block_file(FILE *temp_fp , unsigned int read_number, char *read_
datum.read_len = read_len;
datum.mapping_quality = mapping_quality;
- if(rl < 1|| rl > 1200)
+ if(rl < 1|| rl > MAX_READ_LENGTH)
{
SUBREADprintf("READ IS TOO LONG:%d\n", rl);
@@ -2036,19 +2045,25 @@ unsigned long long int sort_SAM_hash(char * str)
}
+void do_SIGINT_remove(char * prefix, int param);
char * _SAMSORT_SNP_delete_temp_prefix = NULL;
-void SAM_SORT_SIGINT_hook(int param)
-{
- #ifdef MAKE_STANDALONE
- int xk1, last_slash = -1;
- if(_SAMSORT_SNP_delete_temp_prefix != NULL)
+char * _REPAIRER_delete_temp_prefix = NULL;
+void SAM_SORT_SIGINT_hook(int param) {
+ do_SIGINT_remove(_SAMSORT_SNP_delete_temp_prefix, param);
+}
+void REPAIR_SIGINT_hook(int param) {
+ do_SIGINT_remove(_REPAIRER_delete_temp_prefix, param);
+}
+
+void delete_with_prefix(char * prefix){
+ if(prefix != NULL)
{
+ int xk1, last_slash = -1;
char del2[300], del_suffix[200], del_name[400];
- SUBREADprintf("\n\nReceived a terminal signal. The temporary files were removed.\n");
- for(xk1=0; _SAMSORT_SNP_delete_temp_prefix[xk1]; xk1++)
+ for(xk1=0; prefix[xk1]; xk1++)
{
- if(_SAMSORT_SNP_delete_temp_prefix[xk1]=='/') last_slash = xk1;
- else if(_SAMSORT_SNP_delete_temp_prefix[xk1]=='\\')
+ if(prefix[xk1]=='/') last_slash = xk1;
+ else if(prefix[xk1]=='\\')
{
SUBREADprintf("The file name is unknown.\n");
return;
@@ -2056,14 +2071,14 @@ void SAM_SORT_SIGINT_hook(int param)
}
if(last_slash>=0)
{
- memcpy(del2, _SAMSORT_SNP_delete_temp_prefix, last_slash);
+ memcpy(del2, prefix, last_slash);
del2[last_slash] = 0;
- strcpy(del_suffix , _SAMSORT_SNP_delete_temp_prefix + last_slash + 1);
+ strcpy(del_suffix , prefix + last_slash + 1);
}
else
{
strcpy(del2,".");
- strcpy(del_suffix , _SAMSORT_SNP_delete_temp_prefix);
+ strcpy(del_suffix , prefix);
}
if(strlen(del_suffix)>8)
@@ -2078,11 +2093,12 @@ void SAM_SORT_SIGINT_hook(int param)
{
if(strstr(dir->d_name, del_suffix))
{
- //printf("%s\n", dir->d_name);
+ //SUBREADprintf("DEL:%s\n", dir->d_name);
strcpy(del_name, del2);
strcat(del_name, "/");
strcat(del_name, dir->d_name);
unlink(del_name);
+ //test fix
}
}
}
@@ -2090,6 +2106,12 @@ void SAM_SORT_SIGINT_hook(int param)
}
+}
+
+void do_SIGINT_remove(char * prefix, int param) {
+ #ifdef MAKE_STANDALONE
+ delete_with_prefix(prefix);
+ SUBREADprintf("\n\nReceived a terminal signal. The temporary files were removed.\n");
exit(param);
#endif
}
@@ -2097,145 +2119,2370 @@ void SAM_SORT_SIGINT_hook(int param)
void * old_sig_TERM = NULL, * old_sig_INT = NULL;
-int sort_SAM_create(SAM_sort_writer * writer, char * output_file, char * tmp_path)
-{
- char tmp_fname[MAX_FILE_NAME_LENGTH+40];
- memset(writer, 0, sizeof(SAM_sort_writer));
-
- old_sig_TERM = signal (SIGTERM, SAM_SORT_SIGINT_hook);
- old_sig_INT = signal (SIGINT, SAM_SORT_SIGINT_hook);
-
- sprintf(writer -> tmp_path, "%s/temp-sort-%06u-%08X-", tmp_path, getpid(), rand());
- _SAMSORT_SNP_delete_temp_prefix = writer -> tmp_path;
-
- sprintf(tmp_fname, "%s%s", writer -> tmp_path, "headers.txt");
- writer -> all_chunks_header_fp = f_subr_open(tmp_fname,"w");
- if(!writer -> all_chunks_header_fp) return -1;
- fclose(writer -> all_chunks_header_fp);
- unlink(tmp_fname);
+#define PAIRER_GZIP_WINDOW_BITS -15
+#define PAIRER_DEFAULT_MEM_LEVEL 8
- writer -> out_fp = f_subr_open(output_file,"w");
- if(!writer -> out_fp) return -1;
+int SAM_pairer_writer_create( SAM_pairer_writer_main_t * bam_main , int all_threads , int has_dummy, int BAM_input, int c_level, char * out_file){
+ int x1;
+ memset(bam_main, 0, sizeof(SAM_pairer_writer_main_t));
+ bam_main -> bam_fp = f_subr_open(out_file, "wb");
+ if(NULL == bam_main -> bam_fp) return 1;
+ strcpy(bam_main -> bam_name, out_file);
+ bam_main -> threads = malloc(all_threads * sizeof(SAM_pairer_writer_thread_t));
+ bam_main -> all_threads = all_threads;
+ bam_main -> has_dummy = has_dummy;
+ bam_main -> compression_level = c_level;
+ subread_init_lock(&bam_main -> output_fp_lock);
+
+ for(x1 = 0; x1 < all_threads ; x1 ++){
+ bam_main -> threads[x1].BIN_buffer_ptr = 0;
+ bam_main -> threads[x1].strm.zalloc = Z_NULL;
+ bam_main -> threads[x1].strm.zfree = Z_NULL;
+ bam_main -> threads[x1].strm.opaque = Z_NULL;
+ bam_main -> threads[x1].strm.avail_in = 0;
+ bam_main -> threads[x1].strm.next_in = Z_NULL;
+
+ deflateInit2(&bam_main -> threads[x1].strm, bam_main -> compression_level, Z_DEFLATED,
+ PAIRER_GZIP_WINDOW_BITS, PAIRER_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY);
+ }
return 0;
}
-void find_tag_out(char * read_line_buf, char * tag, char * hi_tag_out)
+void SAM_pairer_write_BAM_header(FILE * writer, int compressed_size)
{
- int hi_tag = -1;
- char tag_str[10];
- sprintf(tag_str , "\t%s:i:", tag);
- char * hi_tag_str = strstr(read_line_buf, tag_str);
- if(hi_tag_str)
- {
+ // the four magic characters
+ fputc(31, writer);
+ fputc(139, writer);
+ fputc(8, writer);
+ fputc(4, writer);
- hi_tag = 0;
- int line_cursor;
- for(line_cursor=6; ; line_cursor++)
- {
- char nch = hi_tag_str[line_cursor];
-// printf("HI:i=%s; nch [%d] ='%c'\n", hi_tag_str, line_cursor, nch);
- if(!isdigit(nch)) break;
- hi_tag = hi_tag*10 + (nch-'0');
- }
- }
+ time_t time_now = 0;
+ fwrite(&time_now,4,1, writer);
- if(hi_tag >=0)
- {
- sprintf(hi_tag_out,"\t%s:i:%d", tag, hi_tag);
- }else hi_tag_out[0] = 0;
+ int tmp_i;
+ // Extra flags and OS
+ fputc(0, writer);
+ fputc(0xff, writer);
+ // Extra length
+ tmp_i = 6;
+ fwrite(&tmp_i,2,1, writer);
+
+ // SI1 and SI2 magic numbers, and SLEN
+ fputc(66, writer);
+ fputc(67, writer);
+ tmp_i = 2;
+ fwrite(&tmp_i,2,1, writer);
+ tmp_i = compressed_size + 19 + 6;
+ fwrite(&tmp_i,2,1, writer);
}
-void sort_SAM_finalise(SAM_sort_writer * writer)
+
+
+int SAM_pairer_multi_thread_compress(SAM_pairer_writer_main_t * bam_main , SAM_pairer_writer_thread_t * bam_thread)
{
- int x1_chunk, x1_block;
- int xk1;
- for(xk1=0;xk1<SAM_SORT_BLOCKS;xk1++)
- {
- if(writer -> current_block_fp_array[xk1])
- fclose(writer -> current_block_fp_array[xk1]);
+ #define BAM_compressed_space 65536
+ char * BAM_compressed = malloc(BAM_compressed_space);
+ int ret, have;
+ if(bam_thread -> BIN_buffer_ptr>0){
+ deflateReset(&bam_thread -> strm);
+ bam_thread -> strm.avail_in = bam_thread -> BIN_buffer_ptr;
+ bam_thread -> strm.next_in = bam_thread -> BIN_buffer;
+ bam_thread -> strm.avail_out = BAM_compressed_space;
+ bam_thread -> strm.next_out = (unsigned char *)BAM_compressed;
+ ret = deflate( &bam_thread -> strm , Z_FINISH);
+
+ have = BAM_compressed_space - bam_thread -> strm.avail_out;
+ assert(bam_thread -> strm.avail_in == 0);
+ }else{
+ z_stream nstrm;
+ nstrm.zalloc = Z_NULL;
+ nstrm.zfree = Z_NULL;
+ nstrm.opaque = Z_NULL;
+ nstrm.avail_in = 0;
+ nstrm.next_in = Z_NULL;
+
+ deflateInit2(&nstrm, SAMBAM_COMPRESS_LEVEL, Z_DEFLATED,
+ PAIRER_GZIP_WINDOW_BITS, PAIRER_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY);
+
+ nstrm.avail_in = 0;
+ nstrm.next_in = bam_thread -> BIN_buffer;
+ nstrm.avail_out = BAM_compressed_space;
+ nstrm.next_out = (unsigned char *)BAM_compressed;
+ ret = deflate(&nstrm, Z_FINISH);
+ deflateEnd(&nstrm);
+ have = BAM_compressed_space - nstrm.avail_out;
}
- memset(writer -> current_block_fp_array, 0, sizeof(FILE *)*SAM_SORT_BLOCKS);
- writer -> current_chunk_size = 0;
- writer -> current_chunk++;
-
- for(x1_block = 0; x1_block <SAM_SORT_BLOCKS; x1_block++){
- HashTable * first_read_name_table;
- first_read_name_table = HashTableCreate(SAM_SORT_BLOCK_SIZE / 100 );
- HashTableSetKeyComparisonFunction(first_read_name_table , fc_strcmp_chro);
- HashTableSetDeallocationFunctions(first_read_name_table , free, free);
- HashTableSetHashFunction(first_read_name_table, HashTableStringHashFunction);
+ if(ret == Z_OK || 1){
- for(x1_chunk = 0; x1_chunk < writer -> current_chunk; x1_chunk++)
- {
- char tmpfname[MAX_FILE_NAME_LENGTH+40];
- sprintf(tmpfname, "%sCHK%08d-BLK%03d.bin", writer -> tmp_path, x1_chunk , x1_block);
+ //SUBREADprintf("Compress: %d -> %d %p\n", bam_thread -> BIN_buffer_ptr, have, bam_main -> bam_fp);
+ //if(bam_thread -> BIN_buffer_ptr == 0) have = 0;
+ unsigned int crc0 = crc32(0, NULL, 0);
+ unsigned int CRC32 = crc32(crc0, (unsigned char *) bam_thread -> BIN_buffer ,bam_thread -> BIN_buffer_ptr);
- FILE * bbfp = f_subr_open(tmpfname,"rb");
- if(!bbfp) continue;
- while(!feof(bbfp))
- {
- char * read_name = NULL;
- short flags;
- short read_name_len;
- short read_len;
- int ret = fread(&flags, 2,1 , bbfp);
- if(ret<1) break;
- fread(&read_name_len, 2,1 , bbfp);
- if(flags & SAM_FLAG_SECOND_READ_IN_PAIR)
- fseek(bbfp, read_name_len, SEEK_CUR);
- else
- {
- read_name = malloc(read_name_len+1);
- fread(read_name, 1, read_name_len, bbfp);
- read_name[read_name_len] = 0;
- }
- fread(&read_len,2,1,bbfp);
- if(flags & SAM_FLAG_SECOND_READ_IN_PAIR)
- fseek(bbfp, read_len, SEEK_CUR);
- else
- {
- char * new_line_mem = malloc(read_len+1);
- fread(new_line_mem, 1, read_len, bbfp);
- new_line_mem[read_len] = 0;
+ subread_lock_occupy( &bam_main -> output_fp_lock );
+ SAM_pairer_write_BAM_header( bam_main -> bam_fp , have);
+ fwrite(BAM_compressed,1, have, bam_main -> bam_fp );
+ fwrite(&CRC32 , 4, 1, bam_main -> bam_fp);
+ fwrite( &bam_thread -> BIN_buffer_ptr , 4, 1, bam_main -> bam_fp);
- if(read_len<2)
- {
- SUBREADprintf("Cannot determain read length from the tmp file!\n");
- assert(0);
- }
+ subread_lock_release( &bam_main -> output_fp_lock );
+ bam_thread -> BIN_buffer_ptr = 0;
+ } else {
+ SUBREADprintf("ERROR: Cannot compress a BAM block : %d\n", ret);
+ return 1;
+ }
+ free(BAM_compressed);
+ return 0;
+}
- if( new_line_mem[0]==0 || new_line_mem[1]==0)
- {
- SUBREADprintf("Cannot load read part from the tmp file!\n");
- assert(0);
- }
- char * old_line_mem = HashTableGet(first_read_name_table, read_name);
- if(old_line_mem)
- old_line_mem[0]=0xff;
- else
- HashTablePut(first_read_name_table, read_name, new_line_mem);
- //if( first_read_name_table -> numOfElements<4)printf("RV=%s\n", read_name);
- }
- }
+void SAM_pairer_writer_destroy( SAM_pairer_writer_main_t * bam_main ) {
+ int x1;
+ for(x1 = 0; x1 < bam_main -> all_threads ; x1 ++){
+ if(bam_main -> threads[x1].BIN_buffer_ptr>0){
+ SAM_pairer_multi_thread_compress(bam_main, bam_main->threads+x1);
+ }
- fclose(bbfp);
+ if(x1 == bam_main -> all_threads - 1){
+ assert(0 == bam_main -> threads[x1].BIN_buffer_ptr);
+ SAM_pairer_multi_thread_compress(bam_main, bam_main->threads+x1);
}
+ deflateEnd(&bam_main -> threads[x1].strm);
+ }
+ subread_destroy_lock(&bam_main -> output_fp_lock);
+ fclose(bam_main -> bam_fp);
+ free(bam_main -> threads);
+}
- //printf("BLK=%d; CKS=%d; READS=%llu\n", x1_block, x1_chunk, first_read_name_table -> numOfElements);
- unsigned long long int finished_second_reads = 0;
+// Tiny_Mode only write the following information:
+// Name Flag Chro Pos Mapq Cigar MateChro MatePos Tlen N I NH:i:xx HI:i:xx
+// Tiny_Mode does not work when output and input are both in BAM format
+// in_format can be either
+// bin_buff_size_per_thread is in Mega-Bytes.
+// It returns 0 if no error
+int SAM_pairer_create(SAM_pairer_context_t * pairer, int all_threads, int bin_buff_size_per_thread, int BAM_input, int is_Tiny_Mode, int is_single_end_mode, int force_do_not_sort, int display_progress, char * in_file, void (* reset_output_function) (void * pairer), int (* output_header_function) (void * pairer, int thread_no, int is_text, unsigned int items, char * bin, unsigned int bin_len), int (* output_function) (void * pairer, int thread_no, char * readname, char * bin1, char * bin2 [...]
+
+ memset(pairer, 0, sizeof(SAM_pairer_context_t));
+ pairer -> input_fp = f_subr_open(in_file, "rb");
+ if(NULL == pairer -> input_fp) return 1;
+
+ pairer -> input_is_BAM = BAM_input;
+ pairer -> tiny_mode = is_Tiny_Mode;
+ pairer -> reset_output_function = reset_output_function;
+ pairer -> output_function = output_function;
+ pairer -> output_header = output_header_function;
+ pairer -> display_progress = display_progress;
+ pairer -> is_single_end_mode = is_single_end_mode;
+ pairer -> force_do_not_sort = force_do_not_sort;
+ subread_init_lock(&pairer -> input_fp_lock);
+ subread_init_lock(&pairer -> output_header_lock);
+
+ pairer -> total_threads = all_threads;
+ pairer -> input_buff_SBAM_size = bin_buff_size_per_thread * 1024 * 1024;
+ pairer -> input_buff_BIN_size = 1024*1024;
+
+ pairer -> appendix1 = appendix1;
+
+ _REPAIRER_delete_temp_prefix = tmp_path;
+ old_sig_TERM = signal (SIGTERM, REPAIR_SIGINT_hook);
+ old_sig_INT = signal (SIGINT, REPAIR_SIGINT_hook);
+
+ strcpy(pairer -> tmp_file_prefix, tmp_path);
+ pairer -> threads = malloc(all_threads * sizeof(SAM_pairer_thread_t));
+ memset(pairer -> threads, 0, all_threads * sizeof(SAM_pairer_thread_t));
+
+ if(pairer ->input_is_BAM){
+ pairer ->bam_margin_table = HashTableCreate(2191);
+ HashTableSetHashFunction(pairer -> bam_margin_table, fc_chro_hash);
+ HashTableSetKeyComparisonFunction(pairer -> bam_margin_table, fc_strcmp_chro);
+ HashTableSetDeallocationFunctions(pairer -> bam_margin_table, free, free);
+ }else{
+ pairer -> sam_contig_number_table = HashTableCreate(21907);
+ HashTableSetHashFunction(pairer -> sam_contig_number_table, fc_chro_hash);
+ HashTableSetKeyComparisonFunction(pairer -> sam_contig_number_table, fc_strcmp_chro);
+ HashTableSetDeallocationFunctions(pairer -> sam_contig_number_table, free, NULL);
+ }
- for(x1_chunk = 0; x1_chunk < writer -> current_chunk; x1_chunk++)
- {
- char tmpfname[MAX_FILE_NAME_LENGTH+40];
- sprintf(tmpfname, "%sCHK%08d-BLK%03d.bin", writer -> tmp_path, x1_chunk , x1_block);
+ int x1;
+
+ for(x1 = 0; x1 < all_threads ; x1++){
+ pairer -> threads[x1].thread_id = x1;
+ pairer -> threads[x1].reads_in_SBAM = 0;
+ pairer -> threads[x1].input_buff_SBAM = malloc(pairer -> input_buff_SBAM_size);
+ pairer -> threads[x1].input_buff_BIN = malloc(pairer -> input_buff_BIN_size);
+
+ pairer -> threads[x1].input_buff_BIN_used = 0;
+ pairer -> threads[x1].orphant_table = HashTableCreate(pairer -> input_buff_SBAM_size / 100);
+ HashTableSetHashFunction(pairer -> threads[x1].orphant_table, fc_chro_hash);
+ HashTableSetKeyComparisonFunction(pairer -> threads[x1].orphant_table, fc_strcmp_chro);
+ HashTableSetDeallocationFunctions(pairer -> threads[x1].orphant_table, free, free);
+ pairer -> threads[x1].strm.zalloc = Z_NULL;
+ pairer -> threads[x1].strm.zfree = Z_NULL;
+ pairer -> threads[x1].strm.opaque = Z_NULL;
+ pairer -> threads[x1].strm.avail_in = 0;
+ pairer -> threads[x1].strm.next_in = Z_NULL;
+
+ inflateInit2(&pairer -> threads[x1].strm, PAIRER_GZIP_WINDOW_BITS);
+
+ if(force_do_not_sort)
+ subread_init_lock(&pairer -> threads[x1].SBAM_lock);
+ }
+ return 0;
+}
+
+void SAM_pairer_destroy(SAM_pairer_context_t * pairer){
+
+ int x1;
+ unsigned long long all_orphants = 0;
+ for(x1 = 0; x1 < pairer -> total_threads ; x1++){
+ inflateEnd(&pairer -> threads[x1].strm);
+ free(pairer -> threads[x1].input_buff_BIN);
+ free(pairer -> threads[x1].input_buff_SBAM);
+
+ if(pairer -> force_do_not_sort)
+ subread_destroy_lock(&pairer -> threads[x1].SBAM_lock);
+
+ all_orphants += pairer -> threads[x1].orphant_table->numOfElements;
+ HashTableDestroy(pairer -> threads[x1].orphant_table);
+ }
+
+ if(pairer->input_is_BAM)
+ HashTableDestroy(pairer -> bam_margin_table);
+ else HashTableDestroy(pairer -> sam_contig_number_table);
+
+ subread_destroy_lock(&pairer -> input_fp_lock);
+ subread_destroy_lock(&pairer -> output_header_lock);
+ delete_with_prefix(pairer -> tmp_file_prefix);
+ fclose(pairer -> input_fp);
+ free(pairer -> threads);
+ signal (SIGTERM, old_sig_TERM);
+ signal (SIGINT, old_sig_INT);
+ //SUBREADprintf("All orphans=%llu frags\n", all_orphants);
+}
+
+// always assume that fp is at the start of a BAM GZ block.
+int SAM_pairer_read_BAM_block(FILE * fp, int max_read_len, char * inbuff) {
+ unsigned char gz_header_12 [12];
+ int read_len = fread(gz_header_12, 1, 12, fp );
+ if(read_len < 12) return -1;
+ if(gz_header_12[0]!=31 || gz_header_12[1]!=139){
+ SUBREADprintf("BAD GZ BAM: %u, %u\n", gz_header_12[0], gz_header_12[1]);
+ return -1;
+ }
+ unsigned short xlen = 0, bsize = 0;
+ memcpy(&xlen, gz_header_12 + 10, 2);
+ int xlen_read = 0;
+
+ while( xlen_read < xlen ){
+ unsigned char x_header_4[4];
+ unsigned short slen = 0;
+ read_len = fread(x_header_4, 1, 4, fp);
+ if(read_len < 4){
+ SUBREADprintf("BAD GZ BAM 6LEN\n");
+ return -1;
+ }
+ memcpy(&slen, x_header_4+2 , 2);
+ xlen_read += 4;
+ if(x_header_4[0]==66 && x_header_4[1]==67 && slen == 2){
+ read_len = fread(&bsize, 2, 1, fp);
+ if(read_len < 1){
+ SUBREADprintf("BAD GZ BAM XLEN\n");
+ return -1;
+ }
+ }else{
+ fseek(fp, slen, SEEK_CUR);
+ }
+ xlen_read += slen;
+ }
+ if(bsize < 1 || bsize < xlen + 19){
+ SUBREADprintf("BAD GZ BAM BSIZE\n");
+ return -1;
+ }
+ read_len = fread(inbuff, 1, bsize - xlen - 19, fp);
+ //SUBREADprintf("GOOD GZ\n");
+
+ // seek over CRC and ISIZE
+ fseek(fp, 8, SEEK_CUR);
+ if(read_len < bsize - xlen - 19) return -1;
+ return read_len;
+}
+
+#define MIN_BAM_BLOCK_SIZE 66000
+
+int SAM_pairer_read_SAM_MB( FILE * fp, int max_read_len, char * inbuff ){
+ int ret = 0;
+
+ while(1){
+ if(ret >= max_read_len - MIN_BAM_BLOCK_SIZE || feof(fp))break;
+ int rlen = fread(inbuff +ret , 1, max_read_len - MIN_BAM_BLOCK_SIZE - ret , fp);
+ //SUBREADprintf("RLEN=%d, BUF=%d\n", rlen, max_read_len - MIN_BAM_BLOCK_SIZE - ret );
+ if(rlen > 0){
+ int x1;
+ for(x1 = 0; x1 < min(200, rlen); x1++)
+ if(*(inbuff+ret+x1)<8 || *(inbuff+ret+x1)> 127){
+ // SUBREADprintf("NOT_SAM_ACTUALLY\n");
+ return -1;
+ }
+ ret += rlen;
+ }
+ }
+ if(!feof(fp)){
+ int nch, last=-1;
+ while(1){
+ nch = fgetc(fp);
+ if(nch < 0 || nch == '\n'){
+ if(last != '\n') inbuff[ret++]='\n';
+ inbuff[ret] = 0;
+ break;
+ }else{
+ inbuff[ret++]=nch;
+ }
+ last = nch;
+ }
+ }
+ return ret;
+}
+
+void SAM_pairer_fill_BIN_buff(SAM_pairer_context_t * pairer , SAM_pairer_thread_t * thread_context , int * is_finished){
+ // load continuous 64MB of data into the SBAM buffer of the current thread
+ // For BAM files: must be the entire blocks.
+ // For SAM files: must be the full lines.
+ int current_buffer_used = 0;
+ int current_blocks = 0;
+ if(pairer -> input_is_BAM){
+ while(1){
+ if(feof(pairer -> input_fp)){
+ *is_finished = 1;
+ break;
+ }
+ if(pairer -> input_buff_SBAM_size - current_buffer_used < MIN_BAM_BLOCK_SIZE) {
+ break;
+ }
+ int this_size = 0;
+
+ this_size = SAM_pairer_read_BAM_block( pairer -> input_fp , pairer -> input_buff_SBAM_size - current_buffer_used , thread_context -> input_buff_SBAM + current_buffer_used);
+
+ current_blocks ++;
+ if(this_size >= 0) {
+ current_buffer_used += this_size;
+ } else {
+ *is_finished = 1;
+ break;
+ }
+ }
+ }else{
+ current_buffer_used = SAM_pairer_read_SAM_MB(pairer -> input_fp , pairer -> input_buff_SBAM_size , thread_context -> input_buff_SBAM);
+ if(current_buffer_used < 1) *is_finished = 1;
+ }
+
+ //SUBREADprintf("PAPA:READ=%d by %d blocks %p, PTRS=%p %p\n", current_buffer_used, current_blocks, thread_context, thread_context -> input_buff_SBAM, thread_context -> input_buff_BIN);
+ thread_context -> input_buff_SBAM_used = current_buffer_used;
+ thread_context -> input_buff_SBAM_ptr = 0;
+ thread_context -> input_buff_BIN_used = 0;
+ thread_context -> input_buff_BIN_ptr = 0;
+}
+
+int SAM_pairer_find_start(SAM_pairer_context_t * pairer , SAM_pairer_thread_t * thread_context );
+#define BAM_next_nch { \
+ int retXX = 0; while(thread_context -> input_buff_BIN_ptr >= thread_context -> input_buff_BIN_used){retXX = SAM_pairer_fetch_BAM_block(pairer, thread_context); if(retXX) break;}\
+ if(retXX) nch=-1; else nch = thread_context -> input_buff_BIN[thread_context -> input_buff_BIN_ptr++];}
+
+#define SAM_next_line {\
+ if( thread_context -> input_buff_SBAM_used <= thread_context -> input_buff_SBAM_ptr ){ line_ptr = NULL;}else{\
+ line_ptr = thread_context -> input_buff_SBAM + thread_context -> input_buff_SBAM_ptr;line_len = 0;\
+ while(line_len + thread_context -> input_buff_SBAM_ptr < thread_context -> input_buff_SBAM_used){ int ccch = thread_context -> input_buff_SBAM[ thread_context -> input_buff_SBAM_ptr + line_len ]; if(ccch == '\n')break; line_len ++;}\
+ thread_context -> input_buff_SBAM_ptr += line_len+1;}}
+
+int SAM_pairer_fetch_BAM_block(SAM_pairer_context_t * pairer , SAM_pairer_thread_t * thread_context){
+ if(thread_context -> input_buff_SBAM_used <= thread_context -> input_buff_SBAM_ptr){
+ return 1;
+ }
+
+ int remained_BIN = thread_context -> input_buff_BIN_used - thread_context -> input_buff_BIN_ptr;
+ if( remained_BIN > 0) {
+ int x1;
+ for(x1 = 0 ; x1 < thread_context -> input_buff_BIN_used - thread_context -> input_buff_BIN_ptr; x1++)
+ thread_context -> input_buff_BIN[x1] = thread_context -> input_buff_BIN[x1+thread_context -> input_buff_BIN_ptr];
+ thread_context -> input_buff_BIN_used -= thread_context -> input_buff_BIN_ptr;
+ } else thread_context -> input_buff_BIN_used = 0;
+
+ thread_context -> input_buff_BIN_ptr = 0;
+
+ thread_context -> strm.zalloc = Z_NULL;
+ thread_context -> strm.zfree = Z_NULL;
+ thread_context -> strm.opaque = Z_NULL;
+ thread_context -> strm.avail_in = 0;
+ thread_context -> strm.next_in = Z_NULL;
+
+ inflateReset(&thread_context -> strm);
+
+ thread_context -> strm.avail_in = (unsigned int)(thread_context -> input_buff_SBAM_used - thread_context -> input_buff_SBAM_ptr);
+ thread_context -> strm.next_in = (unsigned char *)thread_context -> input_buff_SBAM + thread_context -> input_buff_SBAM_ptr;
+ thread_context -> strm.avail_out = pairer -> input_buff_BIN_size - thread_context -> input_buff_BIN_used;
+ thread_context -> strm.next_out = (unsigned char *)thread_context -> input_buff_BIN + thread_context -> input_buff_BIN_used;
+
+ int ret = inflate(&thread_context ->strm, Z_FINISH);
+ if(ret == Z_OK || ret == Z_STREAM_END)
+ {
+ int have = pairer -> input_buff_BIN_size - thread_context ->strm.avail_out - thread_context -> input_buff_BIN_used;
+ int used_BAM = (unsigned int)(thread_context -> input_buff_SBAM_used - thread_context -> input_buff_SBAM_ptr) - thread_context -> strm.avail_in;
+
+ thread_context -> input_buff_BIN_used += have;
+ thread_context -> input_buff_SBAM_ptr += used_BAM;
+
+ if(thread_context -> need_find_start){
+ int test_read_bin = SAM_pairer_find_start(pairer, thread_context);
+ if(test_read_bin<1){
+ pairer -> is_bad_format = 1;
+ SUBREADprintf("BIN REMAIN=%d, BAM USED=%d, BIN GENERATED=%d, BAM REMAIN=%d, TEST_READ_BIN=%d\n", remained_BIN, used_BAM, have, thread_context -> input_buff_SBAM_used - thread_context -> input_buff_SBAM_ptr, test_read_bin);
+ }
+ }
+ } else {
+ SUBREADprintf("GZIP ERROR:%d\n", ret);
+ return 1;
+ }
+ return 0;
+}
+
+#define BAM_next_u32(v) {\
+ (v) = 0; unsigned int poww = 1 ; \
+ BAM_next_nch; (v) += nch*poww; poww *= 256;\
+ BAM_next_nch; (v) += nch*poww; poww *= 256;\
+ BAM_next_nch; (v) += nch*poww; poww *= 256;\
+ BAM_next_nch; (v) += nch*poww;\
+}
+
+void SAM_pairer_reduce_BAM_bin(SAM_pairer_context_t * pairer, SAM_pairer_thread_t * thread_context, unsigned char * bin_where, int * bin_len){
+ unsigned int seq_len, name_len, cigar_ops;
+ memcpy(&seq_len, bin_where + 20, 4);
+ if(seq_len<=1) return;
+ memcpy(&name_len, bin_where + 12, 4);
+ name_len = name_len & 0xff;
+ memcpy(&cigar_ops, bin_where + 16, 4);
+ cigar_ops = cigar_ops & 0xffff;
+
+ int targ_pos = 36+name_len+4*cigar_ops + 2;
+ int src_pos = 36+name_len+4*cigar_ops + (1+seq_len) / 2 + seq_len;
+
+ bin_where[targ_pos-2]=0xff;
+ bin_where[targ_pos-1]=0xff;
+
+ seq_len = 1;
+ memcpy(bin_where + 20, &seq_len, 4);
+ while(src_pos < (*bin_len)){
+ bin_where[targ_pos++]=bin_where[src_pos++];
+ }
+ (* bin_len) = targ_pos - 4;
+ memcpy(bin_where, bin_len, 4);
+ (* bin_len) += 4;
+
+}
+
+#define MAX_BIN_RECORD_LENGTH 65000
+int reduce_SAM_to_BAM(SAM_pairer_context_t * pairer , SAM_pairer_thread_t * thread_context);
+
+int SAM_pairer_get_next_read_BIN( SAM_pairer_context_t * pairer , SAM_pairer_thread_t * thread_context , unsigned char ** bin_where, int * bin_len ) {
+ if( pairer -> input_is_BAM ){
+ int nch = 0;
+ while(1){
+ if(!pairer -> BAM_header_parsed){
+ int x1;
+ unsigned int bam_signature;
+ BAM_next_u32(bam_signature);
+ BAM_next_u32(pairer -> BAM_l_text);
+ char * header_txt = malloc(pairer->BAM_l_text);
+
+ //SUBREADprintf("HEAD_TXT=%d\n", pairer -> BAM_l_text);
+ for(x1 = 0 ; x1 < pairer -> BAM_l_text; x1++){
+ BAM_next_nch;
+ header_txt [x1] = nch;
+ }
+ pairer -> output_header(pairer, thread_context -> thread_id, 1, pairer -> BAM_l_text , header_txt , pairer -> BAM_l_text );
+
+ BAM_next_u32(pairer -> BAM_n_ref);
+ //SUBREADprintf("HEAD_CHRO=%d\n", pairer -> BAM_n_ref);
+ unsigned int ref_bin_len = 0;
+ for(x1 = 0; x1 < pairer -> BAM_n_ref; x1++) {
+ unsigned int l_name, l_ref, x2;
+ char ref_name[MAX_CHROMOSOME_NAME_LEN];
+ BAM_next_u32(l_name);
+ assert(l_name < 256);
+ memcpy(header_txt + ref_bin_len, &l_name, 4);
+ ref_bin_len += 4;
+ for(x2 = 0; x2 < l_name; x2++){
+ BAM_next_nch;
+ header_txt[ref_bin_len++] = nch;
+ ref_name[x2]=nch;
+ }
+ BAM_next_u32(l_ref);
+ memcpy(header_txt + ref_bin_len, &l_ref, 4);
+ ref_bin_len += 4;
+
+ assert(ref_bin_len < pairer -> BAM_l_text);
+ //SUBREADprintf("%d-th ref : %s [len=%u]\n", x1, ref_name, l_ref);
+ }
+
+ //exit(0);
+ pairer -> output_header(pairer, thread_context -> thread_id, 0, pairer -> BAM_n_ref , header_txt , ref_bin_len );
+
+ free(header_txt);
+
+ pairer -> BAM_header_parsed = 1;
+ if(pairer -> display_progress)
+ SUBREADprintf("\nThe header was parsed: %d reference sequences were found.\nScanning the input file.\n", pairer -> BAM_n_ref);
+ SAM_pairer_fetch_BAM_block(pairer, thread_context);
+
+ //SUBREADprintf("HEAD_FINISHED, BAD=%d\n", pairer -> is_bad_format);
+ }
+
+ if(pairer -> is_bad_format) return 0;
+
+ while(thread_context -> input_buff_BIN_used <= thread_context -> input_buff_BIN_ptr){
+ int ret_fetch = SAM_pairer_fetch_BAM_block(pairer, thread_context);
+ if(ret_fetch)
+ return 0;
+ }
+
+ unsigned int record_len=0;
+ memcpy(&record_len, thread_context -> input_buff_BIN + thread_context -> input_buff_BIN_ptr, 4);
+ thread_context -> input_buff_BIN_ptr += 4;
+
+ if(record_len < 32 || record_len > MAX_BIN_RECORD_LENGTH || thread_context -> input_buff_BIN_used < thread_context -> input_buff_BIN_ptr + record_len ){
+ //SUBREADprintf("BAD FORMAT:%u\n", record_len);
+ pairer -> is_bad_format = 1;
+ return 0;
+ }
+
+ /*
+ while(thread_context -> input_buff_BIN_used <= thread_context -> input_buff_BIN_ptr + record_len){
+ int ret_fetch = SAM_pairer_fetch_BAM_block(pairer, thread_context);
+ if(ret_fetch)
+ return 0;
+ }*/
+
+ (* bin_where) = thread_context -> input_buff_BIN + thread_context -> input_buff_BIN_ptr - 4;
+ (* bin_len) = record_len + 4;
+ thread_context -> input_buff_BIN_ptr += record_len;
+
+ if( pairer -> tiny_mode )SAM_pairer_reduce_BAM_bin(pairer, thread_context, *bin_where, bin_len);
+
+ return 1;
+ }
+ } else {
+ char *line_ptr;
+ int line_len=0, passed_read_SBAM_ptr = -1;
+ if(!pairer -> BAM_header_parsed){
+ char * header_start = NULL;
+ int header_len = 0;
+ while(1){
+ SAM_next_line;
+ if(NULL == header_start && line_ptr[0] == '@') header_start = line_ptr;
+
+ if(NULL == line_ptr){
+ passed_read_SBAM_ptr = line_ptr - thread_context -> input_buff_SBAM;
+ //SUBREADprintf("FATAL: the header is too large to the buffer!\n");
+ break;
+ }else{
+ //SUBREADprintf("LINELEN=%d, PTR=%d, FIRST=%c\n", line_len, thread_context -> input_buff_SBAM_ptr , line_ptr[0]);
+ }
+ if(line_ptr[0]!='@'){
+ passed_read_SBAM_ptr = line_ptr - thread_context -> input_buff_SBAM;
+ break;
+ }
+ }
+
+ pairer -> output_header(pairer, thread_context -> thread_id, 1, header_len , header_start , header_len);
+ thread_context -> input_buff_SBAM_ptr = 0;
+ int header_bin_ptr = 0, header_contigs = 0;
+ while(1){
+ SAM_next_line;
+ if(line_ptr == NULL || line_ptr[0]!='@') break;
+ if(memcmp(line_ptr, "@SQ\t",4)==0){
+ unsigned int ct_len = 0, ctptr = 4, status = 0, sqname_len = 0;
+ char * sqname = NULL;
+ while(1){
+ char ctnch = line_ptr[ctptr++];
+ if( status == 0){
+ if(ctnch=='S' && line_ptr[ctptr] == 'N' && line_ptr[ctptr+1] == ':'){
+ ctptr += 2;
+ status = 10;
+ sqname = line_ptr + ctptr;
+ }else if(ctnch=='L' && line_ptr[ctptr] == 'N' && line_ptr[ctptr+1] == ':'){
+ ctptr += 2;
+ status = 20;
+ }else status = 30;
+ }else if(status == 10 || status == 20 || status == 30){
+ if(ctnch == '\t' || ctnch == '\n'){
+ status = 0;
+ if(ctnch == '\n') break;
+ //break;
+ }
+ if(status == 10) sqname_len ++;
+ else if(status == 20) ct_len = ct_len * 10 + ctnch - '0';
+ }
+ }
+
+
+ sqname_len += 1;
+ memcpy(header_start + header_bin_ptr, &sqname_len, 4);
+ header_bin_ptr += 4;
+ memcpy(header_start + header_bin_ptr, sqname, sqname_len-1);
+ *(header_start + header_bin_ptr + sqname_len - 1) = 0;
+ char * mem_contig_name = malloc(sqname_len);
+ strcpy(mem_contig_name , header_start + header_bin_ptr);
+ //SUBREADprintf("CONTIG %d : %s (len=%d = %d)\n", header_contigs, header_start + header_bin_ptr , sqname_len, strlen(mem_contig_name));
+ HashTablePut(pairer -> sam_contig_number_table , mem_contig_name, NULL + 1 + header_contigs);
+ header_bin_ptr += sqname_len;
+
+ memcpy(header_start + header_bin_ptr, &ct_len, 4);
+ header_bin_ptr += 4;
+ header_contigs++;
+ }
+ }
+
+ pairer -> output_header(pairer, thread_context -> thread_id, 0, header_contigs , header_start , header_bin_ptr);
+ pairer -> BAM_header_parsed = 1;
+ }
+
+ if(passed_read_SBAM_ptr >=0)
+ thread_context -> input_buff_SBAM_ptr = passed_read_SBAM_ptr;
+
+ if( thread_context -> input_buff_SBAM_ptr < thread_context -> input_buff_SBAM_used ){
+ thread_context -> input_buff_BIN_ptr = 0;
+ *bin_len = reduce_SAM_to_BAM(pairer, thread_context);
+ *bin_where = (unsigned char *)thread_context -> input_buff_BIN;
+
+ return ((*bin_len) > 0)?1:0;
+ }
+ return 0;
+ }
+ return 0;
+}
+
+int online_register_contig(SAM_pairer_context_t * pairer , SAM_pairer_thread_t * thread_context, char * ref){
+ int reflen = strlen(ref);
+ char * header_sec = malloc(reflen + 20);
+ reflen++;
+ memcpy(header_sec, &reflen, 4);
+ memcpy(header_sec + 4, ref, reflen);
+ memset(header_sec + 4+reflen, 0, 4);
+ subread_lock_occupy(&pairer -> output_header_lock);
+
+ int refId = HashTableGet(pairer->sam_contig_number_table, ref) - NULL - 1;
+ if(refId < 0){
+ refId = pairer->sam_contig_number_table->numOfElements;
+ pairer -> output_header(pairer, thread_context -> thread_id, 0, 1 , header_sec , 8+reflen);
+ char * mem_ref = malloc(reflen);
+ memcpy(mem_ref, ref, reflen);
+ HashTablePut(pairer->sam_contig_number_table, mem_ref, NULL + refId + 1);
+ }
+ subread_lock_release(&pairer -> output_header_lock);
+ free(header_sec);
+ return refId;
+}
+
+#define set_memory_int(ptr, iii) { *(ptr) = (iii)&0xff; *(ptr+1) = (iii>>8)&0xff; *(ptr+2) = (iii>>16)&0xff;*(ptr+3) = (iii>>24); }
+
+int reduce_SAM_to_BAM(SAM_pairer_context_t * pairer , SAM_pairer_thread_t * thread_context){
+
+
+ int column_no = 0, in_ptr = 0;
+ char * in_str = thread_context -> input_buff_SBAM + thread_context -> input_buff_SBAM_ptr;
+ char * read_name = NULL, * ref = NULL, * mate_ref = NULL, * cigar = NULL;
+ int flag = 0, pos = 0, mapq = 0, mate_pos = 0, tlen = 0, l_read_name = 0, tlen_sign = 1;
+
+ read_name = in_str;
+ while(1){
+ int nch = in_str[in_ptr];
+ if(nch == '\n' || nch == '\0') {
+ break;
+ }else if(nch == '\t'){
+ if(column_no == 0 || column_no == 2 || column_no == 5 || column_no == 6)
+ in_str[in_ptr] = 0;
+ column_no ++;
+ if(column_no == 2) ref = in_str + in_ptr + 1;
+ else if(column_no == 5) cigar = in_str + in_ptr + 1;
+ else if(column_no == 6) mate_ref = in_str + in_ptr + 1;
+ else if(column_no == 11) break;
+ }else{
+ if(column_no == 0) l_read_name ++;
+ else if(column_no == 1) flag = flag *10 + nch - '0';
+ else if(column_no == 3) pos = pos *10 + nch - '0';
+ else if(column_no == 4) mapq = mapq *10 + nch - '0';
+ else if(column_no == 7) mate_pos = mate_pos *10 + nch - '0';
+ else if(column_no == 8){
+ if(nch == '-') tlen_sign = -1;
+ else tlen = tlen *10 + nch - '0';
+ }
+ }
+
+ in_ptr++;
+ }
+ if(column_no < 10){
+ //SUBREADprintf("RETURN_LESS:%d\n", column_no);
+ return -1;
+ }
+ l_read_name++;
+
+ char * bin_tmp = (char *)thread_context -> input_buff_BIN + thread_context -> input_buff_BIN_ptr;
+
+ int refID = HashTableGet(pairer->sam_contig_number_table, ref) - NULL - 1;
+ if(refID < 0 && ref[0]!='*')
+ refID = online_register_contig(pairer, thread_context, ref);
+ set_memory_int(bin_tmp + 4, refID);
+
+ pos -= 1;
+ set_memory_int(bin_tmp + 8, pos);
+
+ int mapq_nl = mapq << 8 | l_read_name;
+ set_memory_int(bin_tmp + 12, mapq_nl);
+
+ int coverage;
+ int cigar_ops = SamBam_compress_cigar(cigar, (int *)(bin_tmp + 36 + l_read_name), &coverage);
+ int flag_nc = flag << 16 | cigar_ops;
+ set_memory_int(bin_tmp + 16, flag_nc);
+
+ int tmpi = 1;
+ set_memory_int(bin_tmp + 20, tmpi); // SEQ_LEN
+
+ int mate_refID = refID;
+ if(mate_ref[0]!='=' || mate_ref[1]!=0)
+ mate_refID = HashTableGet(pairer->sam_contig_number_table, mate_ref) - NULL - 1;
+
+ if(mate_refID < 0 && mate_ref[0]!='*')
+ mate_refID = online_register_contig(pairer, thread_context, mate_ref);
+
+ set_memory_int(bin_tmp + 24, mate_refID);
+
+ mate_pos -= 1;
+ set_memory_int(bin_tmp + 28, mate_pos);
+
+ tlen = tlen * tlen_sign;
+ set_memory_int(bin_tmp + 32, tlen);
+
+ memcpy(bin_tmp + 36, read_name, l_read_name);
+ bin_tmp[36 + l_read_name + 4 * cigar_ops] = 0xff;
+ bin_tmp[36 + l_read_name + 4 * cigar_ops+1] = 0x10;
+
+ int bin_ptr = 36 + l_read_name + 4 * cigar_ops + 2;
+
+ if(column_no == 11) // has extra tags
+ {
+ while(in_str[in_ptr] == '\t'){
+ if((!isalpha(in_str[in_ptr+1])) || (!isalpha(in_str[in_ptr+2])) || (!isalpha(in_str[in_ptr+4]))){
+ while(in_str[in_ptr] !='\n')in_ptr++;
+ break;
+ }
+ in_ptr ++;
+ int xxnch;
+ if(in_str[in_ptr + 3] == 'Z'){
+ bin_tmp[bin_ptr+0] = in_str[in_ptr+0];
+ bin_tmp[bin_ptr+1] = in_str[in_ptr+1];
+ bin_tmp[bin_ptr+2] = 'Z';
+ bin_ptr += 3;
+ in_ptr += 5;
+ while(1){
+ xxnch = *(in_str + in_ptr);
+ if(xxnch == '\n' || xxnch == '\t') break;
+ *(bin_tmp + (bin_ptr++)) = xxnch;
+ in_ptr ++;
+ }
+ *(bin_tmp + (bin_ptr++)) = 0;
+ }else if(in_str[in_ptr + 3] == 'i'){
+ int tmpi = 0, tmpi_sign = 1;
+ bin_tmp[bin_ptr+0] = in_str[in_ptr+0];
+ bin_tmp[bin_ptr+1] = in_str[in_ptr+1];
+ bin_tmp[bin_ptr+2] = 'i';
+ bin_ptr += 3;
+ in_ptr += 5;
+
+ while(1){
+ xxnch = *(in_str + in_ptr);
+ if(xxnch == '\n' || xxnch == '\t') break;
+ else if(xxnch == '-') tmpi_sign = -1;
+ else tmpi = tmpi * 10 + xxnch - '0';
+ in_ptr ++;
+ }
+ tmpi *= tmpi_sign;
+ set_memory_int(bin_tmp+bin_ptr, tmpi);
+ bin_ptr += 4;
+
+ }else if(in_str[in_ptr + 3] == 'A'){
+ bin_tmp[bin_ptr+0] = in_str[in_ptr+0];
+ bin_tmp[bin_ptr+1] = in_str[in_ptr+1];
+ bin_tmp[bin_ptr+2] = 'A';
+ bin_tmp[bin_ptr+3] = in_str[in_ptr+5];
+ bin_ptr += 4;
+ in_ptr += 6;
+ }else{
+ in_ptr += 5;
+ while(1){
+ xxnch = *(in_str + in_ptr);
+ if(xxnch == '\n' || xxnch == '\t') break;
+ in_ptr++;
+ }
+ }
+ }
+
+ }
+
+ thread_context -> input_buff_SBAM_ptr += in_ptr + 1;
+ assert(bin_ptr < 65000);
+
+ bin_ptr -= 4;
+ set_memory_int(bin_tmp, bin_ptr);
+ bin_ptr += 4;
+ //memcpy(buf, bin_tmp, bin_ptr);
+
+ return bin_ptr;
+}
+
+ /*
+  * Look for an integer-typed optional field in the tag area of a BAM record.
+  *
+  *   bin         - first byte of the optional-field area (tag, type, value triples)
+  *   bin_len     - number of bytes in that area
+  *   tag_name    - two-character tag to look for; only tag_name[0] and tag_name[1] are read
+  *   saved_value - receives the value when a match with type c/C/s/S/i/I is found;
+  *                 it is left untouched otherwise
+  *
+  * Returns 1 when the tag was found with an integer type, 0 otherwise.
+  *
+  * Signed types ('c', 's') are sign-extended; unsigned types ('C', 'S') are zero-extended.
+  * The scan stops (returning 0) as soon as a field is truncated, carries an impossible
+  * array count, or has an unknown type letter, instead of reading beyond bin_len.
+  */
+ int SAM_pairer_iterate_int_tags(unsigned char * bin, int bin_len, char * tag_name, int * saved_value){
+     int found = 0;
+     int bin_cursor = 0;
+
+     // A field is never shorter than 4 bytes: 2-byte tag, 1-byte type, at least 1 byte of value.
+     while(bin_cursor + 4 <= bin_len){
+         unsigned char * field = bin + bin_cursor;
+         int value_type = field[2];
+         int remain = bin_len - bin_cursor - 3; // bytes available after tag + type
+         int skip_content = 0;                  // size of the value part of this field
+
+         if(value_type=='i' || value_type=='I' || value_type=='f')
+             skip_content = 4;
+         else if(value_type=='s' || value_type=='S')
+             skip_content = 2;
+         else if(value_type=='c' || value_type=='C' || value_type=='A')
+             skip_content = 1;
+         else if(value_type=='Z' || value_type=='H'){
+             // NUL-terminated text; never look beyond the end of the tag area.
+             while(skip_content < remain && field[3 + skip_content]) skip_content++;
+             skip_content ++; // the terminating NUL
+         } else if(value_type=='B'){
+             // array: 1-byte cell type, 4-byte cell count, then the cells
+             if(remain < 5) break;
+             char cell_type = tolower(field[3]);
+
+             memcpy(&skip_content, field + 4, 4);
+             // a negative or oversized count would move the cursor backwards or out of the buffer
+             if(skip_content < 0 || skip_content > remain) break;
+             if(cell_type == 's')skip_content *=2;
+             else if(cell_type == 'i' || cell_type == 'f')skip_content *= 4;
+             skip_content += 4 + 1;
+         }else{
+             // not a valid type letter: the data is not a tag list, stop scanning
+             SUBREADprintf("UnknownTag=%c\n", value_type);
+             break;
+         }
+
+         if(skip_content > remain) break; // value runs past the end of the tag area
+
+         if(field[0] == tag_name[0] && field[1] == tag_name[1]){
+             int tag_int_val = 0;
+             found = 1;
+             if(value_type=='i' || value_type=='I'){
+                 memcpy(&tag_int_val, field + 3, 4);
+             } else if(value_type=='s'){
+                 short v16 = 0;
+                 memcpy(&v16, field + 3, 2);
+                 tag_int_val = v16;
+             } else if(value_type=='S'){
+                 unsigned short u16 = 0;
+                 memcpy(&u16, field + 3, 2);
+                 tag_int_val = u16;
+             } else if(value_type=='c'){
+                 tag_int_val = (signed char) field[3];
+             } else if(value_type=='C'){
+                 tag_int_val = field[3];
+             } else {
+                 found = 0; // same tag name but not an integer type: keep scanning
+             }
+
+             if(found){
+                 (* saved_value) = tag_int_val;
+                 break;
+             }
+         }
+
+         bin_cursor += skip_content + 3;
+     }
+     return found;
+ }
+
+ /*
+  * Build the pairing key of one BAM record.
+  *
+  * The key is the read name (cut at the first '/', if any), followed by five
+  * fields each introduced by the byte \027: refID and position of read 1,
+  * refID and position of read 2, and the value of the "HI" tag (-1 when absent).
+  * "Read 1" is this record when FLAG has 0x40 set, otherwise it is the mate.
+  * A side flagged unmapped (FLAG & 4 for this read, FLAG & 8 for the mate) is
+  * written with refID -1 and position 0.
+  *
+  *   bin, bin_len - the record, starting at its 4-byte block_size field
+  *   full_name    - output buffer for the key
+  *   this_flag    - receives the FLAG of the record
+  *
+  * pairer and thread_context are not used by this function.
+  * Returns the length of the key written into full_name.
+  */
+ int SAM_pairer_get_read_full_name( SAM_pairer_context_t * pairer , SAM_pairer_thread_t * thread_context , unsigned char * bin, int bin_len , char * full_name, int * this_flag){
+     unsigned int refID = 0, next_refID = 0;
+     unsigned int pos = 0, next_pos = 0;
+     unsigned int l_read_name = 0, l_seq = 0, cigar_opts, tmpi = 0;
+     unsigned int r1_refID, r1_pos, r2_refID, r2_pos;
+     int FLAG;
+     int HItag = -1;
+     int rlen = 0;
+
+     full_name[0]=0;
+
+     // fixed-size part of the record
+     memcpy(&refID, bin + 4, 4);
+     memcpy(&pos, bin + 8, 4);
+     memcpy(&tmpi, bin + 12, 4);
+     l_read_name = tmpi & 0xff;
+     memcpy(&tmpi, bin + 16, 4);
+     FLAG = (tmpi >> 16)&0xffff;
+     (*this_flag) = FLAG;
+     cigar_opts = tmpi & 0xffff;
+     memcpy(&l_seq, bin + 20, 4);
+     memcpy(&next_refID, bin + 24, 4);
+     memcpy(&next_pos, bin + 28, 4);
+     memcpy(full_name, bin+36, l_read_name);
+
+     if(FLAG & 4){
+         refID = -1;
+         pos = 0;
+     }
+
+     if(FLAG & 8){
+         next_refID = -1;
+         next_pos = 0;
+     }
+
+     if((FLAG & 0x40) == 0x40){
+         r1_refID = refID;
+         r1_pos = pos;
+         r2_refID = next_refID;
+         r2_pos = next_pos;
+     } else {
+         r2_refID = refID;
+         r2_pos = pos;
+         r1_refID = next_refID;
+         r1_pos = next_pos;
+     }
+
+     //SUBREADprintf("LQ=%d, RL=%d, CIGAR_OPT=%d\n", l_seq, (l_seq+1)/2, cigar_opts);
+
+     // optional fields start after name, CIGAR, packed sequence and qualities
+     unsigned int tags_start = 36+l_read_name+4*cigar_opts+(l_seq+1)/2+l_seq;
+
+     // Only look for tags when they really lie inside the record; the plain
+     // unsigned subtraction would wrap around when tags_start > bin_len.
+     if(bin_len > 0 && tags_start < (unsigned int)bin_len){
+         unsigned int tags_len = bin_len - tags_start;
+         if(tags_len > 2)
+             SAM_pairer_iterate_int_tags(bin + tags_start, tags_len, "HI", &HItag);
+     }
+
+     // Cut the name at the first '/'; the last name byte (the NUL) is not examined.
+     // Written as slash_pos + 1 < l_read_name so that l_read_name == 0 cannot wrap.
+     unsigned int slash_pos = 0;
+     for(; slash_pos + 1 < l_read_name; slash_pos++){
+         if(full_name[slash_pos] == '/') break;
+     }
+
+     rlen = slash_pos + sprintf(full_name+slash_pos, "\027%d\027%u\027%d\027%u\027%d", (int)r1_refID, r1_pos, (int)r2_refID, r2_pos, HItag);
+
+     return rlen;
+ }
+
+ /*
+  * Write one section of the BAM header through the compressing writer of a thread.
+  *
+  *   pairer_vp - the SAM_pairer_context_t; its appendix1 is the writer state
+  *   thread_no - index of the writer thread whose buffer is used
+  *   is_text   - non-zero: the section is prefixed with "BAM\1" and the 4-byte `items`;
+  *               zero: it is prefixed with the 4-byte `items` only
+  *   items     - the 32-bit count written in front of the payload
+  *   bin       - payload bytes
+  *   bin_len   - payload length
+  *
+  * The payload is cut into pieces that fit SAM_PAIRER_WRITE_BUFFER and each piece
+  * is compressed as its own block. The first block is always emitted, even when
+  * bin_len is 0, so that the prefix is not lost for an empty section.
+  *
+  * Returns 0 on success, 1 if the compressor reported an error.
+  */
+ int SAM_pairer_multi_thread_header (void * pairer_vp, int thread_no, int is_text, unsigned int items, char * bin, unsigned int bin_len){
+
+     SAM_pairer_context_t * pairer = (SAM_pairer_context_t *) pairer_vp;
+     SAM_pairer_writer_main_t * bam_main = (SAM_pairer_writer_main_t * )pairer -> appendix1;
+     SAM_pairer_writer_thread_t * bam_thread = bam_main -> threads + thread_no;
+     unsigned int BIN_block_cursor = 0, bin_cursor = 0;
+     int ret = 0;
+
+     if(is_text){
+         memcpy( bam_thread -> BIN_buffer, "BAM\1", 4 );
+         memcpy( bam_thread -> BIN_buffer + 4 , & items , 4 );
+         BIN_block_cursor = 8;
+     }else{
+         memcpy( bam_thread -> BIN_buffer , & items , 4 );
+         BIN_block_cursor = 4;
+     }
+
+     // do/while: the prefix has to be flushed even if there is no payload at all
+     do{
+         int write_text_len = min(SAM_PAIRER_WRITE_BUFFER - BIN_block_cursor, bin_len - bin_cursor);
+         if(write_text_len > 0)
+             memcpy(bam_thread -> BIN_buffer + BIN_block_cursor , bin + bin_cursor, write_text_len);
+         bam_thread -> BIN_buffer_ptr = write_text_len + BIN_block_cursor;
+
+         if(SAM_pairer_multi_thread_compress(bam_main, bam_thread)){
+             ret = 1;
+             break;
+         }
+         bin_cursor += write_text_len;
+         BIN_block_cursor = 0; // only the first block carries the prefix
+     }while( bin_cursor < bin_len );
+
+     bam_thread -> BIN_buffer_ptr = 0;
+     return ret;
+ }
+
+ /*
+  * Fabricate a placeholder BAM record for the missing mate of bin1.
+  *
+  *   rname    - pairing key: name, then r1 refID, r1 pos, r2 refID, r2 pos and HI value,
+  *              separated by the byte \027. It is split in place by strtok_r.
+  *   bin1     - the record that does have data; its FLAG and TLEN are read
+  *   out_bin2 - receives the placeholder, including the leading 4-byte block size
+  *
+  * The placeholder takes the coordinates of the other end from the key, mirrors the
+  * mapped/strand bits of bin1's FLAG, negates TLEN, and carries a one-base sequence
+  * (bytes 0xff and 0x20). An "HI" tag is appended when the HI value is >= 0.
+  */
+ void SAM_pairer_make_dummy(char * rname, char * bin1, char * out_bin2){
+     char * save_ptr = NULL;
+     int key_fields[5]; // r1 refID, r1 pos, r2 refID, r2 pos, HI
+     int field_i;
+
+     //SUBREADprintf("S=%s ", rname);
+     char * realname = strtok_r(rname, "\027", &save_ptr);
+     int len_name = strlen(realname);
+     for(field_i = 0; field_i < 5; field_i++)
+         key_fields[field_i] = atoi(strtok_r(NULL, "\027", &save_ptr));
+
+     int r1_chro = key_fields[0], r1_pos = key_fields[1];
+     int r2_chro = key_fields[2], r2_pos = key_fields[3];
+     int HItag = key_fields[4];
+
+     int flag_word = 0, mate_tlen = 0;
+     memcpy(&flag_word, bin1 + 16, 4);
+     memcpy(&mate_tlen, bin1 + 32, 4);
+     int mate_FLAG = (flag_word >> 16) & 0xffff;
+
+     // an unmapped end has no position
+     if(r1_chro<0) r1_pos=-1;
+     if(r2_chro<0) r2_pos=-1;
+
+     // the placeholder is whichever end bin1 is not
+     int my_chro, my_pos, mate_chro, mate_pos, my_flag;
+     if(mate_FLAG & 0x40){
+         my_chro = r2_chro;   my_pos = r2_pos;
+         mate_chro = r1_chro; mate_pos = r1_pos;
+         my_flag = 0x80;
+     }else{
+         my_chro = r1_chro;   my_pos = r1_pos;
+         mate_chro = r2_chro; mate_pos = r2_pos;
+         my_flag = 0x40;
+     }
+
+     my_flag |= 1;
+     // swap "this read" and "mate" bits of bin1
+     if(mate_FLAG & 8) my_flag |= 4;
+     if(mate_FLAG & 4) my_flag |= 8;
+     if(mate_FLAG & 0x10) my_flag |= 0x20;
+     if(mate_FLAG & 0x20) my_flag |= 0x10;
+
+     int flag_nc = my_flag << 16;
+     int bin_mq_nl = len_name + 1;
+     int seq_len = 1;
+     int my_tlen = -mate_tlen;
+
+     memcpy(out_bin2 + 4, &my_chro, 4);
+     memcpy(out_bin2 + 8, &my_pos, 4);
+     memcpy(out_bin2 + 12, &bin_mq_nl, 4);
+     memcpy(out_bin2 + 16, &flag_nc, 4);
+     memcpy(out_bin2 + 20, &seq_len, 4);
+     memcpy(out_bin2 + 24, &mate_chro, 4);
+     memcpy(out_bin2 + 28, &mate_pos, 4);
+     memcpy(out_bin2 + 32, &my_tlen, 4);
+     memcpy(out_bin2 + 36, realname, len_name + 1);
+
+     // everything after the NUL-terminated name
+     char * tail = out_bin2 + 36 + len_name + 1;
+     tail[0] = 0xff;
+     tail[1] = 0x20;
+
+     int all_len = 36 + len_name + 3 - 4;
+     //SUBREADprintf("HI=%d\n", HItag);
+     if(HItag>=0){
+         tail[2] = 'H';
+         tail[3] = 'I';
+         if(HItag<128){
+             tail[4] = 'C';
+             memcpy(tail + 5, &HItag, 1);
+             all_len += 4;
+         }else if(HItag<32767){
+             tail[4] = 'S';
+             memcpy(tail + 5, &HItag, 2);
+             all_len += 5;
+         }else{
+             tail[4] = 'I';
+             memcpy(tail + 5, &HItag, 4);
+             all_len += 7;
+         }
+     }
+     memcpy(out_bin2, &all_len, 4);
+ }
+
+ /*
+  * Put the pairer back into its initial state so the input can be processed again.
+  * The global counters are cleared and, for every thread, the buffer cursors are
+  * zeroed, the remembered last read is forgotten, the orphan table is destroyed and
+  * created afresh, and the inflate stream is reset.
+  */
+ void SAM_pairer_reset( SAM_pairer_context_t * pairer ) {
+     int thread_i;
+
+     pairer -> is_finished = 0;
+     pairer -> BAM_header_parsed = 0;
+     pairer -> total_input_reads = 0;
+
+     for(thread_i = 0; thread_i < pairer -> total_threads; thread_i++){
+         SAM_pairer_thread_t * th = pairer -> threads + thread_i;
+
+         th -> reads_in_SBAM = 0;
+         th -> input_buff_BIN_used = 0;
+         th -> input_buff_BIN_ptr = 0;
+         th -> input_buff_SBAM_used = 0;
+         th -> input_buff_SBAM_ptr = 0;
+         th -> orphant_block_no = 0;
+         th -> immediate_last_read_full_name[0] = 0;
+
+         // replace the orphan table by an empty one with the same configuration
+         HashTableDestroy(th -> orphant_table);
+         th -> orphant_table = HashTableCreate(pairer -> input_buff_SBAM_size / 100);
+         HashTableSetHashFunction(th -> orphant_table, fc_chro_hash);
+         HashTableSetKeyComparisonFunction(th -> orphant_table, fc_strcmp_chro);
+         HashTableSetDeallocationFunctions(th -> orphant_table, free, free);
+
+         inflateReset(&th -> strm);
+     }
+ }
+ /*
+  * Discard everything written so far by the BAM writer: the output file is
+  * truncated, closed and reopened for writing, and every writer thread gets an
+  * empty buffer and a reset deflate stream.
+  */
+ void SAM_pairer_writer_reset( void * pairer_vp ) {
+     SAM_pairer_context_t * pairer = (SAM_pairer_context_t *) pairer_vp;
+     SAM_pairer_writer_main_t * bam_main = (SAM_pairer_writer_main_t * )pairer -> appendix1;
+     int thread_i = 0;
+
+     ftruncate(fileno(bam_main -> bam_fp), 0);
+     fclose(bam_main -> bam_fp);
+     bam_main -> bam_fp = f_subr_open(bam_main -> bam_name, "wb");
+
+     while(thread_i < pairer -> total_threads){
+         SAM_pairer_writer_thread_t * writer = bam_main -> threads + thread_i;
+         writer -> BIN_buffer_ptr = 0;
+         deflateReset(&writer -> strm);
+         thread_i++;
+     }
+ }
+
+ /*
+  * Output callback of the pairer: append one read, or one read pair, to the BAM
+  * write buffer of the calling thread.
+  *
+  *   rname - pairing key of the read(s); may be NULL
+  *   bin1  - first record (starting with its 4-byte block size)
+  *   bin2  - second record, or NULL. When it is NULL, rname is given and the writer
+  *           has `has_dummy` set, a placeholder mate is fabricated from rname and bin1.
+  *
+  * If the record(s) do not fit behind the data already buffered, the buffer is
+  * compressed first. Returns 0 on success, 1 when the record(s) can never fit into
+  * one buffer or the compressor reported an error.
+  */
+ int SAM_pairer_multi_thread_output(void * pairer_vp, int thread_no, char * rname, char * bin1, char * bin2 ){
+     SAM_pairer_context_t * pairer = (SAM_pairer_context_t *) pairer_vp;
+     SAM_pairer_writer_main_t * bam_main = (SAM_pairer_writer_main_t * )pairer -> appendix1;
+     SAM_pairer_writer_thread_t * bam_thread = bam_main -> threads + thread_no;
+     char dummy_bin2 [MAX_READ_NAME_LEN*2 + 180 ];
+     int first_len = 0, second_len = 0, pair_len;
+
+     if(NULL == bin2 && NULL != rname && bam_main -> has_dummy){
+         SAM_pairer_make_dummy( rname, bin1, dummy_bin2 );
+         bin2 = dummy_bin2;
+     }
+
+     // the stored block size does not count its own four bytes
+     memcpy(&first_len, bin1, 4);
+     first_len += 4;
+     if(bin2){
+         memcpy(&second_len, bin2, 4);
+         second_len += 4;
+     }
+     pair_len = first_len + second_len;
+
+     if(pair_len >= SAM_PAIRER_WRITE_BUFFER){
+         SUBREADprintf("ERROR: BAM Record larger than a BAM block!\n");
+         return 1;
+     }
+
+     // make room by flushing what is already buffered
+     if(pair_len + bam_thread -> BIN_buffer_ptr >= SAM_PAIRER_WRITE_BUFFER){
+         if(SAM_pairer_multi_thread_compress(bam_main, bam_thread))
+             return 1;
+     }
+
+     memcpy( bam_thread -> BIN_buffer + bam_thread -> BIN_buffer_ptr, bin1, first_len );
+     if(bin2)
+         memcpy( bam_thread -> BIN_buffer + bam_thread -> BIN_buffer_ptr + first_len, bin2, second_len );
+     bam_thread -> BIN_buffer_ptr += pair_len;
+     return 0;
+ }
+
+ /*
+  * Try to pair a read with a mate stored earlier in the thread's orphan table.
+  *
+  * If the table holds an entry under read_full_name, the two records are passed to
+  * the output function (when one is set), the entry is removed and orphant_space is
+  * reduced by bin_len (not below zero). Otherwise private copies of the name and of
+  * the record are stored in the table and orphant_space grows by bin_len.
+  */
+ void SAM_pairer_do_read_test( SAM_pairer_context_t * pairer , SAM_pairer_thread_t * thread_context , int read_name_len, char * read_full_name, int bin_len, char * bin ){
+     unsigned char * mate_bin = HashTableGet(thread_context -> orphant_table, read_full_name);
+
+     if(NULL == mate_bin){
+         // no mate yet: remember this read as an orphan
+         char * mem_name = malloc(read_name_len + 1);
+         char * mem_bin = malloc(bin_len);
+
+         memcpy(mem_name, read_full_name, read_name_len);
+         mem_name[read_name_len] = 0;
+         memcpy(mem_bin, bin , bin_len);
+
+         HashTablePut(thread_context -> orphant_table, mem_name, mem_bin);
+         thread_context -> orphant_space += bin_len;
+         //SUBREADprintf("Orphant_created [%d]: %s\n", thread_context -> thread_id, read_full_name);
+         return;
+     }
+
+     // mate found: emit the pair, then drop the stored mate
+     if(pairer -> output_function)
+         pairer -> output_function(pairer, thread_context -> thread_id, read_full_name, bin, (char*)mate_bin);
+     HashTableRemove(thread_context -> orphant_table, read_full_name);
+
+     if(thread_context -> orphant_space > bin_len)
+         thread_context -> orphant_space -= bin_len;
+     else
+         thread_context -> orphant_space = 0;
+     //SUBREADprintf("Mate_found: %s\n", read_full_name);
+ }
+
+ /*
+  * Take the next record from the thread's input and route it.
+  *
+  * A paired-end read (FLAG bit 1 set, pairer not in single-end mode) is compared
+  * with the read seen immediately before it: if both have the same pairing key the
+  * pair is output at once; otherwise the previous read is handed to the orphan
+  * test and the current one is remembered in its place. Any other read is output
+  * alone.
+  *
+  * Returns 0 when a record was processed. When there is no further record,
+  * BAM_header_parsed is set to 1 and 1 is returned.
+  */
+ int SAM_pairer_do_next_read( SAM_pairer_context_t * pairer , SAM_pairer_thread_t * thread_context ){
+     char read_full_name[ MAX_READ_NAME_LEN*2 +80 ]; // rname:chr_r1:pos_r1:chr_r2:pos_r2:HI_tag
+     unsigned char * bin = NULL;
+     int bin_len = 0, this_flags = 0;
+
+     if(!SAM_pairer_get_next_read_BIN(pairer, thread_context, &bin, &bin_len)){
+         pairer -> BAM_header_parsed = 1;
+         return 1;
+     }
+
+     int name_len = SAM_pairer_get_read_full_name(pairer, thread_context, bin, bin_len, read_full_name, & this_flags);
+     int is_paired_read = (pairer -> is_single_end_mode == 0) && (( this_flags & 1 ) == 1);
+
+     if(!is_paired_read){
+         // single-end data: just write.
+         if(pairer -> output_function)
+             pairer -> output_function(pairer, thread_context -> thread_id, NULL, (char*) bin, NULL);
+         return 0;
+     }
+
+     if(0 == strcmp(read_full_name , thread_context -> immediate_last_read_full_name)){
+         // the mate is the read right before this one
+         if(pairer -> output_function)
+             pairer -> output_function(pairer, thread_context -> thread_id, read_full_name, (char*) bin, (char*)thread_context -> immediate_last_read_bin);
+
+         thread_context -> immediate_last_read_full_name[0] = 0;
+         return 0;
+     }
+
+     // the previous read did not meet its mate here: try the orphan table for it
+     if(thread_context -> immediate_last_read_full_name[0])
+         SAM_pairer_do_read_test(pairer , thread_context , thread_context -> immediate_last_read_name_len , thread_context -> immediate_last_read_full_name , thread_context -> immediate_last_read_bin_len , thread_context -> immediate_last_read_bin);
+
+     // and keep the current read as the new "previous read"
+     thread_context -> immediate_last_read_bin_len = bin_len;
+     thread_context -> immediate_last_read_name_len = name_len;
+     strcpy(thread_context -> immediate_last_read_full_name, read_full_name);
+     memcpy(thread_context -> immediate_last_read_bin, bin, bin_len);
+     return 0;
+ }
+
+
+ // Orphants (reads whose mate has not been seen yet) are kept in a per-thread table.
+ // When the stored orphants take more space than the input buffer size, they are sorted by name and saved to a temporary file on disk.
+
+ /*
+  * Exchange callback for merge_sort. `arr` points to two parallel pointer arrays
+  * (arr[0]: names, arr[1]: records); the entries at l and r are swapped in both.
+  */
+ void SAM_pairer_sort_exchange(void * arr, int l, int r){
+     unsigned char *** columns = (unsigned char ***) arr;
+     int column_i;
+
+     for(column_i = 0; column_i < 2; column_i++){
+         unsigned char * held = columns[column_i][l];
+         columns[column_i][l] = columns[column_i][r];
+         columns[column_i][r] = held;
+     }
+ }
+
+ /*
+  * Comparison callback for merge_sort: orders entries l and r by their names
+  * (arr[0]) using strcmp, whose result is returned unchanged.
+  */
+ int SAM_pairer_sort_compare(void * arr, int l, int r){
+     char ** names = ((char ***) arr)[0];
+     const char * left_name = names[l];
+     const char * right_name = names[r];
+
+     return strcmp(left_name, right_name);
+ }
+
+ /*
+  * Merge callback for merge_sort. The two adjacent sorted runs
+  * [start, start+items) and [start+items, start+items+items2) of the parallel
+  * arrays arr[0] (names) and arr[1] (records) are merged by name into one sorted
+  * run stored back at `start`. On equal names the entry of the first run goes first.
+  */
+ void SAM_pairer_sort_merge( void * arr, int start, int items, int items2 ){
+     unsigned char *** sort_data = (unsigned char ***) arr;
+     int total = items + items2;
+     int left = start, left_end = start + items;
+     int right = left_end, right_end = left_end + items2;
+     int filled = 0;
+
+     unsigned char ** merged_names = malloc(sizeof(char *) * total);
+     unsigned char ** merged_bins = malloc(sizeof(char *) * total);
+
+     // both runs still have entries: take the smaller head
+     while(left < left_end && right < right_end){
+         int take = left;
+         if(strcmp((char *)sort_data[0][left], (char *)sort_data[0][right]) <= 0)
+             left++;
+         else
+             take = right++;
+         merged_names[filled] = sort_data[0][take];
+         merged_bins[filled] = sort_data[1][take];
+         filled++;
+     }
+     // whatever remains of the first run
+     for(; left < left_end; left++, filled++){
+         merged_names[filled] = sort_data[0][left];
+         merged_bins[filled] = sort_data[1][left];
+     }
+     // whatever remains of the second run
+     for(; right < right_end; right++, filled++){
+         merged_names[filled] = sort_data[0][right];
+         merged_bins[filled] = sort_data[1][right];
+     }
+     assert(filled == total);
+
+     memcpy( sort_data[0] + start, merged_names, sizeof(char *) * total );
+     memcpy( sort_data[1] + start, merged_bins, sizeof(char *) * total );
+     free(merged_names);
+     free(merged_bins);
+ }
+
+ /*
+  * Hash a NUL-terminated name to a number below 39846617. Within this chunk the
+  * result is used, modulo the thread count, to decide which thread handles a name.
+  */
+ unsigned int SAM_pairer_osr_hash(char * st){
+     unsigned int acc = 0, mixed = 0;
+     char * cursor;
+
+     for(cursor = st; *cursor != 0; cursor++){
+         int nch = *cursor;
+         acc = (acc << 2) ^ nch;
+         mixed = (acc << 3) ^ nch; // derived from the updated acc on every step
+     }
+     return (acc ^ mixed) % 39846617;
+ }
+
+ /*
+  * Advance an orphan file to the next entry that belongs to this thread.
+  *
+  * Each entry is: 2-byte name length, name, 2-byte record length, record
+  * (record length + 4 bytes). Entries whose name hash modulo all_threads differs
+  * from thread_no are skipped. For a matching entry the name is copied into `name`
+  * (NUL-terminated) and the file is rewound to the start of that entry.
+  *
+  * Returns 1 when such an entry was found, 0 at end of file or on a short read.
+  */
+ int SAM_pairer_osr_next_name(FILE * fp , char * name, int thread_no, int all_threads){
+     for(;;){
+         int field_len = 0;
+         int got;
+
+         if(feof(fp)) return 0;
+
+         fread(&field_len, 1, 2, fp);
+         if(field_len < 1) return 0;
+         assert(field_len < 1024);
+
+         got = fread(name, 1, field_len, fp);
+         if(got != field_len) return 0;
+         name[field_len] = 0;
+
+         if( SAM_pairer_osr_hash(name)% all_threads == thread_no ){
+             // step back over the length field and the name
+             fseek(fp, -2-field_len, SEEK_CUR);
+             return 1;
+         }
+
+         // not ours: the same variable now receives the record length; skip the record
+         fread(&field_len, 1, 2, fp);
+         assert(field_len < 65535);
+         fseek(fp, field_len + 4, SEEK_CUR);
+     }
+ }
+
+ /*
+  * Read the record of the entry at the current position of an orphan file.
+  * The 2-byte name length and the name are skipped, then the 2-byte record length
+  * is read and (record length + 4) bytes are copied into `bin`.
+  */
+ void SAM_pairer_osr_next_bin(FILE * fp, char * bin){
+     int name_len = 0, record_len = 0;
+
+     fread(&name_len, 1, 2, fp);
+     assert(name_len < 1024);
+     fseek(fp, name_len, SEEK_CUR);
+
+     fread(&record_len, 1, 2, fp);
+     assert(record_len < 65535);
+     fread(bin, 1, record_len + 4, fp);
+ }
+
+ /*
+  * Thread worker of the second pass: pair up the reads that stayed unpaired
+  * during scanning.
+  *
+  *   params - a malloc'ed void*[2]: [0] is the SAM_pairer_context_t, [1] encodes the
+  *            thread number. The block is freed here.
+  *
+  * Every temporary orphan file "<prefix>-TH%02d-BK%06d.tmp" of every thread is
+  * opened. Each file is sorted by name, so the files are walked in parallel like
+  * a multi-way merge; a thread only looks at the names whose hash selects it
+  * (see SAM_pairer_osr_next_name). If the smallest current name is present in two
+  * files, the two records are output as a pair; otherwise the record is output
+  * alone and counted in pairer->total_orphan_reads.
+  *
+  * Always returns NULL.
+  */
+ void * SAM_pairer_rescure_orphants(void * params){
+
+     void ** param_ptr = (void **) params;
+     SAM_pairer_context_t * pairer = param_ptr[0];
+     int thread_no = (int)(param_ptr[1]-NULL);
+     free(params);
+
+     int orphant_fp_size = 50, orphant_fp_no=0;
+     FILE ** orphant_fps = malloc(sizeof(FILE *) * orphant_fp_size);
+     int thno, bkno, x1;
+     char * bin_tmp1 , * bin_tmp2;
+
+     if(0 == thread_no && pairer -> display_progress)
+         SUBREADprintf("Finished scanning the input file. Processing unpaired reads.\n");
+
+     bin_tmp1 = malloc(66000);
+     bin_tmp2 = malloc(66000);
+
+     // open all block files of all threads; block numbers are consecutive,
+     // so the first missing file ends the list of a thread
+     for( thno = 0 ; thno < pairer -> total_threads ; thno ++ ){
+         for( bkno = 0 ; ; bkno++){
+             char tmp_fname[MAX_FILE_NAME_LENGTH];
+             sprintf(tmp_fname, "%s-TH%02d-BK%06d.tmp", pairer->tmp_file_prefix, thno, bkno);
+
+             FILE * in_fp = fopen(tmp_fname, "rb");
+             if(NULL == in_fp) break;
+             if(orphant_fp_no >= orphant_fp_size){
+                 orphant_fp_size *= 1.5;
+                 orphant_fps = realloc(orphant_fps, orphant_fp_size * sizeof(FILE *));
+             }
+             orphant_fps[orphant_fp_no++]=in_fp;
+         }
+     }
+
+     // names[x1 * max_name_len ...] is the current name of file x1; an empty
+     // string means that the file has no further entry for this thread
+     int max_name_len = MAX_READ_NAME_LEN*2 +80;
+     char * names = malloc( orphant_fp_no * max_name_len );
+     memset(names, 0, orphant_fp_no * max_name_len );
+
+     for(x1 = 0 ; x1 < orphant_fp_no; x1++)
+     {
+         int has = SAM_pairer_osr_next_name( orphant_fps[x1] , names + max_name_len*x1 , thread_no , pairer-> total_threads);
+         if(!has) *(names + max_name_len*x1)=0;
+     }
+
+     unsigned long long rescured=0, died=0;
+
+     while(1){
+         int min_name_fileno = -1;  // file holding the smallest current name
+         int min2_name_fileno = -1; // another file holding that same name, if any
+
+         for(x1 = 0 ; x1 < orphant_fp_no; x1++){
+             int has = *(names + max_name_len*x1);
+             if(has){
+                 int strcv_12 = 1;
+                 if(min_name_fileno >=0) strcv_12 = strcmp(names+(min_name_fileno * max_name_len), names+(x1 * max_name_len));
+                 if(strcv_12 > 0){
+                     min_name_fileno = x1;
+                     min2_name_fileno = -1;
+                 }else if( strcv_12 == 0){
+                     min2_name_fileno = x1;
+                 }
+             }
+
+         }
+
+         if(min_name_fileno >= 0){
+             SAM_pairer_osr_next_bin( orphant_fps[ min_name_fileno ] , bin_tmp1);
+
+             if( min2_name_fileno >=0){
+                 SAM_pairer_osr_next_bin( orphant_fps[ min2_name_fileno ] , bin_tmp2);
+                 // guarded like every other call of output_function in this file
+                 if(pairer -> output_function)
+                     pairer -> output_function(pairer, thread_no, names + max_name_len*min_name_fileno , (char*) bin_tmp1, (char*)bin_tmp2);
+
+                 int read_has = SAM_pairer_osr_next_name( orphant_fps[min2_name_fileno], names + max_name_len*min2_name_fileno, thread_no, pairer-> total_threads);
+                 if(!read_has) *(names + max_name_len*min2_name_fileno)=0;
+                 rescured++;
+             }else{
+                 //SUBREADprintf("FINAL_ORPHAN:%s\n" , names + max_name_len*min_name_fileno);
+                 if(pairer -> output_function)
+                     pairer -> output_function(pairer, thread_no, names + max_name_len*min_name_fileno, (char*) bin_tmp1, NULL);
+                 died++;
+             }
+
+             int read_has = SAM_pairer_osr_next_name( orphant_fps[min_name_fileno], names + max_name_len*min_name_fileno, thread_no, pairer-> total_threads);
+             if(!read_has) *(names + max_name_len*min_name_fileno)=0;
+         } else break;
+     }
+     free(names);
+
+     for(x1 = 0 ; x1 < orphant_fp_no; x1++)
+     {
+         fclose ( orphant_fps[x1] );
+     }
+     free( orphant_fps ); // the array of file handles itself was never released before
+     free( bin_tmp1 );
+     free( bin_tmp2 );
+     // NOTE(review): every rescue thread adds to this shared counter without a lock.
+     pairer -> total_orphan_reads += died;
+     //SUBREADprintf("RESCURE THREAD %d Rescured %llu, Died %llu\n", thread_no, rescured, died);
+     return NULL;
+ }
+
+ // Spill the orphan table of a thread to disk and empty it.
+ // All (name, record) entries are collected, sorted by name, and written to a new
+ // temporary file "<prefix>-TH%02d-BK%06d.tmp" (the block number of the thread is
+ // incremented). Each entry is written as: 2-byte name length, name, 2 bytes of
+ // the record's block size, then the record (block size + 4 bytes).
+ // The entries are removed from the table as they are written, and
+ // orphant_space is set back to 0.
+ void SAM_pairer_update_orphant_table(SAM_pairer_context_t * pairer , SAM_pairer_thread_t * thread_context){
+ unsigned int x2 = 0;
+ unsigned char ** name_list, ** bin_list;
+ //SUBREADprintf("ELES=%lu\n", thread_context->orphant_table->numOfElements);
+ name_list = malloc(sizeof(char*) * thread_context->orphant_table->numOfElements);
+ bin_list = malloc(sizeof(char*) * thread_context->orphant_table->numOfElements);
+
+ // walk every bucket chain and collect the key and value pointers
+ int x1;
+ for(x1 = 0; x1 < thread_context->orphant_table->numOfBuckets; x1 ++){
+ KeyValuePair *pair = thread_context->orphant_table->bucketArray[x1];
+ while (pair != NULL) {
+ KeyValuePair *nextPair = pair->next;
+ name_list [x2] = (unsigned char *)pair -> key;
+ bin_list [x2] = pair -> value;
+ x2++;
+ pair = nextPair;
+ }
+ }
+
+ assert(x2 == thread_context->orphant_table->numOfElements);
+ // sort both lists together, ordered by name
+ unsigned char ** sort_data[2];
+ sort_data[0]=name_list;
+ sort_data[1]=bin_list;
+ merge_sort(sort_data, thread_context->orphant_table->numOfElements, SAM_pairer_sort_compare, SAM_pairer_sort_exchange, SAM_pairer_sort_merge);
+
+ char tmp_fname[MAX_FILE_NAME_LENGTH];
+ sprintf(tmp_fname, "%s-TH%02d-BK%06d.tmp", pairer->tmp_file_prefix, thread_context -> thread_id, thread_context -> orphant_block_no++);
+ // NOTE(review): the result of fopen is not checked before it is written to.
+ FILE * tmp_fp = fopen(tmp_fname, "wb");
+
+ for(x1 = 0; x1 < x2; x1 ++){
+ unsigned int bin_len;
+
+ // the first four bytes of a record hold its block size
+ memcpy(&bin_len, bin_list[x1] , 4);
+ int namelen = strlen((char *)name_list[x1]);
+
+ // NOTE(review): only two bytes of each length are stored, so a name or record
+ // of 65536 bytes or more would not be read back correctly.
+ fwrite(&namelen,1,2,tmp_fp);
+ fwrite(name_list[x1], 1, namelen, tmp_fp);
+ fwrite(&bin_len,1,2,tmp_fp);
+ fwrite(bin_list[x1], 1, bin_len + 4, tmp_fp);
+
+ // must come after the writes: SAM_pairer_reset installs `free` as the table's
+ // key and value deallocator, so removal presumably frees both pointers
+ HashTableRemove(thread_context->orphant_table , name_list[x1]);
+ }
+ assert(thread_context -> orphant_table-> numOfElements == 0);
+ fclose(tmp_fp);
+ free(name_list);
+ free(bin_list);
+ thread_context -> orphant_space = 0;
+ }
+
+
+ /*
+  * Heuristic test whether `bin` looks like the start of a BAM alignment record.
+  *
+  *   bin       - candidate record, starting at its 4-byte block size
+  *   bin_len   - number of bytes available at bin
+  *   max_refID - number of reference sequences; refIDs must be -1 or in [0, max_refID)
+  *
+  * Returns 1 when every check passes, otherwise a negative code naming the first
+  * failed check:
+  *   -1 block size out of range          -2 block larger than the available bytes
+  *   -3 bad refID                        -4 bad mate refID
+  *   -5 bad sequence length              -6 more than 100 CIGAR operations
+  *   -7 name starts with '@'             -9 name byte outside 0x20..0x80
+  *   -10 name not NUL-terminated         -11 block too small for name+CIGAR+seq+qual
+  *   -12 unknown CIGAR operation         -13 CIGAR operation length out of range
+  *   -16/-17 optional-field checks       -17/-18/-19 following record looks wrong
+  * The function never returns 0.
+  */
+ int is_read_bin(char * bin, int bin_len, int max_refID){
+     int block_len;
+     memcpy(&block_len, bin, 4);
+     if(block_len > MAX_BIN_RECORD_LENGTH - 4 || block_len < 32) return -1;
+     if(block_len > bin_len - 4) return -2;
+
+     int refID, mate_refID;
+     memcpy(&refID, bin + 4, 4);
+     memcpy(&mate_refID, bin + 24, 4);
+     if(refID != -1 && (refID< 0 || refID >=max_refID)) return -3;
+     if(mate_refID != -1 && (mate_refID< 0 || mate_refID >=max_refID)) return -4;
+
+     int l_seq;
+     memcpy(&l_seq, bin + 20, 4);
+     if(l_seq > MAX_BIN_RECORD_LENGTH || l_seq < 0) return -5;
+
+     int min_mq_nl;
+     memcpy(&min_mq_nl, bin + 12, 4);
+     int name_len = min_mq_nl & 0xff;
+     int flag_nc;
+     memcpy(&flag_nc, bin + 16, 4);
+     int cigar_opts = flag_nc & 0xffff;
+     if(cigar_opts > 100) return -6;
+
+     // the read name: printable bytes followed by a NUL
+     int rname_cursor = 36;
+     if(bin[rname_cursor] == '@') return -7;
+     for(; rname_cursor< 36 + name_len - 1; rname_cursor ++){
+         int nch = bin[rname_cursor];
+         if(nch < 0x20 || nch > 0x80) return -9;
+         if(nch == '\t') return -8;
+     }
+
+     if(bin[rname_cursor]!=0)return -10;
+
+     if(block_len < 32 + name_len + 4*cigar_opts + l_seq + (l_seq+1)/2) return -11;
+
+     int cigar_i;
+     for(cigar_i = 0; cigar_i < cigar_opts ; cigar_i++){
+         int cigar_v;
+         memcpy(&cigar_v , bin + 36 + name_len + 4*cigar_i, 4);
+         // a CIGAR cell is (length << 4) | operation
+         int cigar_op = cigar_v & 0xf;
+         int cigar_value = (cigar_v >> 4) & 0xfffffff;
+         if(cigar_op > 8) return -12;
+         if((cigar_op == 0 || cigar_op == 1 || cigar_op > 4) && (cigar_value < 1 || cigar_value > MAX_BIN_RECORD_LENGTH)) return -13;
+     }
+
+     // NOTE(review): after the -11 check above, ext_cursor cannot exceed block_len + 4,
+     // so this branch looks unreachable; the comparison may have been meant the other
+     // way round. Left as it is because changing it would reject more records.
+     int ext_cursor = 36 + name_len + 4*cigar_opts + l_seq + (l_seq+1)/2;
+     if(ext_cursor > block_len + 4){
+         if(ext_cursor < block_len + 4 + 4) return -17;
+         if((!isalpha(bin[ext_cursor]))|| (!isalpha(bin[ext_cursor+1]))||!isalpha(bin[ext_cursor+2])){
+             // SUBREADprintf("TAGERR: %c%c%c\n", bin[ext_cursor], bin[ext_cursor+1], bin[ext_cursor+2]);
+             return -16;
+         }
+     }
+
+     // if more data follows, the next block size must look sane as well
+     if(bin_len > 4+block_len){
+         int next_block_len;
+
+         if(bin_len < 8+block_len) return -17;
+         memcpy(&next_block_len, bin + 4 + block_len, 4);
+
+         if(next_block_len > MAX_BIN_RECORD_LENGTH - 4 || next_block_len < 32) return -18;
+         if(next_block_len > bin_len - 4) return -19;
+     }
+
+     return 1;
+ }
+
+ /*
+  * Locate the first alignment record in the decompressed buffer of a thread.
+  * need_find_start is cleared in every case.
+  *
+  * With FAST_PICARD_BAM_PROCESSING, offsets 0 .. min(MAX_BIN_RECORD_LENGTH,
+  * input_buff_BIN_used) - 1 are probed with is_read_bin; input_buff_BIN_ptr is set
+  * to the first offset that is accepted and the function returns 1 if there was
+  * one, 0 otherwise. is_read_bin reports failures as negative numbers and never
+  * returns 0, so only a positive result counts as a hit.
+  *
+  * Without it, the raw result of is_read_bin at offset 0 is returned
+  * (1, or a negative failure code).
+  */
+ int SAM_pairer_find_start(SAM_pairer_context_t * pairer , SAM_pairer_thread_t * thread_context ){
+     thread_context -> need_find_start = 0;
+     if(FAST_PICARD_BAM_PROCESSING){
+         int start_pos = 0;
+         for(start_pos = 0; start_pos < min(MAX_BIN_RECORD_LENGTH, thread_context -> input_buff_BIN_used); start_pos++){
+             // NOTE(review): the length passed is derived from input_buff_SBAM_used although the
+             // scanned buffer is input_buff_BIN - confirm which counter is meant.
+             if(is_read_bin((char *)thread_context -> input_buff_BIN + start_pos, thread_context -> input_buff_SBAM_used - start_pos , pairer -> BAM_n_ref) > 0){
+                 break;
+             }
+         }
+         thread_context -> input_buff_BIN_ptr = start_pos;
+         SUBREADprintf("FOUND START : %d\n", start_pos);
+         return start_pos < min(MAX_BIN_RECORD_LENGTH, thread_context -> input_buff_BIN_used);
+     }else{
+         return is_read_bin((char *)thread_context -> input_buff_BIN , thread_context -> input_buff_SBAM_used , pairer -> BAM_n_ref);
+     }
+ }
+
+ // Argument passed to usleep() while a thread waits for the BAM header to be parsed.
+ #define PAIRER_WAIT_TICK_TIME 10000
+
+ // Thread worker of the first pass over the input.
+ // params is a malloc'ed void*[2]: [0] is the pairer context, [1] encodes the thread
+ // number; the block is freed here.
+ // In a loop, the thread fills its input buffer while holding input_fp_lock,
+ // processes all reads in the buffer, hands the last remembered read to the orphan
+ // test, and spills the orphan table to disk when it has grown beyond
+ // input_buff_SBAM_size. Until BAM_header_parsed is set, only thread 0 reads input;
+ // the other threads sleep and retry.
+ // Always returns NULL.
+ void * SAM_pairer_thread_run( void * params ){
+ void ** param_ptr = (void **) params;
+ SAM_pairer_context_t * pairer = param_ptr[0];
+ int thread_no = (int)(param_ptr[1]-NULL);
+ free(params);
+
+ SAM_pairer_thread_t * thread_context = pairer -> threads + thread_no;
+ int is_finished = 0;
+ while(1){
+ // reading from the input file is serialised by input_fp_lock
+ subread_lock_occupy(&pairer -> input_fp_lock);
+ if(pairer -> BAM_header_parsed || thread_no == 0){
+ SAM_pairer_fill_BIN_buff(pairer, thread_context, &is_finished);
+ thread_context -> need_find_start = pairer -> BAM_header_parsed;
+ }
+ subread_lock_release(&pairer -> input_fp_lock);
+
+ if(!pairer -> BAM_header_parsed && thread_no > 0) {
+ // header not parsed yet: wait for thread 0
+ usleep(PAIRER_WAIT_TICK_TIME);
+ } else if(thread_context -> input_buff_SBAM_used>0) {
+ unsigned int processed_reads = 0;
+ while(1){
+ int has_no_more = SAM_pairer_do_next_read(pairer, thread_context);
+ if(has_no_more)break;
+ processed_reads++;
+ }
+
+ // NOTE(review): shared counter updated outside of input_fp_lock.
+ pairer -> total_input_reads += processed_reads;
+ }
+ if(pairer -> is_bad_format) break;
+
+ // the last read of the buffer cannot meet its mate "immediately" any more
+ if(thread_context -> immediate_last_read_full_name[0]){
+ SAM_pairer_do_read_test(pairer , thread_context , thread_context -> immediate_last_read_name_len , thread_context -> immediate_last_read_full_name , thread_context -> immediate_last_read_bin_len , thread_context -> immediate_last_read_bin);
+ thread_context -> immediate_last_read_full_name[0] = 0;
+ }
+
+ if(thread_context -> orphant_space > pairer -> input_buff_SBAM_size)
+ SAM_pairer_update_orphant_table(pairer, thread_context);
+
+ if(is_finished){
+ // also releases threads that are still waiting for the header
+ pairer -> BAM_header_parsed = 1;
+ break;
+ }
+ }
+
+ // write whatever is still in the orphan table to a last temporary file
+ if(thread_context -> orphant_table -> numOfElements > 0)
+ SAM_pairer_update_orphant_table(pairer, thread_context);
+
+ return NULL;
+ }
+
+
+ // Start one `worker` thread per pairer thread and wait until all of them have ended.
+ // Each worker receives a malloc'ed void*[2] holding the pairer and the encoded
+ // thread number; the worker frees that block.
+ static void SAM_pairer_run_workers( SAM_pairer_context_t * pairer, void * (* worker)(void *) ){
+     int thread_i;
+
+     for(thread_i = 0; thread_i < pairer -> total_threads; thread_i++){
+         void ** init_params = malloc(sizeof(void *) * 2);
+
+         init_params[0] = pairer;
+         init_params[1] = (void *)(NULL+thread_i);
+         pthread_create(&(pairer -> threads[thread_i].thread_stab), NULL, worker, init_params);
+     }
+
+     for(thread_i = 0; thread_i < pairer -> total_threads; thread_i++)
+         pthread_join(pairer -> threads[thread_i].thread_stab, NULL);
+ }
+
+ // Run both passes of the pairer: first the scanning threads, then, unless the
+ // input was flagged as badly formatted, the threads that process the orphans.
+ // It returns 0 in every case.
+ int SAM_pairer_run_once( SAM_pairer_context_t * pairer){
+     SAM_pairer_run_workers(pairer, SAM_pairer_thread_run);
+
+     if(0 == pairer -> is_bad_format)
+         SAM_pairer_run_workers(pairer, SAM_pairer_rescure_orphants);
+
+     return 0;
+ }
+
+ // Read one compressed block from `in` and inflate it into `binbuf`.
+ // The block must start with the bytes 31, 139, 8, 4. The extra-field area is
+ // scanned for the subfield with identifiers 66,67 ('B','C'), whose 2-byte value
+ // is taken as bsize; (bsize - xlen - 19) bytes of compressed data are then read
+ // and the 8 bytes that follow them are skipped.
+ // binbuf must have room for 70000 bytes; strm is reset after use.
+ // Returns the number of bytes placed in binbuf, or -1 if the header bytes do not
+ // match or inflate does not end with Z_STREAM_END.
+ int fix_load_next_block(FILE * in, char * binbuf, z_stream * strm){
+ char * bam_buf = malloc(70000);
+ int x1, ret = 0;
+ // the four fixed leading bytes; fgetc returns EOF (-1) at end of file, which also fails here
+ x1 = fgetc(in);
+ if(x1 != 31) ret = -1;
+ x1 = fgetc(in);
+ if(x1 != 139) ret = -1;
+ x1 = fgetc(in);
+ if(x1 != 8) ret = -1;
+ x1 = fgetc(in);
+ if(x1 != 4) ret = -1;
+ if(ret == 0){
+ // four bytes that are read and ignored
+ x1 = fgetc(in);
+ x1 = fgetc(in);
+ x1 = fgetc(in);
+ x1 = fgetc(in);
+
+ x1 = fgetc(in);//XFL
+
+ x1 = fgetc(in);//OS
+ // length of the extra-field area, low byte first
+ int xlen;
+ xlen = fgetc(in);
+ xlen += fgetc(in) * 256;
+ int bsize = -1, xlen_ptr = 0;
+
+ // walk the subfields: two identifier bytes, 2-byte length, payload
+ while(xlen_ptr < xlen){
+ int si1 = fgetc(in);
+ int si2 = fgetc(in);
+ int slen = fgetc(in);
+ slen += fgetc(in) * 256;
+ if(si1 == 66 && si2==67){
+ // NOTE(review): exactly two payload bytes are consumed here, whatever slen says
+ bsize = fgetc(in);
+ bsize += 256*fgetc(in);
+ }else{
+ fseek(in , slen, SEEK_CUR);
+ }
+ xlen_ptr += 4 + slen;
+ }
+ if(bsize > 0){
+ fread(bam_buf, 1, bsize - xlen - 19, in);
+ }
+ // skip the 8 bytes that follow the compressed data
+ fseek(in, 8, SEEK_CUR);
+
+ // NOTE(review): if no 66,67 subfield was found, bsize is still -1 at this point.
+ strm -> avail_in = bsize - xlen - 19;
+ strm -> next_in = (unsigned char*)bam_buf;
+ strm -> avail_out = 70000;
+ strm -> next_out = (unsigned char*)binbuf;
+ int ret_inf = inflate(strm, Z_FINISH);
+ if(ret_inf == Z_STREAM_END){
+ ret = 70000 - strm -> avail_out;
+ // SUBREADprintf("FIX_DECOM: %d -> %d\n", bsize - xlen - 19, ret);
+ }else{
+ SUBREADprintf("FIX_DECOM_ERR:%d\n" , ret_inf);
+ ret = -1;
+ }
+ inflateReset(strm);
+ }
+ free(bam_buf);
+ return ret;
+ }
+
+ // Compress `binlen` bytes from `bin` and write them to `out` as one block.
+ // For binlen > 0 the caller's stream `strm` is used and reset afterwards; for
+ // binlen == 0 a temporary deflate stream is created instead, producing a block
+ // with no payload (SAM_pairer_fix_format writes one such block last).
+ // Layout written: the bytes 31,139,8,4, four zero bytes, XFL 0, OS 0xff, extra
+ // length 6, the subfield 66,67 with length 2 and value bam_len + 19 + 6, the
+ // compressed data, the CRC32 of the uncompressed data, and binlen.
+ // Multi-byte numbers are written with fwrite in host byte order.
+ void fix_write_block(FILE * out, char * bin, int binlen, z_stream * strm){
+ char * bam_buf = malloc(70000);
+ int x1, bam_len = 0, retbam;
+
+ if(binlen > 0){
+ strm -> avail_in = binlen;
+ strm -> next_in = (unsigned char*)bin;
+ strm -> avail_out = 70000;
+ strm -> next_out = (unsigned char*)bam_buf;
+ retbam = deflate(strm , Z_FINISH);
+ bam_len = 70000 - strm -> avail_out;
+ deflateReset(strm);
+ }else{
+ // empty payload: use a private stream, the caller's stream is left untouched
+ z_stream nstrm;
+ nstrm.zalloc = Z_NULL;
+ nstrm.zfree = Z_NULL;
+ nstrm.opaque = Z_NULL;
+ nstrm.avail_in = 0;
+ nstrm.next_in = Z_NULL;
+
+ deflateInit2(&nstrm, SAMBAM_COMPRESS_LEVEL, Z_DEFLATED,
+ PAIRER_GZIP_WINDOW_BITS, PAIRER_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY);
+
+ nstrm.avail_in = 0;
+ nstrm.next_in = (unsigned char*)bin;
+ nstrm.avail_out = 70000;
+ nstrm.next_out = (unsigned char*)bam_buf;
+ retbam = deflate(&nstrm, Z_FINISH);
+ bam_len = 70000 - nstrm.avail_out;
+ deflateEnd(&nstrm);
+ }
+
+ // retbam is only referenced by the disabled trace below
+ //SUBREADprintf("FIX_COMPR: %d -> %d RET=%d\n", binlen , bam_len, retbam);
+
+ // checksum of the uncompressed data
+ unsigned int crc0 = crc32(0, NULL, 0);
+ unsigned int crc = crc32(crc0, (unsigned char *) bin , binlen);
+
+ fputc(31, out);
+ fputc(139, out);
+ fputc(8, out);
+ fputc(4, out);
+ fputc(0, out);
+ fputc(0, out);
+ fputc(0, out);
+ fputc(0, out);
+
+ fputc(0, out);//XFL
+ fputc(0xff, out);//OS
+
+ // extra-field area: 6 bytes = identifiers 66,67 + 2-byte length + 2-byte value
+ x1 = 6;
+ fwrite( &x1, 2, 1 , out );
+ fputc( 66, out );
+ fputc( 67, out );
+ x1 = 2;
+ fwrite( &x1, 2, 1 , out );
+ // 26 bytes surround the compressed data (18 before, 8 after); the value stored is that total minus one
+ x1 = bam_len + 19 + 6;
+ fwrite( &x1, 2, 1 , out );
+ fwrite( bam_buf , 1,bam_len, out );
+
+ fwrite( &crc, 4, 1, out );
+ fwrite( &binlen, 4, 1, out );
+
+ free(bam_buf);
+ }
+
+ // Helper macros for SAM_pairer_fix_format; they use its local variables directly
+ // (in_bin, in_bin_ptr, in_bin_size, in_strm, old_fp, out_bin, out_bin_ptr, out_strm, new_fp, nch).
+ //
+ // FIX_GET_NEXT_NCH: put the next input byte (0..255) into nch, loading further
+ // blocks with fix_load_next_block while the current one is used up. nch becomes -1
+ // when no more data can be loaded.
+ #define FIX_GET_NEXT_NCH { while(in_bin_ptr == in_bin_size){ \
+ in_bin_ptr = 0; in_bin_size = 0;\
+ int newsize = fix_load_next_block(old_fp, in_bin, &in_strm);\
+ if(newsize < 0){ break;}else{in_bin_size = newsize;}\
+ } if(in_bin_size>0){nch = in_bin[in_bin_ptr++]; if(nch < 0)nch += 256; } else nch = -1; }
+
+ // FIX_FLASH_OUT: write the pending output bytes, if any, as one block and empty the output buffer.
+ #define FIX_FLASH_OUT { if(out_bin_ptr > 0) fix_write_block(new_fp, out_bin, out_bin_ptr, &out_strm); out_bin_ptr = 0; }
+
+ // FIX_APPEND_OUT: append c bytes from p to the output buffer, flushing first when
+ // more than 60000 bytes are pending.
+ // FIX_APPEND_READ: append c bytes from p without flushing; the caller flushes
+ // between whole entries so that an entry is not split across blocks.
+ #define FIX_APPEND_OUT(p, c) { if(out_bin_ptr > 60000){FIX_FLASH_OUT} ; memcpy(out_bin + out_bin_ptr, p, c); out_bin_ptr +=c ; }
+ #define FIX_APPEND_READ(p, c){ memcpy(out_bin + out_bin_ptr, p, c); out_bin_ptr +=c ; }
+
+ // Rewrite the input BAM file into "<tmp_file_prefix>.fixbam" so that blocks are
+ // only cut between whole entries, then make that file the pairer's input.
+ // The input is read from its beginning as a byte stream (FIX_GET_NEXT_NCH) and
+ // copied section by section: magic, header text, reference table, reads.
+ // The output stream is created with Z_NO_COMPRESSION. A block with no payload is
+ // written last. The old input file is closed.
+ void SAM_pairer_fix_format(SAM_pairer_context_t * pairer){
+ FILE * old_fp = pairer -> input_fp;
+ fseek(old_fp, 0, SEEK_SET);
+ char tmpfname [300];
+
+ // NOTE(review): unbounded sprintf into a 300-byte buffer.
+ sprintf(tmpfname, "%s.fixbam", pairer -> tmp_file_prefix);
+
+ FILE * new_fp = f_subr_open(tmpfname, "wb");
+ char * in_bin = malloc(140000);
+ char * out_bin = malloc(70000);
+
+ z_stream in_strm;
+ z_stream out_strm;
+ in_strm.zalloc = Z_NULL;
+ in_strm.zfree = Z_NULL;
+ in_strm.opaque = Z_NULL;
+ in_strm.avail_in = 0;
+ in_strm.next_in = Z_NULL;
+
+ inflateInit2(&in_strm, PAIRER_GZIP_WINDOW_BITS);
+
+ out_strm.zalloc = Z_NULL;
+ out_strm.zfree = Z_NULL;
+ out_strm.opaque = Z_NULL;
+ out_strm.avail_in = 0;
+ out_strm.next_in = Z_NULL;
+
+ deflateInit2(&out_strm, Z_NO_COMPRESSION, Z_DEFLATED,
+ PAIRER_GZIP_WINDOW_BITS, PAIRER_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY);
+
+ // these names are used by the FIX_* macros
+ int in_bin_ptr = 0;
+ int out_bin_ptr = 0;
+ int in_bin_size = 0;
+ int content_count = 0;
+ int content_size = 0;
+ int x1, nch = 0;
+
+ // &nch with length 1 copies the first byte of the int, which is the value
+ // only on a little-endian host
+ for(x1 = 0; x1 < 4; x1++){
+ FIX_GET_NEXT_NCH; // BAM1
+ FIX_APPEND_OUT(&nch, 1);
+ }
+
+
+ // ====== The header texts
+ // 4-byte length, assembled low byte first, then that many bytes of text
+ content_size = 0;
+ for(x1 = 0; x1 < 4; x1++){
+ FIX_GET_NEXT_NCH;
+ // SUBREADprintf("FIX: TLEN: %d\n", nch);
+ content_size += (nch << (8 * x1));
+ }
+ FIX_APPEND_OUT(&content_size, 4);
+ //SUBREADprintf("FIX: TXTLEN=%d\n", content_size);
+ for(content_count = 0; content_count < content_size; content_count++){
+ FIX_GET_NEXT_NCH;
+ FIX_APPEND_OUT(&nch, 1);
+ // fputc(nch, stderr);
+ }
+ FIX_FLASH_OUT;
+
+ // ====== The chromosome table
+ // 4-byte entry count; each entry is a 4-byte name length, the name, and 4 more bytes
+ content_size = 0;
+ for(x1 = 0; x1 < 4; x1++){
+ FIX_GET_NEXT_NCH;
+ content_size += (nch << (8 * x1));
+ }
+ FIX_APPEND_OUT(&content_size, 4);
+ //SUBREADprintf("FIX: CHROLEN=%d\n", content_size);
+ for(content_count = 0; content_count < content_size; content_count++){
+ int namelen = 0;
+ for(x1 = 0; x1 < 4; x1++){
+ FIX_GET_NEXT_NCH;
+ namelen+= (nch << (8 * x1));
+ }
+ FIX_APPEND_READ(&namelen, 4);
+ for(x1 = 0; x1 < namelen + 4; x1++){ // inc. length
+ FIX_GET_NEXT_NCH;
+ FIX_APPEND_READ(&nch, 1);
+ }
+
+ // flush only between entries
+ if(out_bin_ptr > 60000){
+ FIX_FLASH_OUT;
+ }
+ }
+ FIX_FLASH_OUT;
+
+ // ===== The reads
+ // each read is a 4-byte block size followed by that many bytes; the loop ends
+ // when no further byte can be read
+ unsigned long long reads =0;
+ while(1){
+ int block_size = 0;
+ FIX_GET_NEXT_NCH;
+ if(nch<0) break;
+ FIX_APPEND_READ(&nch, 1);
+ block_size = nch;
+ for(x1 = 1; x1 < 4; x1++){
+ FIX_GET_NEXT_NCH;
+ block_size += (nch << (8 * x1));
+ FIX_APPEND_READ(&nch, 1);
+ }
+ for(x1 = 0; x1 < block_size; x1++){
+ FIX_GET_NEXT_NCH;
+ FIX_APPEND_READ(&nch, 1);
+ }
+ reads ++;
+ // flush only between reads
+ if(out_bin_ptr > 60000){
+ FIX_FLASH_OUT;
+ }
+ }
+ FIX_FLASH_OUT;
+ //SUBREADprintf("FIX READS=%llu\n", reads);
+ // final block without payload
+ fix_write_block(new_fp, out_bin, 0, &out_strm);
+ deflateEnd(&out_strm);
+ inflateEnd(&in_strm);
+
+ fclose(old_fp);
+ fclose(new_fp);
+
+ // from now on the pairer reads the rewritten file
+ pairer -> input_fp = f_subr_open(tmpfname, "rb");
+ free(in_bin);
+ free(out_bin);
+ }
+
+
+
+/* Sleep interval handed to usleep() whenever a no-sort worker or the feeding
+ * loop has nothing to do. */
+unsigned int nosort_tick_time = 100;
+#define NOSORT_SBAM_BUFF_SIZE 66000
+#define NOSORT_BIN_BUFF_SIZE (2*70000)
+
+
+/* Copy the BAM record at the read position of the thread's queue (its 4-byte
+ * length field included) to dest and advance the read position past it.
+ * The accepted length range is the one enforced by the feeding loop in
+ * SAM_nosort_run_once(): 32 to 65000, both inclusive. */
+static void nosort_take_BAM_record(SAM_pairer_thread_t * thread_context, char * dest){
+	int record_len;
+	char * src = thread_context -> input_buff_SBAM + thread_context -> input_buff_SBAM_ptr;
+
+	memcpy(&record_len, src, 4);
+	assert(record_len >= 32 && record_len <= 65000);
+	memcpy(dest, src, 4 + record_len);
+	thread_context -> input_buff_SBAM_ptr += record_len + 4;
+}
+
+/*
+ * Worker thread of the no-sort mode.
+ *
+ * params is a heap-allocated array of two pointers: the pairer context and
+ * the thread number encoded as a pointer value; the array is freed here.
+ * The worker repeatedly takes two consecutive reads from its own queue while
+ * holding SBAM_lock (BAM records are copied, SAM lines are converted by
+ * reduce_SAM_to_BAM), then passes them to pairer -> output_function outside
+ * the lock. It sleeps when fewer than two reads are queued and returns once
+ * pairer -> is_finished is set and no pair could be taken.
+ */
+void * SAM_nosort_thread_run( void * params ){
+	void ** param_ptr = (void **) params;
+	SAM_pairer_context_t * pairer = param_ptr[0];
+	// The creator stored the number as (NULL + n); decode it with char pointers
+	// instead of relying on arithmetic on void pointers.
+	int thread_no = (int)((char *) param_ptr[1] - (char *) NULL);
+	free(params);
+
+	SAM_pairer_thread_t * thread_context = pairer -> threads + thread_no;
+
+	// The two halves of the BIN buffer hold the first and the second read.
+	char * read_ptr_1 = (char *)thread_context -> input_buff_BIN;
+	char * read_ptr_2 = (char *)thread_context -> input_buff_BIN + NOSORT_BIN_BUFF_SIZE / 2;
+
+	while(1){
+		int has_found = 0, to_quit = 0;
+		subread_lock_occupy(&thread_context -> SBAM_lock);
+
+		if(thread_context -> reads_in_SBAM > 1){
+			if(pairer -> input_is_BAM){
+				nosort_take_BAM_record(thread_context, read_ptr_1);
+				nosort_take_BAM_record(thread_context, read_ptr_2);
+				has_found = 1;
+				thread_context -> reads_in_SBAM -= 2;
+			}else{
+				thread_context -> input_buff_BIN_ptr = 0;
+				int rret = reduce_SAM_to_BAM(pairer, thread_context);
+				thread_context -> reads_in_SBAM -- ;
+				if(rret > 0){
+					thread_context -> input_buff_BIN_ptr = NOSORT_BIN_BUFF_SIZE/2;
+					rret = reduce_SAM_to_BAM(pairer, thread_context);
+					thread_context -> reads_in_SBAM -- ;
+					if(rret > 0){
+						has_found = 1;
+					}
+				}
+			}
+		}
+		if(pairer -> is_finished) to_quit = 1;
+		subread_lock_release(&thread_context -> SBAM_lock);
+
+		if(has_found)
+			pairer -> output_function(pairer, thread_no, NULL, (char*) read_ptr_1,(char*) read_ptr_2);
+		else{
+			if(to_quit) break;
+			usleep(nosort_tick_time);
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Load and inflate the next compressed chunk of the input for the no-sort
+ * reader. The buffers and their cursors are taken from the pairer:
+ * appendix2 is the compressed staging buffer, appendix3 the decompressed
+ * buffer, appendix4 / appendix5 point to its "bytes used" and "read
+ * position" counters. Bytes not yet consumed are moved to the front of the
+ * decompressed buffer before the new data is appended behind them.
+ * Returns the number of newly decompressed bytes, or -1 if no chunk could be
+ * read or inflated.
+ */
+int SAM_nosort_decompress_next_block(SAM_pairer_context_t * pairer){
+	char * compressed = pairer -> appendix2;
+	char * plain = pairer -> appendix3;
+	int * plain_used = pairer -> appendix4;
+	int * plain_ptr = pairer -> appendix5;
+	unsigned int decompressed_len;
+	int compressed_len, unread = 0, binlen;
+
+	compressed_len = PBam_get_next_zchunk(pairer -> input_fp, compressed, NOSORT_SBAM_BUFF_SIZE, &decompressed_len);
+	if(compressed_len < 0) return -1;
+
+	// Keep the unread tail, shifted to offset 0.
+	if((* plain_ptr) < (* plain_used)){
+		unread = (* plain_used) - (* plain_ptr);
+		memmove(plain, plain + (* plain_ptr), unread);
+	}
+	(* plain_used) = unread;
+	(* plain_ptr) = 0;
+
+	binlen = SamBam_unzip(plain + unread, compressed , compressed_len);
+	if(binlen < 0) return -1;
+
+	(* plain_used) += binlen;
+	return binlen;
+}
+
+/* Helpers for SAM_nosort_run_once(); they use that function's locals
+ * (pairer, BIN_buff, BIN_buff_used, BIN_buff_ptr, nch, line_ptr, NOSORT_SAM_eof).
+ * NOSORT_BAM_next_nch puts the next decompressed byte into nch, or -1 once no
+ * further block can be loaded; NOSORT_BAM_next_u32 assembles four such bytes,
+ * least significant first. */
+#define NOSORT_BAM_next_nch { while( BIN_buff_used == BIN_buff_ptr ){int rlen = SAM_nosort_decompress_next_block(pairer); if(rlen < 0) { BIN_buff_used = -1 ; break;}} if(BIN_buff_used < 0) nch = -1; else nch = BIN_buff[BIN_buff_ptr++]; }
+#define NOSORT_BAM_next_u32(v){ NOSORT_BAM_next_nch; if(nch < 0)v=-1;else{; v= nch; NOSORT_BAM_next_nch; v+=nch*256; NOSORT_BAM_next_nch; v+=nch*65536; NOSORT_BAM_next_nch; v+=nch*16777216;} }
+
+#define NOSORT_SAM_next_line {NOSORT_SAM_eof = fgets(line_ptr, NOSORT_SBAM_BUFF_SIZE, pairer -> input_fp);}
+
+/* A thread's queue is topped up to HIGHBAR reads once it holds fewer than LOWBAR. */
+#define NOSORT_REFILL_LOWBAR 6000
+#define NOSORT_REFILL_HIGHBAR 18000
+
+/* Non-zero if the queue of this thread should be refilled now. */
+static int nosort_wants_refill(SAM_pairer_thread_t * this_thread){
+	return this_thread -> reads_in_SBAM < NOSORT_REFILL_LOWBAR &&
+		(this_thread -> input_buff_SBAM_used == 0 || this_thread -> input_buff_SBAM_ptr > 0);
+}
+
+/* Move the unconsumed part of a thread's queue to the start of its buffer. */
+static void nosort_compact_queue(SAM_pairer_thread_t * this_thread){
+	int unread = 0;
+	if(this_thread -> input_buff_SBAM_ptr < this_thread -> input_buff_SBAM_used){
+		unread = this_thread -> input_buff_SBAM_used - this_thread -> input_buff_SBAM_ptr;
+		memmove(this_thread -> input_buff_SBAM, this_thread -> input_buff_SBAM + this_thread -> input_buff_SBAM_ptr, unread);
+	}
+	this_thread -> input_buff_SBAM_used = unread;
+	this_thread -> input_buff_SBAM_ptr = 0;
+}
+
+/* Extract the SN: and LN: fields of an "@SQ\t..." header line. The scan stops
+ * at the end of the line or of the string, whichever comes first. *sqname
+ * points into the line (it is not NUL-terminated) and *sqname_len is its
+ * length. Returns 1 if a sequence name was found, otherwise 0. */
+static int nosort_parse_SQ_line(char * line, char ** sqname, unsigned int * sqname_len, unsigned int * ct_len){
+	char * field = line + 4;
+	*sqname = NULL;
+	*sqname_len = 0;
+	*ct_len = 0;
+
+	while(1){
+		char * field_end = field;
+		while(*field_end && *field_end != '\t' && *field_end != '\n' && *field_end != '\r') field_end++;
+
+		if(field_end - field >= 3 && field[2] == ':'){
+			if(field[0] == 'S' && field[1] == 'N'){
+				*sqname = field + 3;
+				*sqname_len = (unsigned int)(field_end - field - 3);
+			}else if(field[0] == 'L' && field[1] == 'N'){
+				unsigned int len = 0;
+				char * digit;
+				for(digit = field + 3; digit < field_end && *digit >= '0' && *digit <= '9'; digit++)
+					len = len * 10 + (unsigned int)(*digit - '0');
+				*ct_len = len;
+			}
+		}
+
+		if(*field_end != '\t') break;
+		field = field_end + 1;
+	}
+	return *sqname != NULL;
+}
+
+/*
+ * Run the pairer without sorting: the input is assumed to list the two reads
+ * of every pair next to each other.
+ *
+ * The function starts pairer -> total_threads workers (SAM_nosort_thread_run),
+ * reports the header through pairer -> output_header, and then keeps the
+ * per-thread queues filled with raw reads until the input is exhausted, at
+ * which point pairer -> is_finished is set. It returns after all workers have
+ * been joined. While it runs, pairer -> appendix2..appendix5 point to the
+ * buffers and cursors used by SAM_nosort_decompress_next_block().
+ *
+ * For BAM input the header text and the reference table are passed on as two
+ * separate output_header calls; a truncated or corrupted header ends the run
+ * with an error message. For SAM input the @SQ lines are converted to the
+ * binary reference table and each sequence name is registered in
+ * pairer -> sam_contig_number_table with its 1-based index.
+ */
+void SAM_nosort_run_once(SAM_pairer_context_t * pairer){
+	int x1;
+	for(x1 = 0; x1 < pairer -> total_threads ; x1++){
+		// this 16-byte memory block is freed in the thread worker.
+		void ** init_params = malloc(sizeof(void *) * 2);
+
+		init_params[0] = pairer;
+		init_params[1] = (void *)(NULL+x1);
+		pthread_create(&(pairer -> threads[x1].thread_stab), NULL, SAM_nosort_thread_run, init_params);
+	}
+
+	char * SBAM_buff = malloc(NOSORT_SBAM_BUFF_SIZE);
+	int nch;
+	unsigned char * BIN_buff = malloc(NOSORT_BIN_BUFF_SIZE);
+	char *NOSORT_SAM_eof=NULL;
+	int BIN_buff_used = 0;
+	int BIN_buff_ptr = 0;
+
+	pairer -> appendix2 = SBAM_buff;
+	pairer -> appendix3 = BIN_buff;
+	pairer -> appendix4 = &BIN_buff_used;
+	pairer -> appendix5 = &BIN_buff_ptr;
+
+	if(pairer -> input_is_BAM){
+		unsigned int bam_signature, ref_no;
+		unsigned int ref_bin_len = 0, ref_bin_capacity = 1024;
+		char * header_txt = NULL;
+		// The reference table gets its own growable buffer; its size is
+		// unrelated to the length of the header text.
+		char * ref_bin = malloc(ref_bin_capacity);
+		int header_ok = (ref_bin != NULL);
+
+		NOSORT_BAM_next_u32(bam_signature);
+		(void) bam_signature;
+		NOSORT_BAM_next_u32(pairer -> BAM_l_text);
+		if(nch < 0) header_ok = 0;
+
+		if(header_ok){
+			header_txt = malloc((size_t) pairer -> BAM_l_text + 1);
+			if(NULL == header_txt) header_ok = 0;
+		}
+
+		if(header_ok){
+			unsigned int text_pos;
+			for(text_pos = 0 ; text_pos < pairer -> BAM_l_text; text_pos++){
+				NOSORT_BAM_next_nch;
+				if(nch < 0){ header_ok = 0; break; }
+				header_txt [text_pos] = nch;
+			}
+		}
+
+		if(header_ok){
+			pairer -> output_header(pairer, 0, 1, pairer -> BAM_l_text , header_txt , pairer -> BAM_l_text );
+			NOSORT_BAM_next_u32(pairer -> BAM_n_ref);
+			if(nch < 0) header_ok = 0;
+		}
+		free(header_txt);
+
+		for(ref_no = 0; header_ok && ref_no < pairer -> BAM_n_ref; ref_no++) {
+			unsigned int l_name, l_ref, x2;
+			NOSORT_BAM_next_u32(l_name);
+			if(nch < 0 || l_name >= 256){ header_ok = 0; break; }
+
+			// room for the length field, the name and the reference length
+			if(ref_bin_len + l_name + 8 > ref_bin_capacity){
+				unsigned int new_capacity = (ref_bin_len + l_name + 8) * 2;
+				char * bigger = realloc(ref_bin, new_capacity);
+				if(NULL == bigger){ header_ok = 0; break; }
+				ref_bin = bigger;
+				ref_bin_capacity = new_capacity;
+			}
+
+			memcpy(ref_bin + ref_bin_len, &l_name, 4);
+			ref_bin_len += 4;
+			for(x2 = 0; x2 < l_name; x2++){
+				NOSORT_BAM_next_nch;
+				ref_bin[ref_bin_len++] = nch;
+			}
+			NOSORT_BAM_next_u32(l_ref);
+			if(nch < 0){ header_ok = 0; break; }
+			memcpy(ref_bin + ref_bin_len, &l_ref, 4);
+			ref_bin_len += 4;
+		}
+
+		if(header_ok)
+			pairer -> output_header(pairer, 0, 0, pairer -> BAM_n_ref , ref_bin , ref_bin_len );
+		else{
+			SUBREADprintf("ERROR: the BAM header is truncated or corrupted.\n");
+			pairer -> is_finished = 1;
+		}
+		free(ref_bin);
+
+		while(1){
+			if(pairer -> is_finished) break;
+			int need_sleep = 1;
+			for(x1 = 0; x1 < pairer -> total_threads ; x1++){
+				if(pairer -> is_finished) break;
+				SAM_pairer_thread_t * this_thread = pairer -> threads + x1;
+				if(nosort_wants_refill(this_thread)){
+					subread_lock_occupy(&this_thread -> SBAM_lock);
+					int to_be_add = NOSORT_REFILL_HIGHBAR - this_thread -> reads_in_SBAM;
+					int x2, x3;
+
+					nosort_compact_queue(this_thread);
+					for(x2 = 0 ; x2 < to_be_add ; x2++){
+						int record_len;
+						int record_start = this_thread -> input_buff_SBAM_used;
+						NOSORT_BAM_next_u32(record_len);
+						if(record_len < 32 || record_len > 65000){
+							pairer -> is_finished = 1;
+							break;
+						}
+
+						memcpy(this_thread -> input_buff_SBAM + this_thread -> input_buff_SBAM_used , &record_len, 4);
+						this_thread -> input_buff_SBAM_used += 4;
+						for(x3 =0; x3 < record_len; x3++){
+							NOSORT_BAM_next_nch;
+							if(nch < 0) break;
+							this_thread -> input_buff_SBAM[this_thread -> input_buff_SBAM_used++] = nch;
+						}
+						if(nch < 0){
+							// The input ended inside this record: drop the partial copy.
+							this_thread -> input_buff_SBAM_used = record_start;
+							pairer -> is_finished = 1;
+							break;
+						}
+						this_thread -> reads_in_SBAM ++;
+					}
+					need_sleep = 0;
+					subread_lock_release(&this_thread -> SBAM_lock);
+				}
+			}
+			if(need_sleep) usleep(nosort_tick_time);
+		}
+	}else{
+		char * line_ptr = SBAM_buff;
+		off_t first_read_offset = 0;
+		unsigned int header_buffer_safe_size = 0;
+
+		// First pass: find where the reads start and how much room the
+		// binary reference table can need.
+		while(1){
+			first_read_offset = ftello(pairer -> input_fp);
+			NOSORT_SAM_next_line;
+			if(NOSORT_SAM_eof == NULL)break;
+
+			// An entry is at most a few bytes longer than its text line.
+			header_buffer_safe_size += strlen(line_ptr) + 16;
+			if(line_ptr[0]!='@'){
+				break;
+			}
+		}
+
+		fseek(pairer -> input_fp, 0 , SEEK_SET);
+		int header_bin_ptr = 0, header_contigs = 0;
+		char * header_bin = malloc(header_buffer_safe_size + 1);
+
+		// Second pass: convert every @SQ line.
+		while(header_bin){
+			NOSORT_SAM_next_line;
+			if(NOSORT_SAM_eof == NULL)break;
+			if(line_ptr[0]!='@') break;
+			if(memcmp(line_ptr, "@SQ\t",4)==0){
+				unsigned int ct_len = 0, sqname_len = 0;
+				char * sqname = NULL;
+				if(!nosort_parse_SQ_line(line_ptr, &sqname, &sqname_len, &ct_len)) continue;
+
+				sqname_len += 1;	// the stored name includes its terminating NUL
+				memcpy(header_bin + header_bin_ptr, &sqname_len, 4);
+				header_bin_ptr += 4;
+				memcpy(header_bin + header_bin_ptr, sqname, sqname_len-1);
+				*(header_bin + header_bin_ptr + sqname_len - 1) = 0;
+				char * mem_contig_name = malloc(sqname_len);
+				strcpy(mem_contig_name , header_bin + header_bin_ptr);
+				HashTablePut(pairer -> sam_contig_number_table , mem_contig_name, NULL + 1 + header_contigs);
+				header_bin_ptr += sqname_len;
+
+				memcpy(header_bin + header_bin_ptr, &ct_len, 4);
+				header_bin_ptr += 4;
+				header_contigs++;
+			}
+		}
+
+		if(header_bin){
+			pairer -> BAM_header_parsed = 1;
+			pairer -> output_header(pairer, 0, 0, header_contigs , header_bin , header_bin_ptr);
+			free(header_bin);
+		}else{
+			SUBREADprintf("ERROR: out of memory when parsing the SAM header.\n");
+			pairer -> is_finished = 1;
+		}
+
+		fseeko(pairer -> input_fp, first_read_offset, SEEK_SET);
+
+		line_ptr = SBAM_buff;
+
+		while(1){
+			if(pairer -> is_finished) break;
+			int need_sleep = 1;
+			for(x1 = 0; x1 < pairer -> total_threads ; x1++){
+				if(pairer -> is_finished) break;
+				SAM_pairer_thread_t * this_thread = pairer -> threads + x1;
+				if(nosort_wants_refill(this_thread)){
+					subread_lock_occupy(&this_thread -> SBAM_lock);
+					int to_be_add = NOSORT_REFILL_HIGHBAR - this_thread -> reads_in_SBAM;
+					int x2;
+
+					nosort_compact_queue(this_thread);
+					for(x2 = 0 ; x2 < to_be_add ; x2++){
+						int record_len;
+						NOSORT_SAM_next_line;
+
+						if(NULL==NOSORT_SAM_eof || line_ptr[0]==0){
+							pairer -> is_finished = 1;
+							break;
+						}
+
+						record_len = strlen(line_ptr);
+						memcpy(this_thread -> input_buff_SBAM + this_thread -> input_buff_SBAM_used , line_ptr, record_len);
+						this_thread -> input_buff_SBAM_used += record_len;
+						this_thread -> reads_in_SBAM ++;
+					}
+					need_sleep = 0;
+					subread_lock_release(&this_thread -> SBAM_lock);
+				}
+			}
+			if(need_sleep) usleep(nosort_tick_time);
+		}
+	}
+
+	free(SBAM_buff);
+	free(BIN_buff);
+
+
+	for(x1 = 0; x1 < pairer -> total_threads ; x1++){
+		pthread_join(pairer -> threads[x1].thread_stab, NULL);
+	}
+}
+
+/*
+ * Entry point of the pairer. In no-sort mode the input is processed once by
+ * SAM_nosort_run_once(). Otherwise SAM_pairer_run_once() is executed; if it
+ * flags the input as badly formatted, the temporary files are removed, the
+ * input is rewritten by SAM_pairer_fix_format(), the pairer and its output
+ * are reset, and the run is repeated one more time. Always returns 0.
+ */
+int SAM_pairer_run( SAM_pairer_context_t * pairer){
+	int attempt = 0;
+
+	if(pairer -> force_do_not_sort){
+		SAM_nosort_run_once(pairer);
+		return 0;
+	}
+
+	while(attempt < 2){
+		SAM_pairer_run_once(pairer);
+		if(!pairer -> is_bad_format) break;
+
+		// Only the very first run may be retried.
+		assert(0 == attempt);
+		if(pairer -> display_progress)
+			SUBREADprintf("Retrying with the corrected format...\n");
+		delete_with_prefix(pairer -> tmp_file_prefix);
+		SAM_pairer_fix_format(pairer);
+		SAM_pairer_reset(pairer);
+		pairer -> reset_output_function(pairer);
+		pairer -> is_bad_format = 0;
+		attempt ++;
+	}
+
+	return 0;
+}
+
+/*
+ * Initialise a SAM sort writer.
+ *
+ * The writer is zeroed, the SIGTERM / SIGINT hooks are installed (the previous
+ * handlers are kept in old_sig_TERM / old_sig_INT), and a unique prefix for
+ * the temporary files is built from tmp_path, the process id and a random
+ * number. A "headers.txt" file is created under that prefix and removed
+ * again, which verifies that the temporary location is writable; finally the
+ * output file is opened for writing.
+ *
+ * Returns 0 on success, -1 if the temporary file or the output file cannot
+ * be created. Both path buffers are written with bounded formatting, so an
+ * over-long tmp_path is truncated instead of overrunning them.
+ */
+int sort_SAM_create(SAM_sort_writer * writer, char * output_file, char * tmp_path)
+{
+	char tmp_fname[MAX_FILE_NAME_LENGTH+40];
+	memset(writer, 0, sizeof(SAM_sort_writer));
+
+	old_sig_TERM = signal (SIGTERM, SAM_SORT_SIGINT_hook);
+	old_sig_INT = signal (SIGINT, SAM_SORT_SIGINT_hook);
+
+	snprintf(writer -> tmp_path, sizeof(writer -> tmp_path), "%s/temp-sort-%06u-%08X-", tmp_path, getpid(), rand());
+	_SAMSORT_SNP_delete_temp_prefix = writer -> tmp_path;
+
+	snprintf(tmp_fname, sizeof(tmp_fname), "%s%s", writer -> tmp_path, "headers.txt");
+	writer -> all_chunks_header_fp = f_subr_open(tmp_fname,"w");
+	if(!writer -> all_chunks_header_fp) return -1;
+	fclose(writer -> all_chunks_header_fp);
+	unlink(tmp_fname);
+
+	writer -> out_fp = f_subr_open(output_file,"w");
+	if(!writer -> out_fp) return -1;
+
+	return 0;
+}
+
+/*
+ * Look for the integer tag "<tag>:i:<digits>" in a SAM line.
+ *
+ * read_line_buf : the NUL-terminated SAM line; the tag must be preceded by a tab.
+ * tag           : the tag name, without the ":i:" part (for example "HI").
+ * hi_tag_out    : receives "\t<tag>:i:<value>" if the tag is present, or an
+ *                 empty string if it is not. The caller provides the space.
+ *
+ * Only the leading run of decimal digits is used as the value; a tag that is
+ * not followed by any digit is reported with the value 0.
+ */
+void find_tag_out(char * read_line_buf, char * tag, char * hi_tag_out)
+{
+	char tag_str[32];
+	char * hi_tag_str = NULL;
+	int hi_tag = -1;
+
+	// The value starts right behind the pattern, whatever the length of the tag name.
+	int pattern_len = snprintf(tag_str, sizeof(tag_str), "\t%s:i:", tag);
+	if(pattern_len > 0 && pattern_len < (int) sizeof(tag_str))
+		hi_tag_str = strstr(read_line_buf, tag_str);
+
+	if(hi_tag_str)
+	{
+		char * digit = hi_tag_str + pattern_len;
+		hi_tag = 0;
+		while(isdigit((unsigned char) *digit))
+		{
+			hi_tag = hi_tag*10 + (*digit - '0');
+			digit++;
+		}
+	}
+
+	if(hi_tag >=0)
+	{
+		sprintf(hi_tag_out,"\t%s:i:%d", tag, hi_tag);
+	}else hi_tag_out[0] = 0;
+}
+
+void sort_SAM_finalise(SAM_sort_writer * writer)
+{
+ int x1_chunk, x1_block;
+ int xk1;
+ for(xk1=0;xk1<SAM_SORT_BLOCKS;xk1++)
+ {
+ if(writer -> current_block_fp_array[xk1])
+ fclose(writer -> current_block_fp_array[xk1]);
+ }
+ memset(writer -> current_block_fp_array, 0, sizeof(FILE *)*SAM_SORT_BLOCKS);
+ writer -> current_chunk_size = 0;
+ writer -> current_chunk++;
+
+ for(x1_block = 0; x1_block <SAM_SORT_BLOCKS; x1_block++){
+ HashTable * first_read_name_table;
+ first_read_name_table = HashTableCreate(SAM_SORT_BLOCK_SIZE / 100 );
+ HashTableSetKeyComparisonFunction(first_read_name_table , fc_strcmp_chro);
+ HashTableSetDeallocationFunctions(first_read_name_table , free, free);
+ HashTableSetHashFunction(first_read_name_table, HashTableStringHashFunction);
+
+ for(x1_chunk = 0; x1_chunk < writer -> current_chunk; x1_chunk++)
+ {
+ char tmpfname[MAX_FILE_NAME_LENGTH+40];
+ sprintf(tmpfname, "%sCHK%08d-BLK%03d.bin", writer -> tmp_path, x1_chunk , x1_block);
+
+ FILE * bbfp = f_subr_open(tmpfname,"rb");
+ if(!bbfp) continue;
+
+ while(!feof(bbfp))
+ {
+ char * read_name = NULL;
+ short flags;
+ short read_name_len;
+ short read_len;
+ int ret = fread(&flags, 2,1 , bbfp);
+ if(ret<1) break;
+ fread(&read_name_len, 2,1 , bbfp);
+ if(flags & SAM_FLAG_SECOND_READ_IN_PAIR)
+ fseek(bbfp, read_name_len, SEEK_CUR);
+ else
+ {
+ read_name = malloc(read_name_len+1);
+ fread(read_name, 1, read_name_len, bbfp);
+ read_name[read_name_len] = 0;
+ }
+ fread(&read_len,2,1,bbfp);
+ if(flags & SAM_FLAG_SECOND_READ_IN_PAIR)
+ fseek(bbfp, read_len, SEEK_CUR);
+ else
+ {
+ char * new_line_mem = malloc(read_len+1);
+ fread(new_line_mem, 1, read_len, bbfp);
+ new_line_mem[read_len] = 0;
+
+ if(read_len<2)
+ {
+ SUBREADprintf("Cannot determain read length from the tmp file!\n");
+ assert(0);
+ }
+
+
+ if( new_line_mem[0]==0 || new_line_mem[1]==0)
+ {
+ SUBREADprintf("Cannot load read part from the tmp file!\n");
+ assert(0);
+ }
+
+
+ char * old_line_mem = HashTableGet(first_read_name_table, read_name);
+ if(old_line_mem)
+ old_line_mem[0]=0xff;
+ else
+ HashTablePut(first_read_name_table, read_name, new_line_mem);
+ //if( first_read_name_table -> numOfElements<4)printf("RV=%s\n", read_name);
+ }
+ }
+
+ fclose(bbfp);
+ }
+
+ //printf("BLK=%d; CKS=%d; READS=%llu\n", x1_block, x1_chunk, first_read_name_table -> numOfElements);
+ unsigned long long int finished_second_reads = 0;
+
+ for(x1_chunk = 0; x1_chunk < writer -> current_chunk; x1_chunk++)
+ {
+ char tmpfname[MAX_FILE_NAME_LENGTH+40];
+ sprintf(tmpfname, "%sCHK%08d-BLK%03d.bin", writer -> tmp_path, x1_chunk , x1_block);
// printf("START_BLOCK: %s\n", tmpfname);
@@ -2528,6 +4775,8 @@ int sort_SAM_add_line(SAM_sort_writer * writer, char * SAM_line, int line_len)
}
if(tabs <= 7) return -1;
+ //if(memcmp("V0112_0155:7:1101:4561:132881", read_name, 27)==0)
+
char * hi_tag_str = strstr(SAM_line,"\tHI:i:");
if(hi_tag_str)
{
@@ -2569,7 +4818,7 @@ int sort_SAM_add_line(SAM_sort_writer * writer, char * SAM_line, int line_len)
}
char hi_key [13];
- if(hi_tag >=0 && pos_1 && pos_2)
+ if(hi_tag >=0)// && pos_1 && pos_2)
sprintf(hi_key, ":%d", hi_tag);
else
hi_key[0]=0;
@@ -2668,7 +4917,6 @@ int is_SAM_unsorted(char * SAM_line, char * tmp_read_name, short * tmp_flag, uns
return 0;
}
-int probe_file_type(char * fname, int * is_first_PE);
int is_certainly_bam_file(char * fname, int * is_first_read_PE)
{
@@ -2717,8 +4965,8 @@ int warning_file_type(char * fname, int expected_type)
return 1;
}
- else if((expected_type == FILE_TYPE_FAST_ && (read_type!= FILE_TYPE_FASTQ && read_type!= FILE_TYPE_FASTA))||
- (expected_type == FILE_TYPE_GZIP_FAST_ && (read_type!= FILE_TYPE_GZIP_FASTQ && read_type!= FILE_TYPE_GZIP_FASTA)) ||
+ else if((expected_type == FILE_TYPE_FAST_ && (read_type!= FILE_TYPE_FASTQ && read_type!= FILE_TYPE_FASTA && read_type!= FILE_TYPE_GZIP_FASTQ))||
+ (expected_type == FILE_TYPE_GZIP_FAST_ && read_type!= FILE_TYPE_GZIP_FASTA) ||
(( expected_type != FILE_TYPE_GZIP_FAST_ && expected_type != FILE_TYPE_FAST_) && expected_type != read_type))
{
char * req_fmt = "SAM";
@@ -2773,6 +5021,109 @@ char * fgets_noempty(char * buf, int maxlen, FILE * fp)
}
+/* Count the tabs of a text line for the SAM test: counting stops at the end
+ * of the line, after 4999 characters, or at the first non-digit character
+ * inside the second column (which holds the numeric FLAG in SAM). */
+static int probe_count_SAM_tabs(const char * line){
+	int tabs = 0, x1;
+	for(x1=0;x1<4999;x1++)
+	{
+		if(line[x1]=='\n' || !line[x1]) break;
+		if(line[x1]=='\t'){
+			tabs++;
+			continue;
+		}
+		if(tabs == 1 && !isdigit((unsigned char) line[x1])) break;
+	}
+	return tabs;
+}
+
+/*
+ * Guess the type of a read or mapping file from its first few lines.
+ *
+ * Returns FILE_TYPE_NONEXIST if the file cannot be opened, FILE_TYPE_EMPTY if
+ * it has no content, one of FILE_TYPE_SAM, FILE_TYPE_FASTQ, FILE_TYPE_FASTA,
+ * FILE_TYPE_BAM, FILE_TYPE_GZIP_FASTQ or FILE_TYPE_GZIP_FASTA when the
+ * content is recognised, and FILE_TYPE_UNKNOWN otherwise (including a file
+ * that is too short to be classified).
+ */
+int probe_file_type_fast(char * fname){
+	FILE * fp = f_subr_open(fname, "rb");
+	if(!fp) return FILE_TYPE_NONEXIST;
+
+	int ret = FILE_TYPE_UNKNOWN;
+	int nch;
+	char *test_buf=malloc(5000);
+	if(!test_buf){
+		fclose(fp);
+		return FILE_TYPE_UNKNOWN;
+	}
+
+	nch = fgetc(fp);
+
+	if(feof(fp))
+		ret = FILE_TYPE_EMPTY;
+	else
+	{
+		if(nch == '@')	// FASTQ OR SAM
+		{
+			size_t second_line_len = 0;
+			char * rptr = fgets_noempty(test_buf, 4999, fp);
+			if(rptr)
+				rptr = fgets_noempty(test_buf, 4999, fp);
+			if(rptr)
+			{
+				second_line_len = strlen(test_buf);
+				int tabs = probe_count_SAM_tabs(test_buf);
+				if(rptr[0]=='@' || tabs>7)
+					ret = FILE_TYPE_SAM;
+			}
+			if(ret == FILE_TYPE_UNKNOWN)
+			{
+				// The third line may be missing in a short file.
+				rptr = fgets_noempty(test_buf, 4999, fp);
+				if(rptr && rptr[0] == '+')
+				{
+					rptr = fgets_noempty(test_buf, 4999, fp);
+					if(rptr && second_line_len == strlen(test_buf))
+						ret = FILE_TYPE_FASTQ;
+				}
+			}
+		}
+		else if(nch == '>')	// FASTA
+		{
+			ret = FILE_TYPE_FASTA;
+		}
+		else if(nch == 31)	// BAM OR GZ_FASTQ
+		{
+			nch = fgetc(fp);
+			if(nch == 139)
+			{
+				fclose(fp);
+				fp=NULL;
+				gzFile zfp = gzopen(fname, "rb");
+				if(zfp)
+				{
+					int rlen = gzread(zfp, test_buf,4);
+					if(rlen == 4 && memcmp(test_buf,"BAM\1",4)==0)
+						ret = FILE_TYPE_BAM;
+					if(rlen == 4 && test_buf[0]=='@')
+						ret = FILE_TYPE_GZIP_FASTQ;
+					if(rlen == 4 && test_buf[0]=='>')
+						ret = FILE_TYPE_GZIP_FASTA;
+					gzclose(zfp);
+				}
+			}
+		}
+		else if(nch >= 0x20 && nch <= 0x7f)	// SAM without headers
+		{
+			int tabs = 0;
+			char * rptr = fgets(test_buf, 4999, fp);
+			if(rptr)
+				tabs = probe_count_SAM_tabs(test_buf);
+			if(tabs>7)
+				ret = FILE_TYPE_SAM;
+		}
+	}
+
+	if(fp)fclose(fp);
+
+	free(test_buf);
+	return ret;
+}
int probe_file_type(char * fname, int * is_first_read_PE)
{
FILE * fp = f_subr_open(fname, "rb");
@@ -2918,6 +5269,10 @@ int probe_file_type(char * fname, int * is_first_read_PE)
ret = FILE_TYPE_EMPTY;
break;
}
+ if(tbr[0]=='\n' || tbr[0]=='\r'){
+ ret = FILE_TYPE_UNKNOWN;
+ break;
+ }
if(tbr[0]=='@') continue;
char * rname_str, *tmpstr;
rname_str = strtok_r(tbr, "\t", &tmpstr);
diff --git a/src/input-files.h b/src/input-files.h
index 71aca46..063bc4c 100644
--- a/src/input-files.h
+++ b/src/input-files.h
@@ -22,7 +22,6 @@
#include "subread.h"
#include "hashtable.h"
-#include "core-indel.h"
#define GENE_SPACE_BASE 1
#define GENE_SPACE_COLOR 2
@@ -30,6 +29,7 @@
#define GENE_INPUT_PLAIN 0
#define GENE_INPUT_FASTQ 1
#define GENE_INPUT_FASTA 2
+#define GENE_INPUT_GZIP_FASTQ 51
#define GENE_INPUT_SAM_SINGLE 93
#define GENE_INPUT_SAM_PAIR_1 94
@@ -53,20 +53,21 @@
#include <stdlib.h>
#include <stdio.h>
-
+#include "core-indel.h"
+#include "hashtable.h"
#define SAM_SORT_BLOCKS 229
#define SAM_SORT_BLOCK_SIZE 512333303LLU
//#define SAM_SORT_BLOCK_SIZE 11123333LLU
-
-typedef struct
-{
+//
+typedef struct {
unsigned long long int output_file_size;
unsigned long long int current_chunk_size;
unsigned int current_chunk;
unsigned long long int written_reads;
unsigned long long int unpaired_reads;
+
FILE * current_block_fp_array [SAM_SORT_BLOCKS];
FILE * all_chunks_header_fp;
@@ -75,6 +76,95 @@ typedef struct
} SAM_sort_writer;
+typedef struct { // Per-thread state of the SAM/BAM pairer; one element of SAM_pairer_context_t.threads.
+ int thread_id;
+
+ char * input_buff_SBAM; // Raw input queued for this thread; the no-sort feeder stores BAM records (each with its 4-byte length) or SAM text lines here.
+ int input_buff_SBAM_used; // Number of valid bytes in input_buff_SBAM.
+ int input_buff_SBAM_ptr; // Offset of the next unconsumed byte in input_buff_SBAM.
+ int reads_in_SBAM; // Number of reads currently queued in input_buff_SBAM (no-sort mode).
+ subread_lock_t SBAM_lock; // Held while the queue fields above are being changed.
+
+ unsigned long long input_buff_SBAM_file_start;
+ unsigned long long input_buff_SBAM_file_end;
+
+ unsigned char * input_buff_BIN; // Binary read buffer; in no-sort mode its two halves hold the two reads handed to output_function.
+ int input_buff_BIN_used;
+ int input_buff_BIN_ptr; // Offset in input_buff_BIN; the no-sort worker sets it before each reduce_SAM_to_BAM call.
+ int orphant_block_no;
+ int need_find_start;
+ unsigned long long orphant_space;
+ z_stream strm; // zlib stream owned by this thread.
+
+ char immediate_last_read_bin[3000];
+ char immediate_last_read_full_name[MAX_READ_NAME_LEN*2 +80 ];
+ int immediate_last_read_bin_len;
+ int immediate_last_read_name_len;
+
+ HashTable * orphant_table;
+ pthread_t thread_stab; // Handle of the worker thread (pthread_create / pthread_join).
+} SAM_pairer_thread_t;
+
+typedef struct { // Shared state of one pairer run; passed to every SAM_pairer_* function and, as void *, to the callbacks below.
+ FILE * input_fp; // The input file; replaced by the re-blocked temporary file after SAM_pairer_fix_format().
+ int input_is_BAM; // Non-zero for BAM input, zero for SAM text.
+ int tiny_mode;
+ int display_progress; // Non-zero to print progress messages such as the retry notice.
+ int is_bad_format; // Set when a run finds the input badly formatted; SAM_pairer_run() then fixes the file and retries once.
+ int is_single_end_mode;
+ int force_do_not_sort; // Non-zero selects the no-sort path (SAM_nosort_run_once).
+ int is_finished; // Set by the no-sort feeder when the input is exhausted; workers quit once their queue has no more pairs.
+ subread_lock_t input_fp_lock;
+ subread_lock_t output_header_lock;
+
+ unsigned long long total_input_reads;
+ unsigned long long total_orphan_reads;
+
+ HashTable * sam_contig_number_table; // Maps a sequence name from the SAM @SQ lines to its 1-based index, stored as a pointer value.
+ HashTable * bam_margin_table;
+
+ int total_threads; // Number of elements in threads.
+ int input_buff_SBAM_size;
+ int input_buff_BIN_size;
+ char tmp_file_prefix[MAX_FILE_NAME_LENGTH]; // Prefix of all temporary files, e.g. "<prefix>.fixbam".
+
+ SAM_pairer_thread_t * threads;
+ int BAM_header_parsed;
+ unsigned int BAM_l_text; // Length of the BAM header text.
+ unsigned int BAM_n_ref; // Number of reference sequences in the BAM header.
+
+ void (* reset_output_function) (void * pairer); // Called before the second run when the input had to be fixed.
+ int (* output_function) (void * pairer, int thread_no, char * rname, char * bin1, char * bin2); // Receives the two reads of a pair; the no-sort path passes NULL as rname.
+ int (* output_header) (void * pairer, int thread_no, int is_text, unsigned int items, char * bin, unsigned int bin_len); // Receives the header text (is_text = 1) or the binary reference table (is_text = 0).
+ // Reserved for the application to pass its own data to the output function.
+ void * appendix1;
+ void * appendix2; // NOTE(review): appendix2..appendix5 are overwritten by SAM_nosort_run_once() with its buffers and cursors, so they are not free for application data in no-sort mode.
+ void * appendix3;
+ void * appendix4;
+ void * appendix5;
+
+} SAM_pairer_context_t;
+
+
+#define SAM_PAIRER_WRITE_BUFFER ( 64000 ) // Size in bytes of the per-thread output buffer below.
+typedef struct { // Per-thread state of the pairer's BAM writer.
+ unsigned char BIN_buffer[SAM_PAIRER_WRITE_BUFFER]; // Output bytes collected by this thread.
+ int BIN_buffer_ptr; // presumably the number of bytes currently held in BIN_buffer - TODO confirm against the writer code.
+ z_stream strm; // zlib stream owned by this thread.
+
+} SAM_pairer_writer_thread_t;
+
+typedef struct { // State of the pairer's BAM writer; set up by SAM_pairer_writer_create().
+ SAM_pairer_writer_thread_t * threads; // One writer state per thread.
+ int all_threads; // presumably the number of elements in threads - TODO confirm.
+ int compression_level;
+ int has_dummy; // NOTE(review): looks like the switch for adding dummy mates to singleton reads (see the '-d' option of repair) - confirm.
+ FILE * bam_fp; // The output file.
+ char bam_name[MAX_FILE_NAME_LENGTH]; // Name of the output file.
+ subread_lock_t output_fp_lock; // presumably serialises writes to bam_fp between threads - TODO confirm.
+} SAM_pairer_writer_main_t;
+
+
void fastq_64_to_33(char * qs);
int chars2color(char c1, char c2);
@@ -100,7 +190,6 @@ int geinput_readline_back(gene_input_t * input, char * linebuffer) ;
// Return the length of this read or -1 if EOF.
// The memory space for read_string must be at least 512 bytes.
int geinput_next_read(gene_input_t * input, char * read_name, char * read_string, char * quality_string);
-int geinput_next_read_sam(gene_input_t * input, char * read_name, char * read_string, char * quality_string, gene_offset_t* offsets, unsigned int * pos, int * mapping_quality, int * mapping_flags, int need_reversed);
int geinput_next_read_trim(gene_input_t * input, char * read_name, char * read_string, char * quality_string, short trim_5, short trim_3, int * is_secondary);
void geinput_jump_read(gene_input_t * input);
@@ -155,7 +244,7 @@ int is_in_exon_annotations(gene_t *output_genes, unsigned int offset, int is_sta
int does_file_exist (char * filename);
-double guess_reads_density_format(char * fname, int is_sam, int * min_phred, int * max_phred);
+double guess_reads_density_format(char * fname, int is_sam, int * min_phred, int * max_phred, int * tested_reads);
FILE * get_temp_file_pointer(char *temp_file_name, HashTable* fp_table);
@@ -183,4 +272,22 @@ unsigned long long int sort_SAM_hash(char * str);
char * fgets_noempty(char * buf, int maxlen, FILE * fp);
char * gzgets_noempty(void * fp, char * buf, int maxlen);
+int probe_file_type(char * fname, int * is_first_PE);
+int probe_file_type_fast(char * fname);
+void geinput_seek(gene_input_t * input, gene_inputfile_position_t * pos);
+void geinput_tell(gene_input_t * input, gene_inputfile_position_t * pos);
+unsigned long long geinput_file_offset( gene_input_t * input);
+
+
+int SAM_pairer_create(SAM_pairer_context_t * pairer, int all_threads, int bin_buff_size_per_thread, int BAM_input, int is_Tiny_Mode, int is_single_end_mode, int force_do_not_sort, int display_progress, char * in_file, void (* reset_output_function) (void * pairer), int (* output_header_function) (void * pairer, int thread_no, int is_text, unsigned int items, char * bin, unsigned int bin_len), int (* output_function) (void * pairer, int thread_no, char * rname, char * bin1, char * bin2), [...]
+int SAM_pairer_run( SAM_pairer_context_t * pairer);
+void SAM_pairer_destroy(SAM_pairer_context_t * pairer);
+void SAM_pairer_writer_reset(void * pairer);
+
+int SAM_pairer_multi_thread_output( void * pairer, int thread_no, char * rname, char * bin1, char * bin2 );
+int SAM_pairer_multi_thread_header (void * pairer_vp, int thread_no, int is_text, unsigned int items, char * bin, unsigned int bin_len);
+
+int SAM_pairer_writer_create( SAM_pairer_writer_main_t * bam_main , int all_threads, int has_dummy , int BAM_output, int BAM_compression_level, char * out_file);
+void SAM_pairer_writer_destroy( SAM_pairer_writer_main_t * bam_main ) ;
+int SAM_pairer_iterate_int_tags(unsigned char * bin, int bin_len, char * tag_name, int * saved_value);
#endif
diff --git a/src/makefile.version b/src/makefile.version
index 5c6b909..3bebd60 100644
--- a/src/makefile.version
+++ b/src/makefile.version
@@ -1,3 +1,3 @@
-SUBREAD_VERSION="1.4.6-p5"
+SUBREAD_VERSION="1.5.0"
STATIC_MAKE=
#STATIC_MAKE= -static
diff --git a/src/propmapped.c b/src/propmapped.c
index 2fea038..0c7ac4e 100644
--- a/src/propmapped.c
+++ b/src/propmapped.c
@@ -186,23 +186,23 @@ void propMapped_usage()
SUBREADputs("");
SUBREADputs("Usage:");
SUBREADputs("");
- SUBREADputs(" ./prommapped -i <file> {optional arguments}");
+ SUBREADputs(" ./prommapped [options] -i <file>");
SUBREADputs("");
SUBREADputs("Required arguments:");
SUBREADputs("");
- SUBREADputs(" -i <file> : An input file containing read mapping results. Both SAM or BAM");
- SUBREADputs(" formats are supported.");
+ SUBREADputs(" -i <string> An input file containing read mapping results. Both SAM or BAM");
+ SUBREADputs(" formats are supported.");
SUBREADputs("");
SUBREADputs("Optional arguments:");
SUBREADputs("");
- SUBREADputs(" -o <file> : Name of the file containing the output of this program.");
+ SUBREADputs(" -o <string> Name of output file including mapping statistics.");
SUBREADputs("");
- SUBREADputs(" -f : If specified, fragments (read pairs) will be counted instead of");
- SUBREADputs(" individual reads. This option is only applicable for paired-end");
- SUBREADputs(" reads.");
+ SUBREADputs(" -f If specified, fragments (read pairs) will be counted instead of");
+ SUBREADputs(" individual reads. This option is only applicable for paired-end");
+ SUBREADputs(" reads.");
SUBREADputs("");
- SUBREADputs(" -p : If specified, only properly paired reads will be counted. This");
- SUBREADputs(" option is only applicable for paired-end reads.");
+ SUBREADputs(" -p If specified, only properly paired reads will be counted. This");
+ SUBREADputs(" option is only applicable for paired-end reads.");
SUBREADputs("");
}
diff --git a/src/qualityScores.c b/src/qualityScores.c
index 4819181..cfc7575 100644
--- a/src/qualityScores.c
+++ b/src/qualityScores.c
@@ -455,11 +455,40 @@ static struct option qs_long_options[] =
void qualscore_usage()
{
- SUBREADprintf("\nVersion %s\n\n", SUBREAD_VERSION);
+ SUBREADprintf("\nqualityScore Version %s\n\n", SUBREAD_VERSION);
+ SUBREADputs(" Retrieve Phred score for read bases\n");
SUBREADputs("Usage:");
SUBREADputs("");
- SUBREADputs(" ./qualityScores -i input_file -o output_file {--FASTQinput --BAMinput or --SAMinput or --gzFASTQinput} {--first-end or --second-end or --both-ends} {--counted-reads <int>} {--phred-offset 33|64}");
+ SUBREADputs(" ./qualityScores [options] -i <input_file> -o <output_file>");
SUBREADputs("");
+ SUBREADputs("Required arguments:");
+ SUBREADputs("");
+ SUBREADputs(" -i <string> Name of input file including read data. The default format is");
+ SUBREADputs(" Fastq.");
+ SUBREADputs("");
+ SUBREADputs(" -o <string> Name of output file that is a text file including Phred scores");
+ SUBREADputs(" for each read base.");
+ SUBREADputs("");
+ SUBREADputs("Optional arguments:");
+ SUBREADputs("");
+ SUBREADputs(" --gzFASTQinput Input file is in gzipped Fastq format.");
+ SUBREADputs("");
+ SUBREADputs(" --BAMinput Input file is in BAM format.");
+ SUBREADputs("");
+ SUBREADputs(" --SAMinput Input file is in SAM format.");
+ SUBREADputs("");
+ SUBREADputs(" --first-end Use only first reads in paired-end data. Only applicable for");
+ SUBREADputs(" paired-end BAM/SAM input.");
+ SUBREADputs("");
+ SUBREADputs(" --second-end Use only second reads in paired-end data. Only applicable for");
+ SUBREADputs(" paired-end BAM/SAM input.");
+ SUBREADputs("");
+ SUBREADputs(" --counted-reads <int> Total number of reads to be extracted from the input");
+ SUBREADputs(" file. 10,000 by default.");
+ SUBREADputs("");
+ SUBREADputs(" --phred-offset <33|64> refer to subread aligner.");
+ SUBREADputs("");
+
}
#ifdef MAKE_STANDALONE
diff --git a/src/read-repair.c b/src/read-repair.c
new file mode 100644
index 0000000..038ee7a
--- /dev/null
+++ b/src/read-repair.c
@@ -0,0 +1,130 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <zlib.h>
+#include "subread.h"
+#include "core.h"
+#include "input-files.h"
+
+/*
+ * Print the command-line help of the `repair` tool: version banner, a short description,
+ * the usage line, and the required and optional arguments.
+ *
+ * cmd: the program name as invoked (argv[0]). It is not used; the usage line prints a
+ *      fixed program name.
+ */
+void print_usage_pairer(char * cmd){
+	SUBREADprintf("\nrepair Version %s\n\n", SUBREAD_VERSION);
+	SUBREADputs(" Find reads that are from the same pair in the input and then place them next");
+	SUBREADputs("to each other in the output. A dummy read is added for each singleton read");
+	SUBREADputs("that does not have a pair. The output file is compatible with featureCounts");
+	SUBREADputs("program.");
+	SUBREADputs("");
+	SUBREADputs("Usage:");
+	SUBREADputs("");
+	// This program is `repair`; the usage line used to advertise "./removeDup" (a different
+	// tool) and carried a trailing "\n" that produced a doubled blank line.
+	SUBREADputs(" ./repair [options] -i <input_file> -o <output_file>");
+	SUBREADputs("");
+	SUBREADputs("Required arguments:");
+	SUBREADputs("");
+	SUBREADputs(" -i <string> Name of input file in BAM or SAM format. Use '-S' option if the");
+	SUBREADputs(" input file is in SAM format");
+	SUBREADputs("");
+	SUBREADputs(" -o <string> Name of output file. The output file is in BAM format.");
+	SUBREADputs("");
+	SUBREADputs("Optional arguments:");
+	SUBREADputs("");
+	SUBREADputs(" -S The input file is in SAM format.");
+	SUBREADputs("");
+	SUBREADputs(" -c Compress the output BAM file. This will reduce the size of BAM");
+	SUBREADputs(" file, but will increase the time of retrieving reads from BAM");
+	SUBREADputs(" file.");
+	SUBREADputs("");
+	SUBREADputs(" -T <int> Number of CPU threads. 8 by default.");
+	SUBREADputs("");
+	SUBREADputs(" -d Do not add dummy reads for singleton reads.");
+	SUBREADputs("");
+	SUBREADputs(" -t Output file does not include sequences and quality scores of");
+	SUBREADputs(" reads.");
+	SUBREADputs("");
+}
+
+/*
+ * Entry point of the `repair` tool.
+ *
+ * Parses the command line, creates the BAM writer and the SAM/BAM pairer, runs the pairer
+ * and prints a short summary.
+ *
+ * Options: -i input, -o output, -S (input is SAM), -c (compress output), -T threads,
+ * -d (no dummy reads), -t (tiny mode: no sequences/qualities), -M <int> (passed to
+ * SAM_pairer_create as its memory argument; unit not visible here), -v / -? (print help).
+ *
+ * Returns 0 on success and -1 when the input or output cannot be opened; exits with -1 when
+ * a required file name is missing or too long.
+ */
+int main(int argc, char ** argv){
+
+	double t0 = miltime();
+	int threads = 8, is_BAM = 1;
+	// getopt() returns an int. Keeping it in a plain char makes the "!= -1" test always
+	// true where char is unsigned, so the option loop would never terminate.
+	int c;
+	char in_BAM_file[MAX_FILE_NAME_LENGTH+1];
+	char out_BAM_file[MAX_FILE_NAME_LENGTH+1];
+	char rand_prefix[40];
+	int no_compression = 1;
+	int has_dummy = 1;
+	int tiny_mode = 0;
+	int memory = 64;
+	optind = 1;
+	opterr = 1;
+	optopt = 63;
+	in_BAM_file[0] = out_BAM_file[0] = 0;
+
+	while ((c = getopt(argc, argv, "i:T:M:o:vtdcS?")) != -1)
+	{
+		switch(c)
+		{
+			case '?':
+			case 'v':
+				print_usage_pairer(argv[0]);
+				exit(0);
+				break;
+			case 'S':
+				is_BAM = 0;
+				break;
+			case 't':
+				tiny_mode = 1;
+				break;
+			case 'd':
+				has_dummy = 0;
+				break;
+			case 'o':
+				// The buffer holds MAX_FILE_NAME_LENGTH characters plus the terminator.
+				if(strlen(optarg) > MAX_FILE_NAME_LENGTH){
+					SUBREADprintf("The name of the output file is too long.\n");
+					exit(-1);
+				}
+				strcpy(out_BAM_file, optarg);
+				break;
+			case 'M':
+				memory = atoi(optarg);
+				if(memory < 1) memory = 1;
+				break;
+			case 'T':
+				threads = atoi(optarg);
+				if(threads < 1) threads = 1;
+				if(threads > MAX_THREADS) threads = MAX_THREADS;
+				break;
+			case 'c':
+				no_compression = 0;
+				break;
+			case 'i':
+				if(strlen(optarg) > MAX_FILE_NAME_LENGTH){
+					SUBREADprintf("The name of the input file is too long.\n");
+					exit(-1);
+				}
+				strcpy(in_BAM_file, optarg);
+				break;
+		}
+	}
+
+	if(in_BAM_file[0]==0 || out_BAM_file[0]==0){
+		print_usage_pairer(argv[0]);
+		//SUBREADprintf("\nNo input or output files are specified.\n");
+		exit(-1);
+	}
+
+	srand( (unsigned int) (miltime()*1000));
+
+	// Prefix handed to SAM_pairer_create; made unique with the process id and random numbers.
+	snprintf(rand_prefix, sizeof(rand_prefix), "fsbm-p%06d-%04X%04X%04X", (int)getpid(), rand()&0xffff, rand()&0xffff,rand()&0xffff);
+
+	SAM_pairer_context_t pairer;
+	SAM_pairer_writer_main_t writer_main;
+	int ret = SAM_pairer_writer_create(&writer_main, threads, has_dummy,1, no_compression?Z_NO_COMPRESSION:Z_DEFAULT_COMPRESSION, out_BAM_file);
+	if(ret){
+		SUBREADprintf("Unable to open the output file. Program terminated.\n");
+		return -1;
+	}
+
+	ret = SAM_pairer_create(&pairer, threads, memory, is_BAM, tiny_mode,0,0 , 1, in_BAM_file, SAM_pairer_writer_reset, SAM_pairer_multi_thread_header, SAM_pairer_multi_thread_output, rand_prefix, &writer_main);
+	if(ret){
+		SUBREADprintf("Unable to open the input file. Program terminated.\n");
+		return -1;
+	}
+
+	SAM_pairer_run(&pairer);
+	SAM_pairer_destroy(&pairer);
+	SAM_pairer_writer_destroy(&writer_main);
+	SUBREADprintf("\nAll finished in %.2f minutes\nTotal input reads: %llu ; Unpaired reads: %llu\n\n", (miltime()-t0)/60, pairer.total_input_reads, pairer.total_orphan_reads);
+	return 0;
+}
diff --git a/src/readSummary.c b/src/readSummary.c
index 2112f22..91db194 100644
--- a/src/readSummary.c
+++ b/src/readSummary.c
@@ -63,8 +63,28 @@
#define MAX_HIT_NUMBER 3000
-typedef struct
-{
+// One named gene used when annotating junctions. register_junc_feature() creates an entry
+// the first time a feature name is seen on a chromosome and afterwards widens the span so
+// that it covers every feature registered under the same name.
+typedef struct{
+ char gene_name[FEATURE_NAME_LENGTH]; // feature name; also used as the hash key of the entry
+ unsigned int pos_first_base; // smallest start registered for this name
+ unsigned int pos_last_base; // largest stop registered for this name
+} fc_junction_gene_t;
+
+
+
+// List of gene entries stored as a value in the junction bucket table.
+// locate_junc_features() scans the first `used` items of `genes`;
+// free_bucket_table_list() frees the `genes` array and the list itself, not the entries.
+typedef struct {
+ int space; // presumably the allocated capacity of `genes` -- TODO confirm where the list is filled
+ int used; // number of valid items in `genes`
+ fc_junction_gene_t ** genes;
+} gene_info_list_t;
+
+// One junction reported by fetch_boundaries() / calc_junctions_from_cigar(): the last base
+// of the exon on the left side and the first base of the exon on the right side, each with
+// its own chromosome name (the two names can differ for junctions paired from fusion tags).
+typedef struct {
+ char chromosome_name_left[CHROMOSOME_NAME_LENGTH + 1];
+ char chromosome_name_right[CHROMOSOME_NAME_LENGTH + 1];
+ unsigned int last_exon_base_left;
+ unsigned int first_exon_base_right;
+} fc_junction_info_t;
+
+typedef struct {
unsigned int feature_name_pos;
unsigned int start;
unsigned int end;
@@ -74,8 +94,7 @@ typedef struct
char is_negative_strand;
} fc_feature_info_t;
-typedef struct
-{
+typedef struct {
unsigned long long assigned_reads;
unsigned long long unassigned_ambiguous;
unsigned long long unassigned_multimapping;
@@ -91,11 +110,8 @@ typedef struct
typedef unsigned long long read_count_type_t;
-typedef struct
-{
+typedef struct {
unsigned short thread_id;
- char * line_buffer1;
- char * line_buffer2;
unsigned long long int nreads_mapped_to_exon;
unsigned long long int all_reads;
//unsigned short current_read_length1;
@@ -131,6 +147,8 @@ typedef struct
char * chro_name_buff;
z_stream * strm_buffer;
+ HashTable * junction_counting_table; // key: string chro_name \t last_base_previous_exont \t first_base_next_exon
+ HashTable * splicing_point_table;
fc_read_counters read_counters;
SamBam_Alignment aln_buffer;
@@ -140,8 +158,7 @@ typedef struct
#define REDUCE_TO_5_PRIME_END 5
#define REDUCE_TO_3_PRIME_END 3
-typedef struct
-{
+typedef struct {
unsigned int chro_number;
unsigned int chro_features;
unsigned int chro_feature_table_start;
@@ -154,8 +171,7 @@ typedef struct
//unsigned int * reverse_table_end_index;
} fc_chromosome_index_info;
-typedef struct
-{
+typedef struct {
int is_gene_level;
int is_paired_end_input_file;
int is_paired_end_mode_assign;
@@ -165,7 +181,6 @@ typedef struct
int is_chimertc_disallowed;
int is_PE_distance_checked;
int is_multi_mapping_allowed;
- int is_input_file_resort_needed;
int is_SAM_file;
int is_read_details_out;
int is_SEPEmix_warning_shown;
@@ -179,6 +194,7 @@ typedef struct
int reduce_5_3_ends_to_one;
int isCVersion;
int use_fraction_multi_mapping;
+ int do_junction_counting;
int min_mapping_quality_score;
int min_paired_end_distance;
@@ -199,13 +215,19 @@ typedef struct
fc_thread_thread_context_t * thread_contexts;
int is_all_finished;
unsigned int input_buffer_max_size;
+ int sambam_chro_table_items;
SamBam_Reference_Info * sambam_chro_table;
+ pthread_spinlock_t sambam_chro_table_lock;
+
+ SAM_pairer_context_t read_pairer;
char * debug_command;
char * unistr_buffer_space;
unsigned int unistr_buffer_size;
unsigned int unistr_buffer_used;
-
+ HashTable * junction_features_table;
+ HashTable * junction_bucket_table;
+ fasta_contigs_t * fasta_contigs;
HashTable * gene_name_table; // gene_name -> gene_number
HashTable * annot_chro_name_alias_table; // name in annotation file -> alias name
char alias_file_name[300];
@@ -247,6 +269,243 @@ typedef struct
unsigned int tick_time = 1000;
+/*
+ * Walk one textual CIGAR string and report the junctions and soft-clipped ends it contains.
+ *
+ * chroname, pos : chromosome name and position of the first aligned base.
+ * cigar         : NUL-terminated CIGAR text (operations S, M, D, N and I are interpreted).
+ * strand        : not used by this function.
+ * has_left / left_on_read / left_pos :
+ *     set when the CIGAR starts with a non-empty soft clip; left_on_read is the clip length
+ *     and left_pos is `pos`.
+ * has_right / right_on_read / right_pos :
+ *     set when a non-empty soft clip follows aligned bases; right_on_read is
+ *     read_length - clip_length - 1 and right_pos is the last reference base covered by an
+ *     M or D operation.
+ * result_junctions, junction_space :
+ *     output array and its capacity; one entry is written per 'N' operation until the array
+ *     is full, further junctions are silently dropped.
+ *
+ * Returns the number of entries written to result_junctions.
+ */
+int fetch_boundaries(char * chroname,char * cigar, unsigned int pos, char strand, int *has_left, unsigned short *left_on_read, unsigned int *left_pos, int *has_right, unsigned short *right_on_read, unsigned int *right_pos, fc_junction_info_t * result_junctions, int junction_space){
+
+	int cigar_cursor = 0, nch, read_len = 0, ret = 0;
+	unsigned int chro_cursor = pos, tmpi = 0;
+	unsigned int right_boundary = 0;
+	unsigned short left_clipped = 0;
+	unsigned short right_clipped = 0;
+	*has_right = 0;
+	*has_left = 0;
+
+	for(; (nch = cigar[cigar_cursor])!=0 ; cigar_cursor++){
+		// isdigit() is only defined for unsigned char values (and EOF).
+		if(isdigit((unsigned char)nch)){
+			tmpi = tmpi*10 + (nch - '0');
+		} else {
+			if (nch == 'S'){
+				// A clip seen before any reference base was consumed is the left clip.
+				if(chro_cursor == pos) left_clipped = tmpi;else right_clipped=tmpi;
+				read_len += tmpi;
+			} else if(nch == 'M' || nch == 'D'){
+				if(nch == 'M')read_len += tmpi;
+
+				chro_cursor += tmpi;
+				right_boundary = chro_cursor -1;
+			} else if(nch == 'N'){
+				unsigned int last_exon_last_base = chro_cursor - 1;
+				unsigned int next_exon_first_base = chro_cursor + tmpi;
+				chro_cursor += tmpi;
+
+				if(ret < junction_space){
+					fc_junction_info_t * junc = result_junctions + ret;
+					junc -> last_exon_base_left = last_exon_last_base;
+					junc -> first_exon_base_right = next_exon_first_base;
+					// The name fields hold CHROMOSOME_NAME_LENGTH characters plus the
+					// terminator; never copy more than that.
+					strncpy(junc -> chromosome_name_left, chroname, CHROMOSOME_NAME_LENGTH);
+					junc -> chromosome_name_left[CHROMOSOME_NAME_LENGTH] = 0;
+					strncpy(junc -> chromosome_name_right, chroname, CHROMOSOME_NAME_LENGTH);
+					junc -> chromosome_name_right[CHROMOSOME_NAME_LENGTH] = 0;
+
+					ret ++;
+				}
+
+			} else if(nch == 'I') read_len += tmpi;
+			tmpi = 0;
+		}
+	}
+	if(left_clipped){
+		*has_left = 1;
+		*left_on_read = left_clipped;
+		*left_pos = pos;
+	}
+	if(right_clipped){
+		*has_right = 1;
+		*right_on_read = read_len - right_clipped - 1;
+		*right_pos = right_boundary;
+	}
+	return ret;
+}
+
+// This function parses the cigar string and returns the number of exon-exon junctions found.
+// It returns 0 if no junctions are found.
+//
+// Junctions come from three sources:
+//   1. every 'N' operation in the main CIGAR;
+//   2. every 'N' operation in a fusion segment described by the CC:Z (chromosome),
+//      CG:Z (cigar), CP:i (position) and CT:Z (strand) tags in extra_tags;
+//   3. pairs of soft-clipped segment ends that are adjacent on the read: a right-clipped end
+//      at read base k paired with a left-clipped start at read base k+1.
+//
+// result_junctions must have room for FC_CIGAR_PARSER_ITEMS entries; junctions beyond that
+// are dropped. global_context is not used. extra_tags must be well-formed "XX:T:value" fields
+// separated by tabs (malformed input trips an assert).
+int calc_junctions_from_cigar(fc_thread_global_context_t * global_context, int flag, char * chroname, unsigned int pos, char * cigar , char * extra_tags, fc_junction_info_t * result_junctions){
+	unsigned short boundaries_inclusive_base_on_read[FC_CIGAR_PARSER_ITEMS];
+	unsigned int boundaries_inclusive_base_pos[FC_CIGAR_PARSER_ITEMS];
+	char boundaries_chromosomes[FC_CIGAR_PARSER_ITEMS][MAX_CHROMOSOME_NAME_LEN];
+	char boundaries_extend_to_left_on_read[FC_CIGAR_PARSER_ITEMS];
+	int boundaries = 0;
+
+	int cigar_cursor = 0, nch, ret = 0, read_len = 0, x1, x2;
+	unsigned int chro_cursor = pos, tmpi = 0;
+	unsigned short left_clipped = 0;
+	unsigned short right_clipped = 0;
+
+	for(; (nch = cigar[cigar_cursor])!=0 ; cigar_cursor++){
+		// isdigit() is only defined for unsigned char values (and EOF).
+		if(isdigit((unsigned char)nch)){
+			tmpi = tmpi*10 + (nch - '0');
+		} else {
+			if (nch == 'S'){
+				if(chro_cursor == pos) left_clipped = tmpi;else right_clipped=tmpi;
+				read_len += tmpi;
+			} else if(nch == 'M' || nch == 'D'){
+				if(nch == 'M')read_len += tmpi;
+
+				chro_cursor += tmpi;
+			} else if(nch == 'N'){
+				unsigned int last_exon_last_base = chro_cursor - 1;
+				unsigned int next_exon_first_base = chro_cursor + tmpi;
+				if(ret <= FC_CIGAR_PARSER_ITEMS - 1){
+					result_junctions[ret].last_exon_base_left = last_exon_last_base;
+					result_junctions[ret].first_exon_base_right = next_exon_first_base;
+					strncpy(result_junctions[ret].chromosome_name_left, chroname, CHROMOSOME_NAME_LENGTH);
+					result_junctions[ret].chromosome_name_left[CHROMOSOME_NAME_LENGTH] = 0;
+					strncpy(result_junctions[ret].chromosome_name_right, chroname, CHROMOSOME_NAME_LENGTH);
+					result_junctions[ret].chromosome_name_right[CHROMOSOME_NAME_LENGTH] = 0;
+
+					ret ++;
+				}
+				chro_cursor += tmpi;
+			} else if(nch == 'I') read_len += tmpi;
+			tmpi = 0;
+		}
+	}
+
+	// Every append to the boundaries_* arrays is guarded: each fusion segment can add two
+	// entries, so a read with many segments would otherwise write past the arrays.
+	if(left_clipped && boundaries < FC_CIGAR_PARSER_ITEMS){
+		strncpy(boundaries_chromosomes[boundaries] , chroname, MAX_CHROMOSOME_NAME_LEN - 1);
+		boundaries_chromosomes[boundaries][MAX_CHROMOSOME_NAME_LEN - 1] = 0;
+		boundaries_extend_to_left_on_read[boundaries] = 0;
+		boundaries_inclusive_base_pos[boundaries] = pos;
+		boundaries_inclusive_base_on_read[boundaries++] = left_clipped;
+	}
+	if(right_clipped && boundaries < FC_CIGAR_PARSER_ITEMS){
+		strncpy(boundaries_chromosomes[boundaries] , chroname, MAX_CHROMOSOME_NAME_LEN - 1);
+		boundaries_chromosomes[boundaries][MAX_CHROMOSOME_NAME_LEN - 1] = 0;
+		boundaries_extend_to_left_on_read[boundaries] = 1;
+		boundaries_inclusive_base_pos[boundaries] = chro_cursor - 1;
+		boundaries_inclusive_base_on_read[boundaries++] = read_len - right_clipped - 1;
+	}
+
+	int tag_cursor=0;
+
+	//if(strstr(extra_tags, "CG:Z")) {
+	//	SUBREADprintf("CIGAR=%s, EXTRA=%s\n", cigar, extra_tags);
+	//}
+	int status = PARSE_STATUS_TAGNAME;
+	char tag_name[2], typechar=0;
+	int tag_inner_cursor=0;
+
+	char read_main_strand = (((flag & 0x10) == 0x10) == ((flag & 0x40)==0x40))?'-':'+';
+	char current_fusion_char[MAX_CHROMOSOME_NAME_LEN];
+	unsigned int current_fusion_pos = 0;
+	char current_fusion_strand = 0;
+	char current_fusion_cigar[FC_CIGAR_PARSER_ITEMS * 15];
+	current_fusion_cigar [0] =0;
+	current_fusion_char [0]=0;
+
+	// Small state machine over "XX:T:value<TAB>XX:T:value...": tag name, type char, value.
+	while(1){
+		int nch = extra_tags[tag_cursor];
+		if(status == PARSE_STATUS_TAGNAME){
+			tag_name[tag_inner_cursor++] = nch;
+			if(tag_inner_cursor == 2){
+				status = PARSE_STATUS_TAGTYPE;
+				tag_cursor += 1;
+				assert(extra_tags[tag_cursor] == ':');
+			}
+		}else if(status == PARSE_STATUS_TAGTYPE){
+			typechar = nch;
+			tag_cursor +=1;
+			assert(extra_tags[tag_cursor] == ':');
+			tag_inner_cursor = 0;
+			status = PARSE_STATUS_TAGVALUE;
+		}else if(status == PARSE_STATUS_TAGVALUE){
+			if(nch == '\t' || nch == 0){
+				// A fusion segment is processed as soon as all four of its tags were seen.
+				if(current_fusion_cigar[0] && current_fusion_char[0] && current_fusion_pos && current_fusion_strand){
+
+					unsigned int left_pos = 0, right_pos = 0;
+					unsigned short left_on_read = 0, right_on_read = 0;
+					int has_left = 0, has_right = 0;
+
+					unsigned int start_pos = current_fusion_pos;
+					if(current_fusion_strand!=read_main_strand)
+						start_pos = find_left_end_cigar(current_fusion_pos, current_fusion_cigar);
+
+					ret += fetch_boundaries(current_fusion_char, current_fusion_cigar, start_pos, current_fusion_strand, &has_left, &left_on_read, &left_pos, &has_right, &right_on_read, &right_pos, result_junctions + ret, FC_CIGAR_PARSER_ITEMS - ret );
+
+					if(has_left && boundaries < FC_CIGAR_PARSER_ITEMS){
+						strncpy(boundaries_chromosomes[boundaries] , current_fusion_char, MAX_CHROMOSOME_NAME_LEN - 1);
+						boundaries_chromosomes[boundaries][MAX_CHROMOSOME_NAME_LEN - 1] = 0;
+						boundaries_extend_to_left_on_read[boundaries] = 0;
+						boundaries_inclusive_base_pos[boundaries] = left_pos;
+						boundaries_inclusive_base_on_read[boundaries++] = left_on_read;
+					}
+					if(has_right && boundaries < FC_CIGAR_PARSER_ITEMS){
+						strncpy(boundaries_chromosomes[boundaries] , current_fusion_char, MAX_CHROMOSOME_NAME_LEN - 1);
+						boundaries_chromosomes[boundaries][MAX_CHROMOSOME_NAME_LEN - 1] = 0;
+						boundaries_extend_to_left_on_read[boundaries] = 1;
+						boundaries_inclusive_base_pos[boundaries] = right_pos;
+						boundaries_inclusive_base_on_read[boundaries++] = right_on_read;
+					}
+
+
+					// SUBREADprintf("BOUND_EXT: %s:%u (at %u) (%c) ~ %s:%u (at %u) (%c)\n", current_fusion_char, left_pos, left_on_read, has_left?'Y':'X' , current_fusion_char, right_pos, right_on_read, has_right?'Y':'X');
+
+					current_fusion_pos = 0;
+					current_fusion_strand = 0;
+					current_fusion_cigar [0] =0;
+					current_fusion_char [0]=0;
+				}
+
+				tag_inner_cursor = 0;
+				status = PARSE_STATUS_TAGNAME;
+			}else{
+				if(tag_name[0]=='C' && tag_name[1]=='C' && typechar == 'Z'){
+					// Characters that do not fit into the buffer are dropped.
+					if(tag_inner_cursor < (int)sizeof(current_fusion_char) - 1){
+						current_fusion_char[tag_inner_cursor++]=nch;
+						current_fusion_char[tag_inner_cursor]=0;
+					}
+				}else if(tag_name[0]=='C' && tag_name[1]=='G' && typechar == 'Z'){
+					if(tag_inner_cursor < (int)sizeof(current_fusion_cigar) - 1){
+						current_fusion_cigar[tag_inner_cursor++]=nch;
+						current_fusion_cigar[tag_inner_cursor]=0;
+					}
+				}else if(tag_name[0]=='C' && tag_name[1]=='P' && typechar == 'i'){
+					current_fusion_pos = current_fusion_pos * 10 + (nch - '0');
+				}else if(tag_name[0]=='C' && tag_name[1]=='T' && typechar == 'Z'){
+					current_fusion_strand = nch;
+				}
+			}
+		}
+
+		if(nch == 0){
+			assert(status == PARSE_STATUS_TAGNAME);
+			break;
+		}
+
+		tag_cursor++;
+	}
+
+
+	//for(x1 = 0; x1 < boundaries; x1++)
+	//	SUBREADprintf("HAS: LR:%d, READ:%d\n", boundaries_extend_to_left_on_read[x1], boundaries_inclusive_base_on_read[x1]);
+
+	// Pair a segment that ends at read base k (x1) with a segment that starts at read base
+	// k+1 (x2). A paired boundary is marked as consumed by emptying its chromosome name.
+	for(x1 = 0; x1 < boundaries; x1++)
+		for(x2 = 0; x2 < boundaries; x2++){
+			if(x1==x2) continue;
+			if(boundaries_chromosomes[x1][0]==0 || boundaries_chromosomes[x2][0]==0) continue;
+			if(boundaries_extend_to_left_on_read[x1] == 1 && boundaries_extend_to_left_on_read[x2] == 0){
+				if( boundaries_inclusive_base_on_read[x1] == boundaries_inclusive_base_on_read[x2]-1 ){
+
+					if(ret <= FC_CIGAR_PARSER_ITEMS - 1){
+						result_junctions[ret].last_exon_base_left = boundaries_inclusive_base_pos[x1];
+						result_junctions[ret].first_exon_base_right = boundaries_inclusive_base_pos[x2];
+						strncpy(result_junctions[ret].chromosome_name_left, boundaries_chromosomes[x1], CHROMOSOME_NAME_LENGTH);
+						result_junctions[ret].chromosome_name_left[CHROMOSOME_NAME_LENGTH] = 0;
+						strncpy(result_junctions[ret].chromosome_name_right, boundaries_chromosomes[x2], CHROMOSOME_NAME_LENGTH);
+						result_junctions[ret].chromosome_name_right[CHROMOSOME_NAME_LENGTH] = 0;
+						ret++;
+					}
+
+
+					// SUBREADprintf("MATCH: %d ~ %d\n", boundaries_inclusive_base_on_read[x1], boundaries_inclusive_base_on_read[x2]);
+					boundaries_chromosomes[x1][0]=0;
+					boundaries_chromosomes[x2][0]=0;
+				}
+			}
+		}
+
+	//for(x1 = 0; x1 < boundaries; x1++)
+	//	if(boundaries_chromosomes[x1][0])
+	//		SUBREADprintf("LEFT: LR:%d, READ:%d\n", boundaries_extend_to_left_on_read[x1], boundaries_inclusive_base_on_read[x1]);
+	return ret;
+}
+
+
unsigned int unistr_cpy(fc_thread_global_context_t * global_context, char * str, int strl)
{
unsigned int ret;
@@ -336,6 +595,8 @@ void print_FC_configuration(fc_thread_global_context_t * global_context, char *
print_in_box(80,0,0," Annotations : %s (%s)", annot, is_GTF?"GTF":"SAF");
if(isReadSummaryReport)
print_in_box(80,0,0," Assignment details : <input_file>.featureCounts");
+ if(global_context -> do_junction_counting)
+ print_in_box(80,0,0," Junction Counting : <output_file>.junctions");
if(global_context -> alias_file_name[0])
print_in_box(80,0,0," Chromosome alias file : %s", global_context -> alias_file_name);
@@ -453,6 +714,83 @@ int is_comment_line(const char * l, int file_type, unsigned int lineno)
return tabs < ((file_type == FILE_TYPE_GTF)?8:4);
}
+// Remember the span of a named feature for junction annotation.
+// junction_features_table maps a chromosome name to a per-chromosome table, which maps a
+// feature name to a fc_junction_gene_t. The first registration of a name creates the entry;
+// later ones only widen [pos_first_base, pos_last_base] so that it covers [start, stop].
+void register_junc_feature(fc_thread_global_context_t *global_context, char * feature_name, char * chro, unsigned int start, unsigned int stop){
+	HashTable * chro_genes = HashTableGet(global_context -> junction_features_table, chro);
+	//SUBREADprintf("REG %s : %p\n", chro, chro_genes);
+	if(!chro_genes){
+		// First feature on this chromosome: create its table and register it under a
+		// private copy of the chromosome name.
+		char * chro_key;
+
+		chro_genes = HashTableCreate(48367);
+		HashTableSetDeallocationFunctions(chro_genes, NULL, free);
+		HashTableSetKeyComparisonFunction(chro_genes, fc_strcmp);
+		HashTableSetHashFunction(chro_genes, fc_chro_hash);
+
+		chro_key = malloc(strlen(chro)+1);
+		strcpy(chro_key, chro);
+		HashTablePut(global_context -> junction_features_table, chro_key, chro_genes);
+	}
+
+	fc_junction_gene_t * known_gene = HashTableGet(chro_genes, feature_name);
+	if(known_gene){
+		if(start < known_gene -> pos_first_base) known_gene -> pos_first_base = start;
+		if(stop > known_gene -> pos_last_base) known_gene -> pos_last_base = stop;
+		return;
+	}
+
+	// The entry owns its name, which doubles as the hash key.
+	fc_junction_gene_t * new_gene = malloc(sizeof(fc_junction_gene_t));
+	strcpy(new_gene -> gene_name, feature_name);
+	new_gene -> pos_first_base = start;
+	new_gene -> pos_last_base = stop;
+	HashTablePut(chro_genes, new_gene -> gene_name, new_gene);
+}
+
+// Value deallocator of the junction bucket table: releases the pointer array of a
+// gene_info_list_t and then the list itself. The entries the array points to are not freed.
+void free_bucket_table_list(void * pv){
+	gene_info_list_t * bucket = pv;
+
+	free(bucket -> genes);
+	free(bucket);
+}
+
+#define JUNCTION_BUCKET_STEP (128*1024)
+
+// Find the genes whose span contains `pos` on chromosome `chro`.
+//
+// Genes are looked up in junction_bucket_table under the key "<chromosome>:<bucket start>",
+// where the bucket start is `pos` rounded down to a multiple of JUNCTION_BUCKET_STEP.
+// The chromosome name is tried as given first, so every lookup that succeeded before still
+// returns the same list. If that finds nothing, the alias of the name (when an alias table
+// is loaded), the name without a leading "chr", and the name with "chr" prepended are tried
+// in that order. Previously these alternatives were resolved but their result was never used.
+//
+// Up to max_ret_info_size matching entries are stored in ret_info; further matches are
+// dropped. Returns the number of entries stored.
+int locate_junc_features(fc_thread_global_context_t *global_context, char * chro, unsigned int pos, fc_junction_gene_t ** ret_info, int max_ret_info_size){
+	char bucket_key[CHROMOSOME_NAME_LENGTH + 20];
+	unsigned int bucket_start = pos - pos % JUNCTION_BUCKET_STEP;
+	gene_info_list_t * list;
+	int ret = 0;
+
+	// snprintf keeps an over-long chromosome name from overflowing bucket_key.
+	snprintf(bucket_key, sizeof(bucket_key), "%s:%u", chro, bucket_start);
+	list = HashTableGet(global_context -> junction_bucket_table, bucket_key);
+
+	if(list == NULL && global_context -> annot_chro_name_alias_table) {
+		char * anno_chro_name = HashTableGet( global_context -> annot_chro_name_alias_table , chro);
+		if(anno_chro_name){
+			snprintf(bucket_key, sizeof(bucket_key), "%s:%u", anno_chro_name, bucket_start);
+			list = HashTableGet(global_context -> junction_bucket_table, bucket_key);
+		}
+	}
+
+	if(list == NULL && strlen(chro)>3 && memcmp(chro, "chr", 3)==0){
+		snprintf(bucket_key, sizeof(bucket_key), "%s:%u", chro + 3, bucket_start);
+		list = HashTableGet(global_context -> junction_bucket_table, bucket_key);
+	}
+
+	if(list == NULL){
+		snprintf(bucket_key, sizeof(bucket_key), "chr%s:%u", chro, bucket_start);
+		list = HashTableGet(global_context -> junction_bucket_table, bucket_key);
+	}
+
+	if(list){
+		int x1;
+		for(x1 = 0; x1 < list -> used; x1++){
+			fc_junction_gene_t * gene_info = list -> genes[x1];
+			if(gene_info -> pos_first_base <= pos && gene_info -> pos_last_base >= pos){
+				if(ret < max_ret_info_size)
+					ret_info [ret ++] = gene_info;
+			}
+		}
+	}
+
+	return ret;
+}
+
// This function loads annotations from the file.
// It returns the number of features loaded, or -1 if something is wrong.
// Memory will be allocated in this function. The pointer is saved in *loaded_features.
@@ -471,6 +809,19 @@ int load_feature_info(fc_thread_global_context_t *global_context, const char * a
HashTableSetKeyComparisonFunction(chro_name_table, fc_strcmp_chro);
global_context -> longest_chro_name = 0;
+ if(global_context -> do_junction_counting){
+ global_context -> junction_bucket_table = HashTableCreate(76037);
+ HashTableSetDeallocationFunctions(global_context -> junction_bucket_table, free, free_bucket_table_list);
+ HashTableSetKeyComparisonFunction(global_context -> junction_bucket_table, fc_strcmp);
+ HashTableSetHashFunction(global_context -> junction_bucket_table, fc_chro_hash);
+
+ global_context -> junction_features_table = HashTableCreate(1603);
+ HashTableSetDeallocationFunctions(global_context -> junction_features_table, free, (void (*)(void *))HashTableDestroy);
+ HashTableSetKeyComparisonFunction(global_context -> junction_features_table, fc_strcmp);
+ HashTableSetHashFunction(global_context -> junction_features_table, fc_chro_hash);
+ }
+
+
// first scan: get the chromosome size, etc
while(1)
{
@@ -591,6 +942,11 @@ int load_feature_info(fc_thread_global_context_t *global_context, const char * a
chro_stab -> reverse_table_start_index[bin_location]++;
is_gene_id_found = 1;
+
+ assert(feature_name);
+ if(global_context -> do_junction_counting)
+ register_junc_feature(global_context , feature_name, seq_name, ret_features[xk1].start, ret_features[xk1].end);
+
xk1++;
}
else if(file_type == FILE_TYPE_GTF)
@@ -655,6 +1011,9 @@ int load_feature_info(fc_thread_global_context_t *global_context, const char * a
}
chro_stab -> reverse_table_start_index[bin_location]++;
+ if(global_context -> do_junction_counting)
+ register_junc_feature(global_context , feature_name_tmp, seq_name, ret_features[xk1].start, ret_features[xk1].end);
+
xk1++;
}
}
@@ -1078,52 +1437,490 @@ void print_read_wrapping(char * rl, int is_second){
}
-void report_unpair_warning(fc_thread_global_context_t * global_context, fc_thread_thread_context_t * thread_context, int * this_noproperly_paired_added){
- //printf("WARN:%d [%d]\n", global_context->is_unpaired_warning_shown, thread_context -> thread_id);
- if(!global_context->is_unpaired_warning_shown)
+void vote_and_add_count(fc_thread_global_context_t * global_context, fc_thread_thread_context_t * thread_context,
+ long * hits_indices1, unsigned short * hits_read_start_base1, short * hits_read_len1, int nhits1, unsigned short rl1,
+ long * hits_indices2, unsigned short * hits_read_start_base2, short * hits_read_len2, int nhits2, unsigned short rl2,
+ int fixed_fractional_count, char * read_name);
+
+
+// Reset callback for the read pairer: puts the counting state back to empty.
+// pairer_vp is the SAM_pairer_context_t; its appendix1 field carries the global context.
+// The chromosome table read from the header is dropped, every per-thread counter is zeroed
+// and, when a SAM_output_fp file is open, it is truncated and rewound.
+// NOTE(review): presumably called when the pairer restarts reading the input -- confirm in
+// the pairer implementation.
+void process_pairer_reset(void * pairer_vp){
+ SAM_pairer_context_t * pairer = (SAM_pairer_context_t *) pairer_vp;
+ fc_thread_global_context_t * global_context = (fc_thread_global_context_t * )pairer -> appendix1;
+ // Forget the reference table; process_pairer_header() allocates it again.
+ if(global_context -> sambam_chro_table) free(global_context -> sambam_chro_table);
+ global_context -> sambam_chro_table = NULL;
+ global_context -> sambam_chro_table_items = 0;
+
+ int xk1, xk2;
+ for(xk1=0; xk1<global_context-> thread_number; xk1++)
{
- global_context->is_unpaired_warning_shown=1;
- print_in_box(80,0,0," Found reads that are not properly paired.");
- print_in_box(80,0,0," (missing mate or the mate is not the next read)");
+ for(xk2=0; xk2<global_context -> exontable_exons; xk2++)
+ {
+ global_context -> thread_contexts[xk1].count_table[xk2] = 0;
+ }
- if(global_context -> do_not_sort){
- print_in_box(85,0,0," %c[31mHowever, the reads will not be re-ordered.", 27);
- }else{
- global_context->redo = 1;
+ global_context -> thread_contexts[xk1].all_reads = 0;
+ global_context -> thread_contexts[xk1].nreads_mapped_to_exon = 0;
+ global_context -> thread_contexts[xk1].unpaired_fragment_no = 0;
+
+
+
+ global_context -> thread_contexts[xk1].read_counters.unassigned_ambiguous = 0;
+ global_context -> thread_contexts[xk1].read_counters.unassigned_nofeatures = 0;
+ global_context -> thread_contexts[xk1].read_counters.unassigned_unmapped = 0;
+ global_context -> thread_contexts[xk1].read_counters.unassigned_mappingquality = 0;
+ global_context -> thread_contexts[xk1].read_counters.unassigned_fragmentlength = 0;
+ global_context -> thread_contexts[xk1].read_counters.unassigned_chimericreads = 0;
+ global_context -> thread_contexts[xk1].read_counters.unassigned_multimapping = 0;
+ global_context -> thread_contexts[xk1].read_counters.unassigned_secondary = 0;
+ global_context -> thread_contexts[xk1].read_counters.unassigned_nonjunction = 0;
+ global_context -> thread_contexts[xk1].read_counters.unassigned_duplicate = 0;
+ global_context -> thread_contexts[xk1].read_counters.assigned_reads = 0;
+ }
+
+ // Discard what was already written to the output file; the return value of ftruncate() is not checked.
+ if(global_context -> SAM_output_fp){
+ ftruncate(fileno(global_context -> SAM_output_fp), 0);
+ fseek(global_context -> SAM_output_fp, 0 , SEEK_SET);
+ }
+}
+
+// Header callback for the read pairer: appends the reference sequences found in one binary
+// header block to global_context -> sambam_chro_table.
+//   pairer_vp : the SAM_pairer_context_t; its appendix1 field carries the global context.
+//   is_text   : when non-zero the block is ignored.
+//   items     : number of reference entries in `bin`.
+//   bin       : entries laid out as 4-byte name length, name bytes, 4-byte sequence length.
+//   thread_no, bin_len : not used.
+// The table is modified while holding sambam_chro_table_lock. Always returns 0.
+int process_pairer_header (void * pairer_vp, int thread_no, int is_text, unsigned int items, char * bin, unsigned int bin_len){
+
+
+ SAM_pairer_context_t * pairer = (SAM_pairer_context_t *) pairer_vp;
+ fc_thread_global_context_t * global_context = (fc_thread_global_context_t * )pairer -> appendix1;
+
+ //SUBREADprintf("ENTER PROCESS (THRD %d): IS_TXT=%d, ITEMS = %d, CURRENT_ITEMS=%d\n", thread_no, is_text, items, global_context -> sambam_chro_table_items);
+ pthread_spin_lock(&global_context -> sambam_chro_table_lock);
+
+ if( !is_text ){
+ // Grow the table by `items` entries. NOTE(review): the result of realloc/malloc is not checked.
+ if(global_context -> sambam_chro_table)
+ global_context -> sambam_chro_table = realloc(global_context -> sambam_chro_table, (items + global_context -> sambam_chro_table_items) * sizeof(SamBam_Reference_Info));
+ else global_context -> sambam_chro_table = malloc(items * sizeof(SamBam_Reference_Info));
+
+ int x1, bin_ptr = 0;
+ for(x1 = global_context -> sambam_chro_table_items; x1 < global_context -> sambam_chro_table_items+items; x1++){
+ int l_name;
+ memcpy(&l_name, bin + bin_ptr, 4);
+ // NOTE(review): the name length comes from the input file and is only checked by an assert.
+ assert(l_name < MAX_CHROMOSOME_NAME_LEN);
+ bin_ptr += 4;
+ // presumably l_name counts the terminating NUL, as in the BAM header layout -- TODO confirm
+ memcpy(global_context -> sambam_chro_table[x1].chro_name , bin + bin_ptr, l_name);
+ //SUBREADprintf("The %d-th is '%s'\n", x1, global_context -> sambam_chro_table[x1].chro_name);
+ bin_ptr += l_name;
+ memcpy(&global_context -> sambam_chro_table[x1].chro_length , bin + bin_ptr, 4);
+ bin_ptr += 4;
+ }
+ global_context -> sambam_chro_table_items += items;
+ }
+ pthread_spin_unlock(&global_context -> sambam_chro_table_lock);
+ return 0;
+}
+
+void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_thread_context_t * thread_context, char * bin1, char * bin2);
+
+// Build the SAM text of a placeholder mate for a read whose real mate is missing.
+//   rname    : composite read name, split in place on the '\027' character into
+//              name, r1 chromosome index, r1 position, r2 chromosome index, r2 position, HI value.
+//              NOTE(review): all six fields must be present; a missing field passes NULL to atoi().
+//   bin1     : binary record of the existing read; only its FLAG (upper 16 bits of the word
+//              at offset 16) is used. The word at offset 32 is read into mate_tlen but not used.
+//   out_txt2 : receives the generated line.
+//   sambam_chro_table : maps chromosome indices to names.
+// The dummy gets the opposite first/second bit of the existing read, the paired bit, and the
+// unmapped/mate-unmapped and reverse/mate-reverse bits of the existing read swapped.
+// It has MAPQ 0, CIGAR "*", template length 0, sequence "N" and quality "I".
+void make_dummy(char * rname, char * bin1, char * out_txt2, SamBam_Reference_Info * sambam_chro_table){
+ char * tmptr = NULL;
+
+ //SUBREADprintf("S=%s ", rname);
+ char * realname = strtok_r(rname, "\027", &tmptr);
+ //int len_name = strlen(realname);
+ int r1_chro = atoi(strtok_r(NULL, "\027", &tmptr));
+ int r1_pos = atoi(strtok_r(NULL, "\027", &tmptr));
+ int r2_chro = atoi(strtok_r(NULL, "\027", &tmptr));
+ int r2_pos = atoi(strtok_r(NULL, "\027", &tmptr));
+ int HItag = atoi(strtok_r(NULL, "\027", &tmptr));
+ int mate_FLAG = 0;
+ memcpy(&mate_FLAG, bin1 + 16, 4);
+ mate_FLAG = 0xffff&(mate_FLAG >>16);
+ int mate_tlen = 0;
+ memcpy(&mate_tlen, bin1 + 32, 4);
+
+ // A negative chromosome index means "no chromosome"; the position is then meaningless.
+ if(r1_chro<0) r1_pos=-1;
+ if(r2_chro<0) r2_pos=-1;
+
+ // If the existing read is the first in pair (0x40), the dummy is the second one, and vice versa.
+ int my_chro = (mate_FLAG&0x40)? r2_chro : r1_chro;
+ int my_pos = (mate_FLAG&0x40)? r2_pos : r1_pos;
+ int mate_chro = (mate_FLAG&0x40)? r1_chro : r2_chro;
+ int mate_pos = (mate_FLAG&0x40)? r1_pos : r2_pos;
+
+ //int bin_mq_nl = (len_name+1);
+ int my_flag = (mate_FLAG&0x40)? 0x80:0x40;
+ my_flag |= 1;
+ if(mate_FLAG & 8)my_flag |=4;
+ if(mate_FLAG & 4)my_flag |=8;
+ if(mate_FLAG & 0x10) my_flag |= 0x20;
+ if(mate_FLAG & 0x20) my_flag |= 0x10;
+
+ // The HI tag is only emitted when the parsed value is not negative.
+ char HItagStr[20];
+ if(HItag>=0){
+ sprintf(HItagStr, "\tHI:i:%d", HItag);
+ }else{
+ HItagStr[0]=0;
+ }
+
+ char * my_chro_str = "*";
+ if(my_chro >= 0) my_chro_str = sambam_chro_table[my_chro].chro_name;
+
+ char * mate_chro_str = "*";
+ if(mate_chro >= 0) mate_chro_str = sambam_chro_table[mate_chro].chro_name;
+
+ sprintf(out_txt2, "%s\t%d\t%s\t%d\t0\t*\t%s\t%d\t0\tN\tI\t%s", realname, my_flag, my_chro_str, max(0, my_pos),
+ mate_chro_str, max(0,mate_pos), HItagStr);
+}
+
+
+/*
+ * Render one binary BAM alignment record as a SAM-style text line.
+ *
+ * bin : the record, starting with its 4-byte block length.
+ * txt : output buffer; the caller must make it large enough for the whole line.
+ * sambam_chro_table : maps reference indices to chromosome names.
+ *
+ * The sequence and quality columns are not decoded; they are always written as "N" and "I".
+ * Optional fields are written as TAG:TYPE:VALUE. Float ('f') and array ('B') fields are
+ * skipped without being written. Multi-byte values are copied as stored, so the host is
+ * assumed to have the byte order of the file, as elsewhere in this function.
+ *
+ * Fixes over the previous version:
+ *  - 'Z'/'H' fields now have the colon between the type and the value, and their
+ *    terminating NUL is skipped, so the following field is parsed from the right offset;
+ *  - 'B' fields read the element type at offset 3 and the count at offset 4, and skip
+ *    their 5-byte header in addition to the elements;
+ *  - signed ('c', 's', 'i') and unsigned ('C', 'S', 'I') integers are printed with their
+ *    own signedness;
+ *  - an unknown field type stops the scan instead of continuing from a guessed offset.
+ */
+void convert_bin_to_read(char * bin, char * txt, SamBam_Reference_Info * sambam_chro_table){
+	unsigned int block_len;
+	memcpy(&block_len, bin, 4);
+	int ref_id;
+	memcpy(&ref_id, bin + 4, 4);
+	int pos;
+	memcpy(&pos, bin + 8, 4);
+	unsigned int bin_mq_nl;
+	memcpy(&bin_mq_nl, bin + 12, 4);
+	unsigned int flag_nc;
+	memcpy(&flag_nc, bin + 16, 4);
+	int l_seq;
+	memcpy(&l_seq, bin + 20, 4);
+	int next_refID;
+	memcpy(&next_refID, bin + 24, 4);
+	int next_pos;
+	memcpy(&next_pos, bin + 28, 4);
+	int tlen;
+	memcpy(&tlen, bin + 32, 4);
+
+	int txt_ptr = 0;
+	// The stored name length includes the terminating NUL, which the next column overwrites.
+	int l_read_name = bin_mq_nl & 0xff;
+	memcpy(txt , bin + 36, l_read_name);
+	txt_ptr += l_read_name - 1;
+	txt_ptr += sprintf(txt+txt_ptr, "\t%d", flag_nc >> 16);
+	if(ref_id < 0){
+		strcpy(txt+txt_ptr, "\t*\t0\t0");
+		txt_ptr += 6;
+	}else txt_ptr += sprintf(txt+txt_ptr, "\t%s\t%d\t%d", sambam_chro_table[ref_id].chro_name, pos + 1, (bin_mq_nl >> 8 & 0xff));
+
+	int cigar_ops = flag_nc & 0xffff;
+	if(cigar_ops < 1){
+		strcpy(txt+txt_ptr, "\t*");
+		txt_ptr += 2;
+	}else{
+		int x1;
+		strcpy(txt+txt_ptr, "\t");
+		txt_ptr++;
+		for(x1=0; x1 < cigar_ops; x1++){
+			unsigned int cigar_sec;
+			memcpy(&cigar_sec, bin + 36 + l_read_name + 4 * x1 , 4);
+			txt_ptr += sprintf(txt+txt_ptr, "%u%c", cigar_sec >> 4 , cigar_op_char( cigar_sec & 15 ));
}
- print_in_box(80,0,0," Below are the two reads that are not properly paired:");
- print_read_wrapping(thread_context -> line_buffer1,0);
- print_read_wrapping(thread_context -> line_buffer2,1);
+	}
+	if(next_refID < 0)
+		txt_ptr += sprintf(txt+txt_ptr, "\t*\t0\t%d", tlen);
+	else txt_ptr += sprintf(txt+txt_ptr, "\t%s\t%d\t%d", sambam_chro_table[next_refID].chro_name, next_pos + 1, tlen);
+	strcpy(txt+txt_ptr, "\tN\tI");
+	txt_ptr += 4;
+
+	// Optional fields start after the name, the CIGAR, the packed sequence and the qualities.
+	int bin_ptr = 36 + l_read_name + 4 * cigar_ops + l_seq + (l_seq+1)/2;
+
+	while(bin_ptr < block_len + 4){
+		char tag_name[3];
+		tag_name[0]=bin[bin_ptr];
+		tag_name[1]=bin[bin_ptr+1];
+		tag_name[2]=0;
+
+		char tagtype = bin[bin_ptr+2];
+		char * tagval = bin + bin_ptr + 3;
+		// delta is the number of bytes the value occupies after the 3-byte tag header.
+		int delta = 0;
+		if(tagtype == 'i'){
+			int val;
+			delta = 4;
+			memcpy(&val, tagval, 4);
+			txt_ptr += sprintf(txt+txt_ptr, "\t%s:i:%d", tag_name, val);
+		}else if(tagtype == 'I'){
+			unsigned int val;
+			delta = 4;
+			memcpy(&val, tagval, 4);
+			txt_ptr += sprintf(txt+txt_ptr, "\t%s:i:%u", tag_name, val);
+		}else if(tagtype == 's'){
+			short val;
+			delta = 2;
+			memcpy(&val, tagval, 2);
+			txt_ptr += sprintf(txt+txt_ptr, "\t%s:i:%d", tag_name, (int)val);
+		}else if(tagtype == 'S'){
+			unsigned short val;
+			delta = 2;
+			memcpy(&val, tagval, 2);
+			txt_ptr += sprintf(txt+txt_ptr, "\t%s:i:%d", tag_name, (int)val);
+		}else if(tagtype == 'c'){
+			delta = 1;
+			txt_ptr += sprintf(txt+txt_ptr, "\t%s:i:%d", tag_name, (int)*(signed char *)tagval);
+		}else if(tagtype == 'C'){
+			delta = 1;
+			txt_ptr += sprintf(txt+txt_ptr, "\t%s:i:%d", tag_name, (int)*(unsigned char *)tagval);
+		}else if(tagtype == 'A'){
+			delta = 1;
+			txt_ptr += sprintf(txt+txt_ptr, "\t%s:%c:%c", tag_name, tagtype, *tagval);
+		}else if(tagtype == 'f')
+			delta = 4;
+		else if(tagtype == 'Z' ||tagtype == 'H'){
+			txt_ptr += sprintf(txt+txt_ptr, "\t%s:%c:", tag_name, tagtype);
+			while(tagval[delta]){
+				*(txt+txt_ptr) = tagval[delta];
+				txt_ptr ++;
+				delta ++;
+			}
+			*(txt+txt_ptr) = 0;
+			// Step over the NUL that terminates the string in the record.
+			delta ++;
+		}else if(tagtype == 'B'){
+			// Layout after the tag header: element type (1 byte), count (4 bytes), elements.
+			char celltype = tagval[0];
+			int cellitems ;
+			memcpy(&cellitems, tagval + 1, 4);
+			int celldelta = 1;
+			if(celltype == 's' || celltype == 'S') celldelta = 2;
+			else if(celltype == 'i' || celltype == 'I' || celltype == 'f') celldelta = 4;
+			delta = 1 + 4 + cellitems * celldelta;
+		}else{
+			// The size of an unknown field type cannot be known; stop instead of misparsing.
+			break;
+		}
+		bin_ptr += 3 + delta;
}
- if(0==(*this_noproperly_paired_added))thread_context -> unpaired_fragment_no++;
- (*this_noproperly_paired_added) = 1;
}
+// Turn the SAM FLAG of one read into the FLAG seen from its mate's point of view.
+// Bits 0x1 and 0x2 are kept. The pairs 0x4/0x8, 0x10/0x20 and 0x40/0x80 are swapped.
+// All bits above 0x80 are dropped from the result.
+int reverse_flag(int mf){
+ int ret = mf & 3;
+ if(mf & 4) ret |= 8;
+ if(mf & 8) ret |= 4;
+ if(mf & 0x10) ret |= 0x20;
+ if(mf & 0x20) ret |= 0x10;
-void vote_and_add_count(fc_thread_global_context_t * global_context, fc_thread_thread_context_t * thread_context,
- long * hits_indices1, unsigned short * hits_read_start_base1, short * hits_read_len1, int nhits1, unsigned short rl1,
- long * hits_indices2, unsigned short * hits_read_start_base2, short * hits_read_len2, int nhits2, unsigned short rl2,
- int fixed_fractional_count, char * read_name);
+ if(mf & 0x40) ret |= 0x80;
+ if(mf & 0x80) ret |= 0x40;
+ return ret;
+}
+
+/*
+ * Decode one binary alignment record into the fields used by read assignment.
+ *
+ * bin  : record of the read itself, starting at its 4-byte block-length field; may be NULL.
+ * bin2 : record of the mate.  It is only consulted when bin is NULL; the read's flag,
+ *        chromosome, position and template length are then derived from the mate's record
+ *        (flag through reverse_flag(), own/mate reference and position swapped, template
+ *        length negated).  At least one of bin / bin2 must be non-NULL.
+ *
+ * Outputs:
+ *   read_name            points into the record (offset 36), not copied.
+ *   flag, mapq           FLAG and mapping quality; mapq is 0 when only bin2 is available.
+ *   chro, mate_chro      names taken from sambam_chro_table, or NULL for a negative reference id.
+ *   pos, mate_pos        1-based positions (stored value + 1).
+ *   tlen                 signed template length.
+ *   is_junction_read     set to 1 when the CIGAR contains an I, D or N operation.
+ *   cigar_sect           number of sections written to the arrays below (at most FC_CIGAR_PARSER_ITEMS;
+ *                        stays 0 when only bin2 is available).
+ *   Starting_Chro_Points / Starting_Read_Points / Section_Read_Lengths / ChroNames
+ *                        one entry per run of 'M' bases between I/D/N operations.
+ *   Event_After_Section  'I', 'D' or 'N' for the operation that closed section i; no entry is
+ *                        written for a trailing section that runs to the end of the CIGAR.
+ *   NH_value             value of the "NH" tag, or 1 when the tag is absent or bin is NULL.
+ *
+ * NOTE(review): fields are copied with memcpy into native ints, so a little-endian host is
+ * assumed -- confirm before building for big-endian targets.
+ * NOTE(review): CIGAR operations other than M/I/D/N/S (e.g. '=' and 'X') are ignored here and
+ * therefore contribute no section -- confirm that this is intended.
+ */
+void parse_bin(SamBam_Reference_Info * sambam_chro_table, char * bin, char * bin2, char ** read_name, int * flag, char ** chro, long * pos, int * mapq, char ** mate_chro, long * mate_pos, long * tlen, int * is_junction_read, int * cigar_sect, unsigned int * Starting_Chro_Points, unsigned short * Starting_Read_Points, unsigned short * Section_Read_Lengths, char ** ChroNames, char * Event_After_Section, int * NH_value){
+	int x1;
+	int field32;	// scratch for signed 32-bit record fields; widening happens on assignment so the sign survives
+	*cigar_sect = 0;
+	*NH_value = 1;
+	*flag = 0;
+	*mapq = 0;	// defined value for the bin==NULL case, where the record of this read is not available
+	*is_junction_read = 0;
+	assert(bin||bin2);
+
+	if(bin){
+		int refID, mate_refID, seq_len, block_len;
+
+		(*read_name) = bin + 36;
+
+		// offset 16: FLAG in the upper 16 bits, number of CIGAR operations in the lower 16 bits
+		memcpy(flag, bin + 16, 4);
+		int cigar_opts = (*flag) & 0xffff;
+		(*flag) = (*flag) >> 16;
+
+		memcpy(&refID, bin + 4, 4);
+		(*chro) = (refID >= 0) ? sambam_chro_table[refID].chro_name : NULL;
+
+		// The position is a signed 32-bit value; copying it straight into a long would lose
+		// the sign on platforms where long is wider than 32 bits.
+		memcpy(&field32, bin + 8, 4);
+		(*pos) = (long)field32 + 1;
+
+		// offset 12: read-name length in the lowest byte, mapping quality in the next byte
+		memcpy(mapq, bin + 12, 4);
+		int l_read_name = (*mapq) & 0xff;
+		(*mapq) = ((*mapq) >> 8) & 0xff;
+
+		memcpy(&seq_len, bin + 20, 4);
+
+		memcpy(&mate_refID, bin + 24, 4);
+		(*mate_chro) = (mate_refID >= 0) ? sambam_chro_table[mate_refID].chro_name : NULL;
+
+		memcpy(&field32, bin + 28, 4);
+		(*mate_pos) = (long)field32 + 1;
+
+		memcpy(&field32, bin + 32, 4);
+		(*tlen) = field32;
+
+		// The CIGAR words follow the read name at an arbitrary byte offset, so each word is
+		// copied out instead of being read through a cast int pointer.
+		char * cigar_bytes = bin + 36 + l_read_name;
+		unsigned int chro_cursor = (*pos), section_start_chro = (*pos);
+		unsigned short read_cursor = 0, this_section_length = 0, section_start_read = 0;
+		for(x1 = 0 ; x1 < cigar_opts; x1++){
+			unsigned int cigar_word;
+			memcpy(&cigar_word, cigar_bytes + 4 * x1, 4);
+			int optype = cigar_word & 0xf;
+			int optval = (cigar_word >> 4) & 0xfffffff;
+
+			if(optype == 0){ // 'M'
+				chro_cursor += optval;
+				read_cursor += optval;
+				this_section_length += optval;
+			}else if(optype == 1 || optype == 2 || optype == 3){ // 'I', 'D' or 'N'
+				(*is_junction_read) = 1;
+				char event_char = (optype == 3) ? 'N' : ((optype == 2) ? 'D' : 'I');
+
+				// close the current section; sections beyond the table capacity are dropped
+				if( (*cigar_sect) < FC_CIGAR_PARSER_ITEMS){
+					Event_After_Section[*cigar_sect] = event_char;
+					Starting_Chro_Points[*cigar_sect] = section_start_chro;
+					Starting_Read_Points[*cigar_sect] = section_start_read;
+					Section_Read_Lengths[*cigar_sect] = this_section_length;
+					ChroNames[*cigar_sect] = (*chro);
+					(*cigar_sect)++;
+				}
+
+				// D and N consume reference bases, I consumes read bases
+				if(optype == 2 || optype == 3)
+					chro_cursor += optval;
+				else read_cursor += optval;
+
+				section_start_chro = chro_cursor;
+				this_section_length = 0;
+				section_start_read = read_cursor;
+			}else if(optype == 4){ // 'S'
+				// only a soft clip met while the read cursor is still 0 moves the cursor
+				if(read_cursor==0)
+				{
+					read_cursor += optval;
+					section_start_read = read_cursor;
+				}
+			}
+		}
+		if(this_section_length>0){
+			// add the section that runs to the end of the CIGAR
+			if( (*cigar_sect) < FC_CIGAR_PARSER_ITEMS){
+				Starting_Chro_Points[*cigar_sect] = section_start_chro;
+				Starting_Read_Points[*cigar_sect] = section_start_read;
+				Section_Read_Lengths[*cigar_sect] = this_section_length ;
+				ChroNames[*cigar_sect] = (*chro);
+				(*cigar_sect)++;
+			}
+		}
+
+		// the optional tags start after: fixed part (36), read name, CIGAR words,
+		// packed sequence ((seq_len+1)/2 bytes) and qualities (seq_len bytes)
+		int bin_ptr = 36 + l_read_name + seq_len + (seq_len+1)/2 + 4 * cigar_opts;
+		memcpy(&block_len, bin, 4);
+		int found_NH = SAM_pairer_iterate_int_tags((unsigned char *)bin+bin_ptr, block_len + 4 - bin_ptr, "NH", NH_value);
+		if(!found_NH) *(NH_value) = 1;
+	}else{
+		// Only the mate's record is available: describe this read from the mate's point of view.
+		(*read_name) = bin2 + 36;
+		int mate_flag;
+		memcpy(&mate_flag, bin2 + 16, 4);
+		mate_flag = mate_flag >> 16;
+		(*flag) = reverse_flag(mate_flag);
+
+		// the mate's "mate reference/position" fields describe this read, and vice versa
+		int refID, mate_refID;
+		memcpy(&refID, bin2 + 24, 4);
+		memcpy(&mate_refID, bin2 + 4, 4);
+		(*chro) = (refID < 0) ? NULL : sambam_chro_table[refID].chro_name;
+		(*mate_chro) = (mate_refID < 0) ? NULL : sambam_chro_table[mate_refID].chro_name;
+
+		memcpy(&field32, bin2 + 28, 4);
+		(*pos) = (long)field32 + 1;
+
+		memcpy(&field32, bin2 + 8, 4);
+		(*mate_pos) = (long)field32 + 1;
+
+		memcpy(&field32, bin2 + 32, 4);
+		(*tlen) = -(long)field32;
+	}
+}
+
+/*
+typedef struct {
+ char chromosome_name_left[CHROMOSOME_NAME_LENGTH + 1];
+ char chromosome_name_right[CHROMOSOME_NAME_LENGTH + 1];
+ unsigned int last_exon_base_left;
+ unsigned int first_exon_base_right;
+} fc_junction_info_t;
+
+*/
+/*
+ * Collect the exon-exon junctions implied by the parsed CIGAR sections of one read.
+ *
+ * A junction is reported for every section x (x >= 1) whose preceding section was closed by
+ * an 'N' operation: its left side is the last base of section x-1, its right side the first
+ * base of section x, and both chromosome names are taken from ChroNames[x].
+ *
+ * junctions_current must have room for cigar_sections - 1 entries.
+ * global_context, alignment_masks and Starting_Read_Points are not used here.
+ *
+ * Returns the number of junctions written; 0 when there are fewer than two sections (the
+ * section arrays are not touched in that case, so they may be uninitialised).
+ */
+int calc_junctions_from_cigarInts(fc_thread_global_context_t * global_context, int alignment_masks , int cigar_sections, unsigned int * Starting_Chro_Points, unsigned short * Starting_Read_Points, unsigned short * Section_Lengths, char ** ChroNames, char * Event_After_Section, fc_junction_info_t * junctions_current){
+	int x1, ret = 0;
+
+	for(x1 = 1; x1 < cigar_sections; x1++){
+		if(Event_After_Section[x1-1] != 'N') continue;
+		if(ChroNames[x1] == NULL) continue;	// no reference name to record the junction under
+
+		fc_junction_info_t * junc = junctions_current + ret;
+		junc -> last_exon_base_left = Starting_Chro_Points[x1-1] + Section_Lengths[x1-1] - 1;
+		junc -> first_exon_base_right = Starting_Chro_Points[x1];
+
+		// Bounded copies: an over-long reference name is truncated instead of overrunning the
+		// record.  Assumes the two name members are fixed-size char arrays (as in the typedef
+		// quoted in the comment preceding this function) -- TODO confirm against the header.
+		strncpy(junc -> chromosome_name_left, ChroNames[x1], sizeof(junc -> chromosome_name_left) - 1);
+		junc -> chromosome_name_left[sizeof(junc -> chromosome_name_left) - 1] = 0;
+		strncpy(junc -> chromosome_name_right, ChroNames[x1], sizeof(junc -> chromosome_name_right) - 1);
+		junc -> chromosome_name_right[sizeof(junc -> chromosome_name_right) - 1] = 0;
+		ret ++;
+	}
+	return ret;
+}
+
+void add_fragment_supported_junction( fc_thread_global_context_t * global_context, fc_thread_thread_context_t * thread_context, fc_junction_info_t * supported_junctions1,
+ int njunc1, fc_junction_info_t * supported_junctions2, int njunc2);
+
+/*
+ * Find the junctions supported by a read (bin1) and, in paired-end assignment mode, by its
+ * mate (bin2), then hand them to add_fragment_supported_junction() if there is at least one.
+ *
+ * Either record pointer may be NULL as long as the other is not: parse_bin() then produces
+ * no CIGAR sections for the missing record, and that read contributes no junctions.
+ */
+void process_line_junctions(fc_thread_global_context_t * global_context, fc_thread_thread_context_t * thread_context, char * bin1, char * bin2) {
+	fc_junction_info_t supported_junctions1[FC_CIGAR_PARSER_ITEMS], supported_junctions2[FC_CIGAR_PARSER_ITEMS];
+	int is_second_read, njunc1=0, njunc2=0, is_junction_read, cigar_sections;
+	int alignment_masks, mapping_qual, NH_value;
+
+	for(is_second_read = 0 ; is_second_read < 2; is_second_read++){
+		char * read_chr, *read_name, *mate_chr;
+		long read_pos, fragment_length = 0, mate_pos;
+		unsigned int Starting_Chro_Points[FC_CIGAR_PARSER_ITEMS];
+		unsigned short Starting_Read_Points[FC_CIGAR_PARSER_ITEMS];
+		unsigned short Section_Read_Lengths[FC_CIGAR_PARSER_ITEMS];
+		char * ChroNames[FC_CIGAR_PARSER_ITEMS];
+		char Event_After_Section[FC_CIGAR_PARSER_ITEMS];
+		if(is_second_read && !global_context -> is_paired_end_mode_assign) break;
+
+		// first argument: record of the read being examined; second: record of the other read
+		parse_bin(global_context -> sambam_chro_table, is_second_read?bin2:bin1, is_second_read?bin1:bin2 , &read_name, &alignment_masks , &read_chr, &read_pos, &mapping_qual, &mate_chr, &mate_pos, &fragment_length, &is_junction_read, &cigar_sections, Starting_Chro_Points, Starting_Read_Points, Section_Read_Lengths, ChroNames, Event_After_Section, &NH_value);
+		assert(cigar_sections <= FC_CIGAR_PARSER_ITEMS);
+
+		// No sections means the section arrays above were never filled in; the junction count
+		// for this read stays at its initial 0.
+		if(cigar_sections < 1) continue;
+
+		if(is_second_read)
+			njunc2 = calc_junctions_from_cigarInts(global_context, alignment_masks , cigar_sections, Starting_Chro_Points, Starting_Read_Points, Section_Read_Lengths, ChroNames, Event_After_Section, supported_junctions2);
+		else
+			njunc1 = calc_junctions_from_cigarInts(global_context, alignment_masks , cigar_sections, Starting_Chro_Points, Starting_Read_Points, Section_Read_Lengths, ChroNames, Event_After_Section, supported_junctions1);
+	}
+	if(njunc1 >0 || njunc2>0)
+		add_fragment_supported_junction(global_context, thread_context, supported_junctions1, njunc1, supported_junctions2, njunc2);
+
+}
+
+/*
+ * Per-fragment entry point taking the pairer context as an opaque pointer (presumably
+ * registered as the SAM pairer's output handler -- confirm at the registration site).
+ *
+ * pairer_vp : a SAM_pairer_context_t whose appendix1 member holds the global context.
+ * thread_no : index into the global context's thread_contexts.
+ * rname     : not used here.
+ * bin1/bin2 : the two records of the fragment, forwarded unchanged.
+ *
+ * Always runs process_line_buffer(); additionally runs process_line_junctions() when
+ * do_junction_counting is set.  Always returns 0.
+ */
+int process_pairer_output(void * pairer_vp, int thread_no, char * rname, char * bin1, char * bin2){
+	fc_thread_global_context_t * fc_global = (fc_thread_global_context_t *)(((SAM_pairer_context_t *) pairer_vp) -> appendix1);
+	fc_thread_thread_context_t * fc_thread = &(fc_global -> thread_contexts[thread_no]);
+
+	process_line_buffer(fc_global, fc_thread, bin1, bin2);
+	if(!fc_global -> do_junction_counting)
+		return 0;
+
+	process_line_junctions(fc_global, fc_thread, bin1, bin2);
+	return 0;
+}
-void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_thread_context_t * thread_context)
+void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_thread_context_t * thread_context, char * bin1, char * bin2)
{
- char * read_chr, *read_1_chr = NULL, *tmp_tok_ptr= NULL, *CIGAR_str , *read_name = NULL, *read_name1 = NULL;
- long read_pos, fragment_length = 0, read_1_pos = 0;
+ char * read_chr, *read_name, *mate_chr;
+ long read_pos, fragment_length = 0, mate_pos;
unsigned int search_start = 0, search_end;
- int nhits1 = 0, nhits2 = 0, alignment_masks, search_block_id, search_item_id;
+ int nhits1 = 0, nhits2 = 0, alignment_masks, search_block_id, search_item_id, mapping_qual;
long * hits_indices1 = thread_context -> hits_indices1, * hits_indices2 = thread_context -> hits_indices2;
unsigned short * hits_read_start_base1 = thread_context -> hits_read_start_base1 , * hits_read_start_base2 = thread_context -> hits_read_start_base2;
short * hits_read_len1 = thread_context -> hits_read_len1, * hits_read_len2 = thread_context -> hits_read_len2;
unsigned short cigar_read_len1 = 0, cigar_read_len2 = 0;
+ int cigar_sections, is_junction_read;
+ unsigned int Starting_Chro_Points[FC_CIGAR_PARSER_ITEMS];
+ unsigned short Starting_Read_Points[FC_CIGAR_PARSER_ITEMS];
+ unsigned short Section_Read_Lengths[FC_CIGAR_PARSER_ITEMS];
+ char * ChroNames[FC_CIGAR_PARSER_ITEMS];
+ char Event_After_Section[FC_CIGAR_PARSER_ITEMS];
+
int is_second_read;
- int maximum_NH_value = 1;
+ int maximum_NH_value = 1, NH_value;
int skipped_for_exonic = 0;
int first_read_quality_score = 0;
- int this_noproperly_paired_added = 0;
thread_context->all_reads++;
//if(thread_context->all_reads>1000000) printf("TA=%llu\n%s\n",thread_context->all_reads, thread_context -> line_buffer1);
@@ -1132,39 +1929,9 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
{
if(is_second_read && !global_context -> is_paired_end_mode_assign) break;
- char * line = is_second_read? thread_context -> line_buffer2:thread_context -> line_buffer1;
-
- //printf("LINE_BUF=%s\n",line);
-
- read_name = strtok_r(line,"\t", &tmp_tok_ptr); // read name
- if(!read_name)return;
-
- if(is_second_read)
- {
- if(read_name)
- {
- int x1;
- for(x1=0; read_name[x1]; x1++)
- {
- if(read_name[x1]=='/')
- {
- read_name[x1]=0;
- read_name1[x1]=0;
- break;
- }
- }
- //printf("R1=%s; R2=%s\n",read_name,read_name1 );
- if(strcmp_slash(read_name,read_name1)!=0)
- report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
- }
- }
- else
- read_name1 = read_name;
-
- char * mask_str = strtok_r(NULL,"\t", &tmp_tok_ptr);
- if((!mask_str) || !isdigit(mask_str[0])) return;
+ parse_bin(global_context -> sambam_chro_table, is_second_read?bin2:bin1, is_second_read?bin1:bin2 , &read_name, &alignment_masks , &read_chr, &read_pos, &mapping_qual, &mate_chr, &mate_pos, &fragment_length, &is_junction_read, &cigar_sections, Starting_Chro_Points, Starting_Read_Points, Section_Read_Lengths, ChroNames, Event_After_Section, &NH_value);
- alignment_masks = atoi(mask_str);
+ // SUBREADprintf(" RNAME=%s\n", read_name);
if(is_second_read == 0)
{
@@ -1178,11 +1945,6 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
if(global_context -> SAM_output_fp)
fprintf(global_context -> SAM_output_fp,"%s\tUnassigned_Unmapped\t*\t*\n", read_name);
- if(global_context -> is_paired_end_mode_assign){
- char * read_name2 = strtok_r(thread_context -> line_buffer2,"\t", &tmp_tok_ptr);
- if(strcmp_slash(read_name,read_name2)!=0)
- report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
- }
return; // do nothing if a read is unmapped, or the first read in a pair of reads is unmapped.
}
}
@@ -1190,41 +1952,18 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
if(global_context -> is_paired_end_mode_assign && (!global_context ->is_SEPEmix_warning_shown)){
if(((!global_context -> is_paired_end_input_file) && ( alignment_masks & SAM_FLAG_PAIRED_TASK )) || ((global_context -> is_paired_end_input_file) && 0 == ( alignment_masks & SAM_FLAG_PAIRED_TASK ))){
print_in_box(85,0,0," %c[31mBoth single-end and paired-end reads were found.", 27);
+ //SUBREADprintf("BAD READ:%s, FLAG=%d\n", read_name, alignment_masks);
global_context ->is_SEPEmix_warning_shown = 1;
}
}
-
-
- read_chr = strtok_r(NULL,"\t", &tmp_tok_ptr);
- if(!read_chr) return;
- char * read_pos_str = strtok_r(NULL,"\t", &tmp_tok_ptr);
- if(!read_pos_str) return;
-
- read_pos = atoi(read_pos_str);
- if(read_pos < 1 && read_pos_str[0]!='0') return;
-
- char * mapping_qual_str = strtok_r(NULL,"\t", &tmp_tok_ptr);
-
- CIGAR_str = strtok_r(NULL,"\t", &tmp_tok_ptr);
- if(!CIGAR_str)
- continue;
-
if(global_context -> min_mapping_quality_score>0)
{
- int mapping_qual =atoi(mapping_qual_str);
-
//printf("SECOND=%d; FIRST=%d; THIS=%d; Q=%d\n", is_second_read, first_read_quality_score, mapping_qual, );
if(( mapping_qual < global_context -> min_mapping_quality_score && ! global_context -> is_paired_end_mode_assign)||( is_second_read && max( first_read_quality_score, mapping_qual ) < global_context -> min_mapping_quality_score))
{
thread_context->read_counters.unassigned_mappingquality ++;
- if(global_context -> is_paired_end_mode_assign && 0==is_second_read){
- char * read_name2 = strtok_r(thread_context -> line_buffer2,"\t", &tmp_tok_ptr);
- if(strcmp_slash(read_name,read_name2)!=0)
- report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
- }
-
if(global_context -> SAM_output_fp)
{
fprintf(global_context -> SAM_output_fp,"%s\tUnassigned_MappingQuality\t*\tMapping_Quality=%d,%d\n", read_name, first_read_quality_score, mapping_qual);
@@ -1237,19 +1976,6 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
}
}
-
- long mate_pos = 0;
- char * mate_chr = NULL;
-
- if(is_second_read)
- {
- mate_chr = strtok_r(NULL,"\t", &tmp_tok_ptr);// mate_chr
- if(mate_chr[0]=='=') mate_chr = read_chr;
- char * mate_pos_str = strtok_r(NULL,"\t", &tmp_tok_ptr); // mate_pos
- mate_pos = atol(mate_pos_str);
-
- }
-
if(is_second_read == 0 && global_context -> is_paired_end_mode_assign &&
(global_context -> is_PE_distance_checked || global_context -> is_chimertc_disallowed)
)
@@ -1258,29 +1984,16 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
if(!is_half_mapped)
{
- char * mate_chrx = strtok_r(NULL,"\t", &tmp_tok_ptr); //get chr which the mate read is mapped to
- if(!mate_chrx) return;
- strtok_r(NULL,"\t", &tmp_tok_ptr);
- if(!tmp_tok_ptr) return;
- char * frag_len_str = strtok_r(NULL,"\t", &tmp_tok_ptr);
- if(!tmp_tok_ptr) return;
-
- fragment_length = abs(atoi(frag_len_str)); //get the fragment length
+ fragment_length = abs( fragment_length ); //get the fragment length
int is_first_read_negative_strand = (alignment_masks & SAM_FLAG_REVERSE_STRAND_MATCHED)?1:0;
int is_second_read_negative_strand = (alignment_masks & SAM_FLAG_MATE_REVERSE_STRAND_MATCHED)?1:0;
- if(mate_chrx[0]=='=' && is_first_read_negative_strand!=is_second_read_negative_strand)
- {
+ if(mate_chr == read_chr && is_first_read_negative_strand!=is_second_read_negative_strand) {
+ //^^^^^^^^^^^^^^^^^^^^ They are directly compared because they are both pointers in the same contig name table.
+ //
if(global_context -> is_PE_distance_checked && ((fragment_length > global_context -> max_paired_end_distance) || (fragment_length < global_context -> min_paired_end_distance)))
{
-
- if(global_context -> is_paired_end_mode_assign && 0==is_second_read){
- char * read_name2 = strtok_r(thread_context -> line_buffer2,"\t", &tmp_tok_ptr);
- if(strcmp_slash(read_name,read_name2)!=0)
- report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
- }
-
thread_context->read_counters.unassigned_fragmentlength ++;
if(global_context -> SAM_output_fp)
@@ -1292,13 +2005,6 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
{
if(global_context -> is_chimertc_disallowed)
{
-
- if(global_context -> is_paired_end_mode_assign && 0==is_second_read){
- char * read_name2 = strtok_r(thread_context -> line_buffer2,"\t", &tmp_tok_ptr);
- if(strcmp_slash(read_name,read_name2)!=0)
- report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
- }
-
thread_context->read_counters.unassigned_chimericreads ++;
if(global_context -> SAM_output_fp)
@@ -1309,21 +2015,12 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
}
}
- if(!tmp_tok_ptr) return;
-
-
// This filter has to be put here because the 0x400 FLAG is not about mapping but about sequencing.
// A unmapped read with 0x400 FLAG should be able to kill the mapped mate which may have no 0x400 FLAG.
if(global_context -> is_duplicate_ignored)
{
if(alignment_masks & SAM_FLAG_DUPLICATE)
{
- if(global_context -> is_paired_end_mode_assign && 0==is_second_read){
- char * read_name2 = strtok_r(thread_context -> line_buffer2,"\t", &tmp_tok_ptr);
- if(strcmp_slash(read_name,read_name2)!=0)
- report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
- }
-
thread_context->read_counters.unassigned_duplicate ++;
if(global_context -> SAM_output_fp)
fprintf(global_context -> SAM_output_fp,"%s\tUnassigned_Duplicate\t*\t*\n", read_name);
@@ -1335,64 +2032,25 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
if(SAM_FLAG_UNMAPPED & alignment_masks) continue;
- int NH_value = 1;
- char * NH_pos = strstr(tmp_tok_ptr,"\tNH:i:");
- if(NH_pos)
- {
- if(NH_pos[6]>'1' || isdigit(NH_pos[7]))
+ if( NH_value > 1 ) {
+ if(global_context -> is_multi_mapping_allowed == 0)
{
+ // now it is a NH>1 read!
+ // not allow multimapping -> discard!
+ thread_context->read_counters.unassigned_multimapping ++;
- if(is_second_read && read_1_chr)
- {
- if((strcmp(read_1_chr, mate_chr)!=0 || mate_pos!=read_1_pos) && read_1_chr[0] != '*' && mate_chr[0]!='*')
- report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
- }
- else
- {
- read_1_chr = read_chr;
- read_1_pos = read_pos;
- }
+ if(global_context -> SAM_output_fp)
+ fprintf(global_context -> SAM_output_fp,"%s\tUnassigned_MultiMapping\t*\t*\n", read_name);
+ return;
+ }
+ }
- if(global_context -> is_multi_mapping_allowed == 0)
- {
- // now it is a NH>1 read!
- // not allow multimapping -> discard!
- thread_context->read_counters.unassigned_multimapping ++;
-
- if(global_context -> SAM_output_fp)
- fprintf(global_context -> SAM_output_fp,"%s\tUnassigned_MultiMapping\t*\t*\n", read_name);
-
- if(global_context -> is_paired_end_mode_assign && is_second_read == 0){
- char * read_name2 = strtok_r(thread_context -> line_buffer2,"\t", &tmp_tok_ptr);
- if(strcmp_slash(read_name,read_name2)!=0)
- report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
- }
- return;
- }
- }
- int nh_i, NHtmpi=0;
- for(nh_i = 6; nh_i < 15; nh_i++){
- char nch = NH_pos[nh_i];
- if(isdigit(nch)){
- NHtmpi = NHtmpi * 10 + (nch-'0');
- }else{break; }
- }
- NH_value = NHtmpi;
- }
-
-
- maximum_NH_value = max(maximum_NH_value, NH_value);
+ maximum_NH_value = max(maximum_NH_value, NH_value);
// if a pair of reads have one secondary, the entire fragment is seen as secondary.
if((alignment_masks & SAM_FLAG_SECONDARY_MAPPING) && (global_context -> is_multi_mapping_allowed == ALLOW_PRIMARY_MAPPING))
{
- if(global_context -> is_paired_end_mode_assign && is_second_read == 0){
- char * read_name2 = strtok_r(thread_context -> line_buffer2,"\t", &tmp_tok_ptr);
- if(strcmp_slash(read_name,read_name2)!=0)
- report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
- }
-
thread_context->read_counters.unassigned_secondary ++;
if(global_context -> SAM_output_fp)
@@ -1412,118 +2070,114 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
is_fragment_negative_strand = global_context -> is_first_read_reversed?(!is_this_negative_strand):is_this_negative_strand;
}
- fc_chromosome_index_info * this_chro_info = HashTableGet(global_context -> exontable_chro_table, read_chr);
- if(this_chro_info == NULL)
- {
- if(global_context -> annot_chro_name_alias_table)
- {
- char * anno_chro_name = HashTableGet( global_context -> annot_chro_name_alias_table , read_chr);
- if(anno_chro_name)
- this_chro_info = HashTableGet(global_context -> exontable_chro_table, anno_chro_name);
- }
- if(this_chro_info == NULL && memcmp(read_chr, "chr", 3)==0)
- {
- this_chro_info = HashTableGet(global_context -> exontable_chro_table, read_chr+3);
- }
- //printf("NL=%s, CI=%p\n", (read_chr), this_chro_info);
- if(this_chro_info == NULL && strlen(read_chr)<=2)
- {
- strcpy(thread_context -> chro_name_buff, "chr");
- strcpy(thread_context -> chro_name_buff+3, read_chr);
- this_chro_info = HashTableGet(global_context -> exontable_chro_table, thread_context -> chro_name_buff);
- }
- }
-
- if(this_chro_info)
- {
- int nhits = 0;
+ int nhits = 0;
- int cigar_section_id, cigar_sections, is_junction_read = 0;
- int Starting_Chro_Points[FC_CIGAR_PARSER_ITEMS];
- unsigned short Starting_Read_Points[FC_CIGAR_PARSER_ITEMS];
- unsigned short Section_Lengths[FC_CIGAR_PARSER_ITEMS];
- long * hits_indices = (is_second_read?hits_indices2:hits_indices1);
- unsigned short * hits_read_start_base = is_second_read?hits_read_start_base2:hits_read_start_base1;
- short * hits_read_len = is_second_read?hits_read_len2:hits_read_len1;
- unsigned short * cigar_read_len = is_second_read?&cigar_read_len2:&cigar_read_len1;
+ int cigar_section_id;
+ long * hits_indices = (is_second_read?hits_indices2:hits_indices1);
+ unsigned short * hits_read_start_base = is_second_read?hits_read_start_base2:hits_read_start_base1;
+ short * hits_read_len = is_second_read?hits_read_len2:hits_read_len1;
+ unsigned short * cigar_read_len = is_second_read?&cigar_read_len2:&cigar_read_len1;
- cigar_sections = RSubread_parse_CIGAR_string(CIGAR_str, Starting_Chro_Points, Starting_Read_Points, Section_Lengths, &is_junction_read);
+ (*cigar_read_len) = Starting_Read_Points[cigar_sections-1] + Section_Read_Lengths[cigar_sections-1];
- (*cigar_read_len) = Starting_Read_Points[cigar_sections-1] + Section_Lengths[cigar_sections-1];
+ if(is_junction_read || !global_context->is_split_alignments_only)
+ {
+ //#warning "=================== COMMENT THESE 2 LINES ================================"
+ //for(cigar_section_id = 0; cigar_section_id<cigar_sections; cigar_section_id++)
+ // SUBREADprintf("BCCC: %llu , sec[%d] %s: %u ~ %u ; secs=%d ; flags=%d ; second=%d\n", read_pos, cigar_section_id , ChroNames[cigar_section_id] , Starting_Chro_Points[cigar_section_id], Section_Lengths[cigar_section_id], cigar_sections, alignment_masks, is_second_read);
- if(is_junction_read || !global_context->is_split_alignments_only)
+ if(global_context -> reduce_5_3_ends_to_one)
{
-
- //#warning "=================== COMMENT THESE 2 LINES ================================"
- //for(cigar_section_id = 0; cigar_section_id<cigar_sections; cigar_section_id++)
- // SUBREADprintf("BCCC: %llu , sec[%d] %d ~ %d ; secs=%d ; flags=%d ; second=%d\n", read_pos, cigar_section_id , Starting_Chro_Points[cigar_section_id], Section_Lengths[cigar_section_id], cigar_sections, alignment_masks, is_second_read);
- if(global_context -> reduce_5_3_ends_to_one)
+ if((REDUCE_TO_5_PRIME_END == global_context -> reduce_5_3_ends_to_one) + is_this_negative_strand == 1) // reduce to 5' end (small coordinate if positive strand / large coordinate if negative strand)
{
- if((REDUCE_TO_5_PRIME_END == global_context -> reduce_5_3_ends_to_one) + is_this_negative_strand == 1) // reduce to 5' end (small coordinate if positive strand / large coordinate if negative strand)
+ Section_Read_Lengths[0]=1;
+ }
+ else
+ {
+ Starting_Chro_Points[0] = Starting_Chro_Points[cigar_sections-1] + Section_Read_Lengths[cigar_sections-1] - 1;
+ Section_Read_Lengths[0]=1;
+ }
+
+ cigar_sections = 1;
+ }
+
+ // Extending the reads to the 3' and 5' ends. (from the read point of view)
+ if(global_context -> five_end_extension)
+ {
+ if(is_this_negative_strand){
+ Section_Read_Lengths [cigar_sections - 1] += global_context -> five_end_extension;
+ }else{
+ //SUBREADprintf("5-end extension: %d [%d]\n", Starting_Chro_Points[0], Section_Lengths[0]);
+ if( read_pos > global_context -> five_end_extension)
{
- Section_Lengths[0]=1;
+ Section_Read_Lengths [0] += global_context -> five_end_extension;
+ Starting_Chro_Points [0] -= global_context -> five_end_extension;
}
else
{
- Starting_Chro_Points[0] = Starting_Chro_Points[cigar_sections-1] + Section_Lengths[cigar_sections-1] - 1;
- Section_Lengths[0]=1;
+ Section_Read_Lengths [0] += read_pos-1;
+ Starting_Chro_Points [0] -= read_pos-1;
}
-
- cigar_sections = 1;
}
+ }
- // Extending the reads to the 3' and 5' ends. (from the read point of view)
- if(global_context -> five_end_extension)
- {
- if(is_this_negative_strand){
- Section_Lengths [cigar_sections - 1] += global_context -> five_end_extension;
- }else{
- //SUBREADprintf("5-end extension: %d [%d]\n", Starting_Chro_Points[0], Section_Lengths[0]);
- if( read_pos > global_context -> five_end_extension)
- {
- Section_Lengths [0] += global_context -> five_end_extension;
- Starting_Chro_Points [0] -= global_context -> five_end_extension;
- }
- else
- {
- Section_Lengths [0] += read_pos-1;
- Starting_Chro_Points [0] -= read_pos-1;
- }
+ if(global_context -> three_end_extension)
+ {
+
+ if(is_this_negative_strand){
+ if( read_pos > global_context -> three_end_extension)
+ {
+ Section_Read_Lengths [0] += global_context -> three_end_extension;
+ Starting_Chro_Points [0] -= global_context -> three_end_extension;
+ }
+ else
+ {
+ Section_Read_Lengths [0] += read_pos - 1;
+ Starting_Chro_Points [0] -= read_pos - 1;
}
}
+ else Section_Read_Lengths [cigar_sections - 1] += global_context -> three_end_extension;
- if(global_context -> three_end_extension)
- {
+ }
- if(is_this_negative_strand){
- if( read_pos > global_context -> three_end_extension)
- {
- Section_Lengths [0] += global_context -> three_end_extension;
- Starting_Chro_Points [0] -= global_context -> three_end_extension;
- }
- else
- {
- Section_Lengths [0] += read_pos - 1;
- Starting_Chro_Points [0] -= read_pos - 1;
- }
- }
- else Section_Lengths [cigar_sections - 1] += global_context -> three_end_extension;
+ for(cigar_section_id = 0; cigar_section_id<cigar_sections; cigar_section_id++)
+ {
+ long section_begin_pos = Starting_Chro_Points[cigar_section_id];
+ long section_end_pos = Section_Read_Lengths[cigar_section_id] + section_begin_pos - 1;
- }
+
+ int start_reverse_table_index = section_begin_pos / REVERSE_TABLE_BUCKET_LENGTH;
+ int end_reverse_table_index = (1+section_end_pos) / REVERSE_TABLE_BUCKET_LENGTH;
- //#warning "=================== COMMENT THESE 2 LINES ================================"
- //for(cigar_section_id = 0; cigar_section_id<cigar_sections; cigar_section_id++)
- // SUBREADprintf("ACCC: %llu , sec[%d] %u ~ %d ; secs=%d\n", read_pos, cigar_section_id, Starting_Chro_Points[cigar_section_id], Section_Lengths[cigar_section_id], cigar_sections);
+ /*if(ChroNames[cigar_section_id] < (char *)NULL + 0xfffff)
+ SUBREADprintf("DANGEROUS! RNAME=%s, CNAME=%p, LEN_P=%d, SECID=%d\n", read_name, ChroNames[cigar_section_id], Section_Read_Lengths[cigar_section_id], cigar_section_id);*/
- for(cigar_section_id = 0; cigar_section_id<cigar_sections; cigar_section_id++)
+ fc_chromosome_index_info * this_chro_info = HashTableGet(global_context -> exontable_chro_table, ChroNames[cigar_section_id]);
+ if(this_chro_info == NULL)
{
- long section_begin_pos = read_pos + Starting_Chro_Points[cigar_section_id];
- long section_end_pos = Section_Lengths[cigar_section_id] + section_begin_pos - 1;
+ if(global_context -> annot_chro_name_alias_table)
+ {
+ char * anno_chro_name = HashTableGet( global_context -> annot_chro_name_alias_table , ChroNames[cigar_section_id]);
+ if(anno_chro_name)
+ this_chro_info = HashTableGet(global_context -> exontable_chro_table, anno_chro_name);
+ }
+ if(this_chro_info == NULL && memcmp(ChroNames[cigar_section_id], "chr", 3)==0)
+ {
+ this_chro_info = HashTableGet(global_context -> exontable_chro_table, ChroNames[cigar_section_id]+3);
+ // SUBREADprintf("INQ: %p : '%s'\n", this_chro_info , ChroNames[cigar_section_id]+3);
+ }
+ if(this_chro_info == NULL && strlen(ChroNames[cigar_section_id])<=2)
+ {
+ strcpy(thread_context -> chro_name_buff, "chr");
+ strcpy(thread_context -> chro_name_buff+3, ChroNames[cigar_section_id]);
+ this_chro_info = HashTableGet(global_context -> exontable_chro_table, thread_context -> chro_name_buff);
+ }
+ }
-
- int start_reverse_table_index = section_begin_pos / REVERSE_TABLE_BUCKET_LENGTH;
- int end_reverse_table_index = (1+section_end_pos) / REVERSE_TABLE_BUCKET_LENGTH;
+ //SUBREADprintf("INF: %p : %s\n", this_chro_info , ChroNames[cigar_section_id]);
+ if(this_chro_info)
+ {
start_reverse_table_index = min(start_reverse_table_index, this_chro_info-> chro_possible_length / REVERSE_TABLE_BUCKET_LENGTH);
end_reverse_table_index = min(end_reverse_table_index, this_chro_info-> chro_possible_length / REVERSE_TABLE_BUCKET_LENGTH+ 1);
@@ -1588,30 +2242,22 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
}
}
}
- } else {
- if(global_context->is_split_alignments_only) // must be true.
- {
- skipped_for_exonic ++;
- if((is_second_read && skipped_for_exonic == 2) || (!global_context -> is_paired_end_mode_assign) || (alignment_masks & 0x8))
- {
- if(global_context -> is_paired_end_mode_assign && is_second_read == 0){
- char * read_name2 = strtok_r(thread_context -> line_buffer2,"\t", &tmp_tok_ptr);
- if(strcmp_slash(read_name,read_name2)!=0)
- report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
- }
-
- if(global_context -> SAM_output_fp)
- fprintf(global_context -> SAM_output_fp,"%s\tUnassigned_Nonjunction\t*\t*\n", read_name);
-
- thread_context->read_counters.unassigned_nonjunction ++;
- return;
- }
- }
}
+ }else if(global_context->is_split_alignments_only) // must be true.
+ {
+ skipped_for_exonic ++;
+ if((is_second_read && skipped_for_exonic == 2) || (!global_context -> is_paired_end_mode_assign) || (alignment_masks & 0x8))
+ {
+ if(global_context -> SAM_output_fp)
+ fprintf(global_context -> SAM_output_fp,"%s\tUnassigned_Nonjunction\t*\t*\n", read_name);
- if(is_second_read) nhits2 = nhits;
- else nhits1 = nhits;
+ thread_context->read_counters.unassigned_nonjunction ++;
+ return;
+ }
}
+
+ if(is_second_read) nhits2 = nhits;
+ else nhits1 = nhits;
} // loop for is_second_read
@@ -1659,12 +2305,54 @@ int count_bitmap_overlapping(char * x1_bitmap, unsigned short rl){
return ret;
}
+void add_fragment_supported_junction( fc_thread_global_context_t * global_context, fc_thread_thread_context_t * thread_context, fc_junction_info_t * supported_junctions1,
+ int njunc1, fc_junction_info_t * supported_junctions2, int njunc2){
+ assert(njunc1 >= 0 && njunc1 < FC_CIGAR_PARSER_ITEMS -1 );
+ assert(njunc2 >= 0 && njunc2 < FC_CIGAR_PARSER_ITEMS -1 );
+ int x1,x2, in_total_junctions = njunc2 + njunc1;
+ for(x1 = 0; x1 < in_total_junctions; x1 ++){
+ fc_junction_info_t * j_one = (x1 >= njunc1)?supported_junctions2+(x1-njunc1):(supported_junctions1+x1);
+ if(j_one->chromosome_name_left[0]==0) continue;
+
+ for(x2 = x1+1; x2 < in_total_junctions ; x2 ++){
+ fc_junction_info_t * j_two = (x2 >= njunc1)?supported_junctions2+(x2-njunc1):(supported_junctions1+x2);
+ if(j_two->chromosome_name_left[0]==0) continue;
+ if(
+ j_one -> last_exon_base_left == j_two -> last_exon_base_left &&
+ j_one -> first_exon_base_right == j_two -> first_exon_base_right &&
+ strcmp(j_one -> chromosome_name_left, j_two -> chromosome_name_left) == 0 &&
+ strcmp(j_one -> chromosome_name_right, j_two -> chromosome_name_right) == 0
+ ) j_two -> chromosome_name_left[0]=0;
+ }
+
+ char * this_key = malloc(strlen(j_one->chromosome_name_left) + strlen(j_one->chromosome_name_right) + 36);
+ sprintf(this_key, "%s\t%u\t%s\t%u", j_one->chromosome_name_left, j_one -> last_exon_base_left, j_one->chromosome_name_right, j_one -> first_exon_base_right);
+ void * count_ptr = HashTableGet(thread_context -> junction_counting_table, this_key);
+ unsigned long long count_junc = count_ptr - NULL;
+ HashTablePut(thread_context -> junction_counting_table, this_key, NULL+count_junc + 1);
+
+// #warning "CONTINUE SHOULD BE REMOVED!!!!"
+// continue;
+
+ char * left_key = malloc(strlen(j_one->chromosome_name_left) + 16);
+ char * right_key = malloc(strlen(j_one->chromosome_name_right) + 16);
+ sprintf(left_key, "%s\t%u", j_one->chromosome_name_left, j_one -> last_exon_base_left);
+ sprintf(right_key, "%s\t%u", j_one->chromosome_name_right, j_one -> first_exon_base_right);
+
+ for( x2 = 0 ; x2 < 2 ; x2++ ){
+ char * lr_key = x2?right_key:left_key;
+ count_ptr = HashTableGet(thread_context -> splicing_point_table, lr_key);
+ count_junc = count_ptr - NULL;
+ HashTablePut(thread_context -> splicing_point_table, lr_key, NULL + count_junc + 1);
+ }
+ }
+}
void vote_and_add_count(fc_thread_global_context_t * global_context, fc_thread_thread_context_t * thread_context,
long * hits_indices1, unsigned short * hits_read_start_base1, short * hits_read_len1, int nhits1, unsigned short rl1,
long * hits_indices2, unsigned short * hits_read_start_base2, short * hits_read_len2, int nhits2, unsigned short rl2,
- int fixed_fractional_count, char * read_name)
-{
+ int fixed_fractional_count, char * read_name){
+//char * read_name, fc_junction_info_t * supported_junctions1, int njunc1, fc_junction_info_t * supported_junctions2, int njunc2)
if(global_context -> calculate_overlapping_lengths == 0 && nhits2+nhits1==1)
{
long hit_exon_id = nhits2?hits_indices2[0]:hits_indices1[0];
@@ -1735,12 +2423,14 @@ void vote_and_add_count(fc_thread_global_context_t * global_context, fc_thread_t
char * x1_bitmap = read_coverage_bits + (MAX_READ_LENGTH /8 +1) * x1;
- //if(FIXLENstrcmp("V0112_0155:7:1308:19321:196983", read_name)==0)
+ //if(FIXLENstrcmp("V0112_0155:7:1207:5041:185928", read_name)==0)
// SUBREADprintf("CREATE bitmap: for x1 = %d, on read %d len = %d \n", x1, hits_read_start_base[x1], hits_read_len[x1]);
if(read_coverage_bits)
add_bitmap_overlapping(x1_bitmap, hits_read_start_base[x1], hits_read_len[x1]);
+ // if(FIXLENstrcmp("V0112_0155:7:1306:9527:74733", read_name)==0)
+ // SUBREADprintf("CREATE bitmap: for x1 = %d, on read %d len = %d \n", x1, hits_read_start_base[x1], hits_read_len[x1]);
long merge_key = global_context -> is_gene_level? global_context -> exontable_geneid[exon_no] : exon_no;
int x2;
for(x2=x1+1; x2<nhits; x2++)
@@ -1754,9 +2444,8 @@ void vote_and_add_count(fc_thread_global_context_t * global_context, fc_thread_t
if(read_coverage_bits)
add_bitmap_overlapping(x1_bitmap, hits_read_start_base[x2], hits_read_len[x2]);
- //if(FIXLENstrcmp("V0112_0155:7:1308:19321:196983", read_name)==0)
+ //if(FIXLENstrcmp("V0112_0155:7:1306:9527:74733", read_name)==0)
// SUBREADprintf("APPEND bitmap: for x1 = %d, on read %d len = %d \n", x1, hits_read_start_base[x2], hits_read_len[x2]);
-
//TODO: change this part to uniquely-overlapping length. Not simply adding.
//hits_read_start_base[x1]+=hits_read_start_base[x2];
hits_indices[x2]=0x7fffffff;
@@ -1836,8 +2525,13 @@ void vote_and_add_count(fc_thread_global_context_t * global_context, fc_thread_t
assert(read2_used[decision_table_no]<2);
assert(read1_used[decision_table_no]<2);
- if(global_context -> calculate_overlapping_lengths)
+ if(global_context -> calculate_overlapping_lengths){
+
+ //if(FIXLENstrcmp("V0112_0155:7:1306:9527:74733", read_name)==0)
+ // SUBREADprintf("READ DECIDE LEN=%d, += %d for %s\n", decision_total_lengths[decision_table_no], hits_read_len[hit_x1] , read_name);
+
decision_total_lengths[decision_table_no] += hits_read_len[hit_x1];
+ }
}
}
@@ -1847,6 +2541,8 @@ void vote_and_add_count(fc_thread_global_context_t * global_context, fc_thread_t
int maximum_decision_no = 0;
for(decision_table_no = 0; decision_table_no < decision_number; decision_table_no++)
{
+ //if(FIXLENstrcmp("V0112_0155:7:1306:9527:74733", read_name)==0)
+ // SUBREADprintf("READ DECIDE LEN=%d for %s\n", decision_total_lengths[decision_table_no] , read_name);
if(global_context -> fragment_minimum_overlapping == 1 || decision_total_lengths[decision_table_no] >= global_context -> fragment_minimum_overlapping)
{
int this_decision_score = global_context -> use_overlapping_break_tie? decision_total_lengths[decision_table_no] :(read1_used[decision_table_no] + read2_used[decision_table_no]);
@@ -1884,7 +2580,6 @@ void vote_and_add_count(fc_thread_global_context_t * global_context, fc_thread_t
fprintf(global_context -> SAM_output_fp,"%s\tAssigned\t%s\t*\n", read_name, final_feture_name);
}
thread_context->read_counters.assigned_reads ++;
-
}else if(global_context -> is_multi_overlap_allowed) {
char final_feture_names[1000];
int assigned_no = 0, xk1;
@@ -1916,10 +2611,9 @@ void vote_and_add_count(fc_thread_global_context_t * global_context, fc_thread_t
fprintf(global_context -> SAM_output_fp,"%s\tAssigned\t%s\tTotal=%d\n", read_name, final_feture_names, assigned_no);
}
thread_context->read_counters.assigned_reads ++;
-
} else {
if(global_context -> SAM_output_fp)
- fprintf(global_context -> SAM_output_fp,"%s\tUnassigned_Ambiguit\t*\tNumber_Of_Overlapped_Genes=%d\n", read_name, maximum_total_count);
+ fprintf(global_context -> SAM_output_fp,"%s\tUnassigned_Ambiguity\t*\tNumber_Of_Overlapped_Genes=%d\n", read_name, maximum_total_count);
thread_context->read_counters.unassigned_ambiguous ++;
}
@@ -1927,186 +2621,7 @@ void vote_and_add_count(fc_thread_global_context_t * global_context, fc_thread_t
}
}
-
-void * feature_count_worker(void * vargs)
-{
- void ** args = (void **) vargs;
-
- fc_thread_global_context_t * global_context = args[0];
- fc_thread_thread_context_t * thread_context = args[1];
-
- free(vargs);
-
-
- //printf("QQQ0:T%d\n", thread_context->thread_id);
- //Rprintf("QQQ1:T%d\n", thread_context->thread_id);
- //printf("QQQ2:T%d\n", thread_context->thread_id);
-
- if(global_context -> is_SAM_file)
- {
- //thread_context -> current_read_length1 = global_context -> read_length;
- //thread_context -> current_read_length2 = global_context -> read_length;
- while (1)
- {
- while(1)
- {
- int is_retrieved = 0;
- pthread_spin_lock(&thread_context->input_buffer_lock);
- if(thread_context->input_buffer_remainder)
- {
- int is_second_read;
- unsigned int buffer_read_bytes ;
- unsigned int buffer_read_ptr;
- if(thread_context->input_buffer_remainder <= thread_context->input_buffer_write_ptr)
- buffer_read_ptr = thread_context->input_buffer_write_ptr - thread_context->input_buffer_remainder;
- else
- buffer_read_ptr = thread_context->input_buffer_write_ptr + global_context->input_buffer_max_size - thread_context->input_buffer_remainder;
-
- //if(buffer_read_ptr>= global_context->input_buffer_max_size)
- // if(buffer_read_ptr>6*1024*1024) printf("REALLY BIG PTR:%u = %u + %u - %u\n", buffer_read_ptr, thread_context->input_buffer_write_ptr , global_context->input_buffer_max_size, thread_context->input_buffer_remainder);
-
- for(is_second_read = 0; is_second_read < (global_context->is_paired_end_mode_assign ? 2:1); is_second_read++)
- {
- char * curr_line_buff = is_second_read?thread_context -> line_buffer2:thread_context -> line_buffer1;
- //printf("R=%u; WPTR=%u ;RPTR=%u\n", thread_context->input_buffer_remainder, thread_context->input_buffer_write_ptr, buffer_read_ptr);
- //if(buffer_read_ptr % 7 == 0)
- // fflush(stdout);
-
- for(buffer_read_bytes=0; ; buffer_read_bytes++)
- {
- //printf("%p + %d\n", thread_context->input_buffer, buffer_read_ptr);
- //if(buffer_read_ptr>6*1024*1024) printf("VERY BIG PTR:%u > %u\n", buffer_read_ptr , global_context->input_buffer_max_size);
- char nch = thread_context->input_buffer[buffer_read_ptr ++];
- curr_line_buff[buffer_read_bytes] = nch;
- if(buffer_read_ptr >= global_context->input_buffer_max_size)
- buffer_read_ptr = 0;
- if(nch=='\n' || buffer_read_bytes>2998){
- curr_line_buff[buffer_read_bytes+1]=0;
- curr_line_buff[buffer_read_bytes+2]=0;
- break;
- }
- }
-
- //printf("%s\n", curr_line_buff);
-
- //if(buffer_read_bytes + 1 > thread_context->input_buffer_remainder)
- // (*(int*)NULL) = 1;
- thread_context->input_buffer_remainder -= buffer_read_bytes + 1;
- }
- is_retrieved = 1;
-
- }
-
- pthread_spin_unlock(&thread_context->input_buffer_lock);
- if(global_context->is_all_finished && !is_retrieved) return NULL;
-
- if(is_retrieved) break;
- else
- usleep(tick_time);
- }
-
-
-
- process_line_buffer(global_context, thread_context);
-
- }
- }
- else
- { // if is BAM: decompress the chunk and process reads.
- char * PDATA = malloc(2*70000);
- SamBam_Alignment * aln = &thread_context->aln_buffer;
-
- //thread_context -> current_read_length1 = global_context -> read_length;
- //thread_context -> current_read_length2 = global_context -> read_length;
- while(1)
- {
- int PDATA_len = 0;
- while(1)
- {
- int is_retrieved = 0;
- PDATA_len = 0;
- //retrieve the next chunk.
-
- pthread_spin_lock(&thread_context->input_buffer_lock);
- if(thread_context->input_buffer_remainder)
- {
- assert(thread_context->input_buffer_remainder>4);
- unsigned int tail_bytes = global_context->input_buffer_max_size - thread_context -> chunk_read_ptr ;
- if(tail_bytes<4)
- {
- thread_context -> chunk_read_ptr = 0;
- thread_context -> input_buffer_remainder -= tail_bytes;
- memcpy(&PDATA_len, thread_context->input_buffer + thread_context -> chunk_read_ptr , 4);
- }
- else
- {
- memcpy(&PDATA_len, thread_context->input_buffer + thread_context -> chunk_read_ptr , 4);
- if(PDATA_len==0)
- {
- thread_context -> chunk_read_ptr = 0;
- thread_context -> input_buffer_remainder -= tail_bytes;
- memcpy(&PDATA_len, thread_context->input_buffer , 4);
- }
- }
- thread_context -> chunk_read_ptr+=4;
- thread_context -> input_buffer_remainder -= 4;
-
- //fprintf(stderr,"chunk_read_ptr=%d , input_buffer_remainder = %d\n", thread_context -> chunk_read_ptr , thread_context -> input_buffer_remainder);
- if(PDATA_len<0 || PDATA_len > 140000)
- {
- SUBREADprintf("THREAD ABNORMALLY QUIT\n");
- return NULL;
- }
-
- memcpy(PDATA, thread_context -> input_buffer + thread_context -> chunk_read_ptr , PDATA_len);
- thread_context -> chunk_read_ptr += PDATA_len;
- thread_context -> input_buffer_remainder -= PDATA_len;
-
- if( PDATA_len > 0 )
- is_retrieved = 1;
- }
-
-
- pthread_spin_unlock(&thread_context->input_buffer_lock);
- if(global_context->is_all_finished && !is_retrieved){
- free(PDATA);
- return NULL;
- }
-
- if(is_retrieved) break;
- else
- usleep(tick_time);
-
- }
-
- // convert binary reads into sam lines and process;
- int processed_reads = 0, PDATA_ptr = 0;
- while(PDATA_ptr < PDATA_len)
- {
- int is_second_read;
- for(is_second_read = 0; is_second_read <= global_context -> is_paired_end_mode_assign; is_second_read++)
- {
- int binary_read_len, local_PDATA_ptr = PDATA_ptr;
- char * curr_line_buff = is_second_read?thread_context -> line_buffer2:thread_context -> line_buffer1;
-
- memcpy(&binary_read_len, PDATA + PDATA_ptr, 4);
- int ret = PBam_chunk_gets(PDATA, &local_PDATA_ptr, PDATA_len, global_context -> sambam_chro_table, curr_line_buff, 2999, aln,0);
- //printf("LL=%s\n", curr_line_buff);
- if(ret<0)
- SUBREADprintf("READ DECODING ERROR!\n");
-
- PDATA_ptr += 4+binary_read_len;
- processed_reads++;
- }
-
- process_line_buffer(global_context, thread_context);
- //printf("LE\n\n");
- }
- }
- }
-}
-
-void fc_thread_merge_results(fc_thread_global_context_t * global_context, read_count_type_t * nreads , unsigned long long int *nreads_mapped_to_exon, fc_read_counters * my_read_counter)
+void fc_thread_merge_results(fc_thread_global_context_t * global_context, read_count_type_t * nreads , unsigned long long int *nreads_mapped_to_exon, fc_read_counters * my_read_counter, HashTable * junction_global_table, HashTable * splicing_global_table)
{
int xk1, xk2;
@@ -2115,6 +2630,8 @@ void fc_thread_merge_results(fc_thread_global_context_t * global_context, read_c
(*nreads_mapped_to_exon)=0;
+ SAM_pairer_destroy(&global_context -> read_pairer);
+
for(xk1=0; xk1<global_context-> thread_number; xk1++)
{
for(xk2=0; xk2<global_context -> exontable_exons; xk2++)
@@ -2148,7 +2665,44 @@ void fc_thread_merge_results(fc_thread_global_context_t * global_context, read_c
my_read_counter->unassigned_nonjunction += global_context -> thread_contexts[xk1].read_counters.unassigned_nonjunction;
my_read_counter->unassigned_duplicate += global_context -> thread_contexts[xk1].read_counters.unassigned_duplicate;
my_read_counter->assigned_reads += global_context -> thread_contexts[xk1].read_counters.assigned_reads;
-
+
+ if(global_context -> do_junction_counting){
+ int bucket_i;
+ for(bucket_i = 0 ; bucket_i < global_context -> thread_contexts[xk1].junction_counting_table -> numOfBuckets; bucket_i++){
+ KeyValuePair * cursor;
+ cursor = global_context -> thread_contexts[xk1].junction_counting_table -> bucketArray[bucket_i];
+ while(cursor){
+ char * junckey = (char *) cursor -> key;
+
+ void * globval = HashTableGet(junction_global_table, junckey);
+ char * new_key = malloc(strlen(junckey)+1);
+ strcpy(new_key, junckey);
+ globval += (cursor -> value - NULL);
+ HashTablePut(junction_global_table, new_key, globval);
+ // new_key will be freed when it is replaced next time or when the global table is destroyed.
+
+ cursor = cursor->next;
+ }
+ }
+
+ for(bucket_i = 0 ; bucket_i < global_context -> thread_contexts[xk1].splicing_point_table -> numOfBuckets; bucket_i++){
+ KeyValuePair * cursor;
+ cursor = global_context -> thread_contexts[xk1].splicing_point_table -> bucketArray[bucket_i];
+ while(cursor){
+ char * junckey = (char *) cursor -> key;
+ void * globval = HashTableGet(splicing_global_table, junckey);
+ char * new_key = malloc(strlen(junckey)+1);
+ strcpy(new_key, junckey);
+
+ //if(xk1>0)
+ //SUBREADprintf("MERGE THREAD-%d : %s VAL=%u, ADD=%u\n", xk1, junckey, globval - NULL, cursor -> value - NULL);
+
+ globval += (cursor -> value - NULL);
+ HashTablePut(splicing_global_table, new_key, globval);
+ cursor = cursor->next;
+ }
+ }
+ }
}
char pct_str[10];
@@ -2205,7 +2759,7 @@ HashTable * load_alias_table(char * fname)
return ret;
}
-void fc_thread_init_global_context(fc_thread_global_context_t * global_context, unsigned int buffer_size, unsigned short threads, int line_length , int is_PE_data, int min_pe_dist, int max_pe_dist, int is_gene_level, int is_overlap_allowed, int is_strand_checked, char * output_fname, int is_sam_out, int is_both_end_required, int is_chimertc_disallowed, int is_PE_distance_checked, char *feature_name_column, char * gene_id_column, int min_map_qual_score, int is_multi_mapping_allowed, int i [...]
+void fc_thread_init_global_context(fc_thread_global_context_t * global_context, unsigned int buffer_size, unsigned short threads, int line_length , int is_PE_data, int min_pe_dist, int max_pe_dist, int is_gene_level, int is_overlap_allowed, int is_strand_checked, char * output_fname, int is_sam_out, int is_both_end_required, int is_chimertc_disallowed, int is_PE_distance_checked, char *feature_name_column, char * gene_id_column, int min_map_qual_score, int is_multi_mapping_allowed, int i [...]
{
global_context -> input_buffer_max_size = buffer_size;
@@ -2233,6 +2787,7 @@ void fc_thread_init_global_context(fc_thread_global_context_t * global_context,
global_context -> do_not_sort = is_not_sort;
global_context -> is_SAM_file = is_SAM;
global_context -> use_fraction_multi_mapping = use_fraction_multimapping;
+ global_context -> do_junction_counting = do_junction_cnt;
global_context -> thread_number = threads;
global_context -> min_mapping_quality_score = min_map_qual_score;
@@ -2241,7 +2796,6 @@ void fc_thread_init_global_context(fc_thread_global_context_t * global_context,
global_context -> unistr_buffer_space = malloc(global_context -> unistr_buffer_size);
global_context -> annot_chro_name_alias_table = NULL;
global_context -> cmd_rebuilt = cmd_rebuilt;
- global_context -> is_input_file_resort_needed = is_input_file_resort_needed;
global_context -> feature_block_size = feature_block_size;
global_context -> five_end_extension = fiveEndExtension;
global_context -> three_end_extension = threeEndExtension;
@@ -2261,7 +2815,7 @@ void fc_thread_init_global_context(fc_thread_global_context_t * global_context,
global_context -> read_counters.unassigned_nonjunction=0;
global_context -> read_counters.unassigned_duplicate=0;
global_context -> read_counters.assigned_reads=0;
-
+
if(alias_file_name && alias_file_name[0])
{
strcpy(global_context -> alias_file_name,alias_file_name);
@@ -2323,6 +2877,9 @@ int fc_thread_start_threads(fc_thread_global_context_t * global_context, int et_
global_context -> exontable_block_end_index = et_bk_end_index;
global_context -> exontable_block_max_end = et_bk_max_end;
global_context -> exontable_block_min_start = et_bk_min_start;
+ global_context -> sambam_chro_table_items = 0;
+ global_context -> sambam_chro_table = NULL;
+ pthread_spin_init(&global_context->sambam_chro_table_lock, PTHREAD_PROCESS_PRIVATE);
global_context -> is_all_finished = 0;
global_context -> thread_contexts = malloc(sizeof(fc_thread_thread_context_t) * global_context -> thread_number);
@@ -2338,8 +2895,6 @@ int fc_thread_start_threads(fc_thread_global_context_t * global_context, int et_
global_context -> thread_contexts[xk1].count_table = calloc(sizeof(read_count_type_t), et_exons);
global_context -> thread_contexts[xk1].nreads_mapped_to_exon = 0;
global_context -> thread_contexts[xk1].all_reads = 0;
- global_context -> thread_contexts[xk1].line_buffer1 = malloc(global_context -> line_length + 2);
- global_context -> thread_contexts[xk1].line_buffer2 = malloc(global_context -> line_length + 2);
global_context -> thread_contexts[xk1].chro_name_buff = malloc(CHROMOSOME_NAME_LENGTH);
global_context -> thread_contexts[xk1].strm_buffer = malloc(sizeof(z_stream));
if(global_context -> calculate_overlapping_lengths)
@@ -2359,15 +2914,30 @@ int fc_thread_start_threads(fc_thread_global_context_t * global_context, int et_
global_context -> thread_contexts[xk1].read_counters.unassigned_nonjunction = 0;
global_context -> thread_contexts[xk1].read_counters.unassigned_duplicate = 0;
+ if(global_context -> do_junction_counting)
+ {
+ global_context -> thread_contexts[xk1].junction_counting_table = HashTableCreate(131317);
+ HashTableSetHashFunction(global_context -> thread_contexts[xk1].junction_counting_table,HashTableStringHashFunction);
+ HashTableSetDeallocationFunctions(global_context -> thread_contexts[xk1].junction_counting_table, free, NULL);
+ HashTableSetKeyComparisonFunction(global_context -> thread_contexts[xk1].junction_counting_table, fc_strcmp_chro);
+
+ global_context -> thread_contexts[xk1].splicing_point_table = HashTableCreate(131317);
+ HashTableSetHashFunction(global_context -> thread_contexts[xk1].splicing_point_table,HashTableStringHashFunction);
+ HashTableSetDeallocationFunctions(global_context -> thread_contexts[xk1].splicing_point_table, free, NULL);
+ HashTableSetKeyComparisonFunction(global_context -> thread_contexts[xk1].splicing_point_table, fc_strcmp_chro);
+ }
+
if(!global_context -> thread_contexts[xk1].count_table) return 1;
void ** thread_args = malloc(sizeof(void *)*2);
thread_args[0] = global_context;
thread_args[1] = & global_context -> thread_contexts[xk1];
-
- if(global_context -> thread_number>1 || ! global_context -> is_SAM_file)
- pthread_create(&global_context -> thread_contexts[xk1].thread_object, NULL, feature_count_worker, thread_args);
}
+ char rand_prefix[200];
+ sprintf(rand_prefix, "./temp-core-%06u-%08X.sam", getpid(), rand());
+
+ SAM_pairer_create(&global_context -> read_pairer, global_context -> thread_number , 64, !global_context-> is_SAM_file, 1, !global_context -> is_paired_end_mode_assign, global_context ->is_paired_end_mode_assign && global_context -> do_not_sort ,0, global_context -> input_file_name, process_pairer_reset, process_pairer_header, process_pairer_output, rand_prefix, global_context);
+
return 0;
}
@@ -2386,74 +2956,24 @@ void fc_thread_destroy_thread_context(fc_thread_global_context_t * global_contex
if(global_context -> thread_contexts[xk1].read_coverage_bits)
free(global_context -> thread_contexts[xk1].read_coverage_bits);
free(global_context -> thread_contexts[xk1].count_table);
- free(global_context -> thread_contexts[xk1].line_buffer1);
- free(global_context -> thread_contexts[xk1].line_buffer2);
free(global_context -> thread_contexts[xk1].input_buffer);
free(global_context -> thread_contexts[xk1].chro_name_buff);
free(global_context -> thread_contexts[xk1].strm_buffer);
pthread_spin_destroy(&global_context -> thread_contexts[xk1].input_buffer_lock);
+ if(global_context -> do_junction_counting){
+ HashTableDestroy(global_context -> thread_contexts[xk1].junction_counting_table);
+ HashTableDestroy(global_context -> thread_contexts[xk1].splicing_point_table);
+ }
}
+
+ pthread_spin_destroy(&global_context->sambam_chro_table_lock);
free(global_context -> thread_contexts);
}
void fc_thread_wait_threads(fc_thread_global_context_t * global_context)
{
- int xk1;
- for(xk1=0; xk1<global_context-> thread_number; xk1++)
- pthread_join(global_context -> thread_contexts[xk1].thread_object, NULL);
-}
-
-int resort_input_file(fc_thread_global_context_t * global_context)
-{
- char * temp_file_name = malloc(300), * fline = malloc(3000);
- SamBam_FILE * sambam_reader ;
-
- if(!global_context->redo)
- print_in_box(80,0,0," Resort the input file ...");
- sprintf(temp_file_name, "./temp-core-%06u-%08X.sam", getpid(), rand());
- sambam_reader = SamBam_fopen(global_context-> input_file_name, global_context-> is_SAM_file?SAMBAM_FILE_SAM:SAMBAM_FILE_BAM);
-
- if(!sambam_reader){
- SUBREADprintf("Unable to open %s.\n", global_context-> input_file_name);
- return -1;
- }
-
- SAM_sort_writer writer;
- int ret = sort_SAM_create(&writer, temp_file_name, ".");
- if(ret)
- {
- SUBREADprintf("Unable to sort input file because temporary file '%s' cannot be created.\n", temp_file_name);
- return -1;
- }
- int is_read_len_warned = 0;
-
- while(1)
- {
- char * is_ret = SamBam_fgets(sambam_reader, fline, 2999, 1);
- if(!is_ret) break;
- int ret = sort_SAM_add_line(&writer, fline, strlen(fline));
- if(ret<0)
- {
- if(!is_read_len_warned)
- print_in_box(80,0,0,"WARNING: reads with very long names were found.");
- is_read_len_warned = 1;
- // break;
- }
- //printf("N1=%llu\n", writer.unpaired_reads);
- }
-
- sort_SAM_finalise(&writer);
- print_in_box(80,0,0," %llu read%s ha%s missing mates.", writer.unpaired_reads, writer.unpaired_reads>1?"s":"", writer.unpaired_reads>1?"ve":"s");
- print_in_box(80,0,0," Input was converted to a format accepted by featureCounts.");
-
- SamBam_fclose(sambam_reader);
- strcpy(global_context-> input_file_name, temp_file_name);
- global_context->is_SAM_file = 1;
- free(temp_file_name);
- free(fline);
- return 0;
+ SAM_pairer_run(&global_context -> read_pairer);
}
-
void BUFstrcat(char * targ, char * src, char ** buf){
int srclen = strlen(src);
if( (*buf) == NULL){
@@ -2784,6 +3304,7 @@ static struct option long_options[] =
{"donotsort", no_argument, 0, 0},
{"fraction", no_argument, 0, 0},
{"order", required_argument, 0, 'S'},
+ {"fasta", required_argument, 0, 0},
{"largestOverlap", no_argument, 0,0},
{0, 0, 0, 0}
};
@@ -2792,174 +3313,394 @@ void print_usage()
{
SUBREADprintf("\nVersion %s\n\n", SUBREAD_VERSION);
- SUBREADputs("\nUsage: featureCounts [options] -a <annotation_file> -o <output_file> input_file1 [input_file2] ... \n");
- SUBREADputs(" Required parameters:\n");
- SUBREADputs(" -a <input>\tGive the name of the annotation file. The program assumes");
- SUBREADputs(" \tthat the provided annotation file is in GTF format. Use -F");
- SUBREADputs(" \toption to specify other annotation formats.");
- SUBREADputs(" ");
- SUBREADputs(" -o <input>\tGive the name of the output file. The output file contains");
- SUBREADputs(" \tthe number of reads assigned to each meta-feature (or each");
- SUBREADputs(" \tfeature if -f is specified). A meta-feature is the aggregation");
- SUBREADputs(" \tof features, grouped by using gene identifiers. Please refer");
- SUBREADputs(" \tto the users guide for more details.");
- SUBREADputs(" ");
- SUBREADputs(" input_files\tGive the names of input read files that include the read");
- SUBREADputs(" \tmapping results. Format of input files is automatically");
- SUBREADputs(" \tdetermined (SAM or BAM). Paired-end reads will be");
- SUBREADputs(" \tautomatically re-ordered if it is found that reads from the");
- SUBREADputs(" \tsame pair are not adjacent to each other. Multiple files can");
- SUBREADputs(" \tbe provided at the same time.");
- SUBREADputs(" ");
- SUBREADputs(" Optional parameters:");
- SUBREADputs(" ");
- SUBREADputs(" -A <input>\tSpecify the name of a file including aliases of chromosome");
- SUBREADputs(" \tnames. The file should be a comma delimited text file that");
- SUBREADputs(" \tincludes two columns. The first column gives the chromosome");
- SUBREADputs(" \tnames used in the annotation and the second column gives the");
- SUBREADputs(" \tchromosome names used by reads. This file should not contain");
- SUBREADputs(" \theader lines. Names included in this file are case sensitive.");
- SUBREADputs(" ");
- SUBREADputs(" -F <input>\tSpecify the format of the annotation file. Acceptable formats");
- SUBREADputs(" \tinclude `GTF' and `SAF'. `GTF' by default. Please refer to the");
- SUBREADputs(" \tusers guide for SAF annotation format.");
- SUBREADputs(" ");
- SUBREADputs(" -t <input>\tSpecify the feature type. Only rows which have the matched");
- SUBREADputs(" \tmatched feature type in the provided GTF annotation file");
- SUBREADputs(" \twill be included for read counting. `exon' by default.");
- SUBREADputs(" ");
- SUBREADputs(" -g <input>\tSpecify the attribute type used to group features (eg. exons)");
- SUBREADputs(" \tinto meta-features (eg. genes), when GTF annotation is provided.");
- SUBREADputs(" \t`gene_id' by default. This attribute type is usually the gene");
- SUBREADputs(" \tidentifier. This argument is useful for the meta-feature level");
- SUBREADputs(" \tsummarization.");
- SUBREADputs(" ");
- SUBREADputs(" -f \tIf specified, read summarization will be performed at the ");
- SUBREADputs(" \tfeature level (eg. exon level). Otherwise, it is performed at");
- SUBREADputs(" \tmeta-feature level (eg. gene level).");
- SUBREADputs(" ");
- SUBREADputs(" -O \tIf specified, reads (or fragments if -p is specified) will");
- SUBREADputs(" \tbe allowed to be assigned to more than one matched meta-");
- SUBREADputs(" \tfeature (or feature if -f is specified). ");
- SUBREADputs(" ");
- SUBREADputs(" -s <int> \tIndicate if strand-specific read counting should be performed.");
- SUBREADputs(" \tIt has three possible values: 0 (unstranded), 1 (stranded) and");
- SUBREADputs(" \t2 (reversely stranded). 0 by default.");
- SUBREADputs(" ");
- SUBREADputs(" -M \tIf specified, multi-mapping reads/fragments will be counted (ie.");
- SUBREADputs(" \ta multi-mapping read will be counted up to N times if it has N");
- SUBREADputs(" \treported mapping locations). The program uses the `NH' tag to");
- SUBREADputs(" \tfind multi-mapping reads.");
- SUBREADputs(" ");
- SUBREADputs(" -Q <int> \tThe minimum mapping quality score a read must satisfy in order");
- SUBREADputs(" \tto be counted. For paired-end reads, at least one end should");
- SUBREADputs(" \tsatisfy this criteria. 0 by default.");
- SUBREADputs(" ");
- SUBREADputs(" -T <int> \tNumber of the threads. 1 by default.");
- SUBREADputs(" ");
- SUBREADputs(" -v \tOutput version of the program.");
- SUBREADputs(" ");
- SUBREADputs(" -R \tOutput read counting result for each read/fragment. For each");
- SUBREADputs(" \tinput read file, read counting results for reads/fragments will");
- SUBREADputs(" \tbe saved to a tab-delimited file that contains four columns");
- SUBREADputs(" \tincluding read name, status(assigned or the reason if not");
- SUBREADputs(" \tassigned), name of target feature/meta-feature and number of");
- SUBREADputs(" \thits if the read/fragment is counted multiple times. Name of");
- SUBREADputs(" \tthe file is the same as name of the input read file except a");
- SUBREADputs(" \tsuffix `.featureCounts' is added.");
- SUBREADputs(" ");
- SUBREADputs(" --largestOverlap If specified, reads (or fragments) will be");
- SUBREADputs(" \tassigned to the target that has the largest number of overlapping");
- SUBREADputs(" \tbases.");
- SUBREADputs(" ");
- SUBREADputs(" --minOverlap <int> Specify the minimum required number of");
- SUBREADputs(" \toverlapping bases between a read (or a fragment) and a feature.");
- SUBREADputs(" \t1 by default. If a negative value is provided, the read will be");
- SUBREADputs(" \textended from both ends.");
- SUBREADputs(" ");
- SUBREADputs(" --readExtension5 <int> Reads are extended upstream by <int> bases from");
- SUBREADputs(" \ttheir 5' end.");
- SUBREADputs(" ");
- SUBREADputs(" --readExtension3 <int> Reads are extended upstream by <int> bases from");
- SUBREADputs(" \ttheir 3' end.");
- SUBREADputs(" ");
- SUBREADputs(" --read2pos <5:3> The read is reduced to its 5' most base or 3'");
- SUBREADputs(" \tmost base. Read summarization is then performed based on the");
- SUBREADputs(" \tsingle base which the read is reduced to.");
- SUBREADputs(" ");
- SUBREADputs(" --fraction\tIf specified, a fractional count 1/n will be generated for each");
- SUBREADputs(" \tmulti-mapping read, where n is the number of alignments (indica-");
- SUBREADputs(" \tted by 'NH' tag) reported for the read. This option must be used");
- SUBREADputs(" \ttogether with the '-M' option.");
- SUBREADputs(" ");
- SUBREADputs(" --primary \tIf specified, only primary alignments will be counted. Primary");
- SUBREADputs(" \tand secondary alignments are identified using bit 0x100 in the");
- SUBREADputs(" \tFlag field of SAM/BAM files. All primary alignments in a dataset");
- SUBREADputs(" \twill be counted no matter they are from multi-mapping reads or");
- SUBREADputs(" \tnot ('-M' is ignored). ");
- SUBREADputs(" ");
- SUBREADputs(" --ignoreDup If specified, reads that were marked as");
- SUBREADputs(" \tduplicates will be ignored. Bit Ox400 in FLAG field of SAM/BAM");
- SUBREADputs(" \tfile is used for identifying duplicate reads. In paired end");
- SUBREADputs(" \tdata, the entire read pair will be ignored if at least one end");
- SUBREADputs(" \tis found to be a duplicate read.");
- SUBREADputs(" ");
- SUBREADputs(" --countSplitAlignmentsOnly If specified, only split alignments (CIGAR");
- SUBREADputs(" \tstrings containing letter `N') will be counted. All the other");
- SUBREADputs(" \talignments will be ignored. An example of split alignments is");
- SUBREADputs(" \tthe exon-spanning reads in RNA-seq data.");
- SUBREADputs(" ");
- SUBREADputs(" Optional paired-end parameters:");
- SUBREADputs(" ");
- SUBREADputs(" -p \tIf specified, fragments (or templates) will be counted instead");
- SUBREADputs(" \tof reads. This option is only applicable for paired-end reads.");
- SUBREADputs(" \tThe two reads from the same fragment must be adjacent to each");
- SUBREADputs(" \tother in the provided SAM/BAM file.");
- SUBREADputs(" ");
- SUBREADputs(" -P \tIf specified, paired-end distance will be checked when assigning");
- SUBREADputs(" \tfragments to meta-features or features. This option is only");
- SUBREADputs(" \tapplicable when -p is specified. The distance thresholds should");
- SUBREADputs(" \tbe specified using -d and -D options.");
- SUBREADputs(" ");
- SUBREADputs(" -d <int> \tMinimum fragment/template length, 50 by default.");
- SUBREADputs(" ");
- SUBREADputs(" -D <int> \tMaximum fragment/template length, 600 by default.");
- SUBREADputs(" ");
- SUBREADputs(" -B \tIf specified, only fragments that have both ends ");
- SUBREADputs(" \tsuccessfully aligned will be considered for summarization.");
- SUBREADputs(" \tThis option is only applicable for paired-end reads.");
- SUBREADputs(" ");
- SUBREADputs(" -S <ff:fr:rf> Orientation of the two read from the same pair, 'fr' by");
- SUBREADputs(" \tby default.");
- SUBREADputs(" ");
- SUBREADputs(" -C \tIf specified, the chimeric fragments (those fragments that ");
- SUBREADputs(" \thave their two ends aligned to different chromosomes) will");
- SUBREADputs(" \tNOT be included for summarization. This option is only ");
- SUBREADputs(" \tapplicable for paired-end read data.");
- SUBREADputs(" ");
- SUBREADputs(" --donotsort If specified, paired end reads will not be reordered even if");
- SUBREADputs(" \treads from the same pair were found not to be next to each other");
- SUBREADputs(" \tin the input. ");
- SUBREADputs(" ");
+ SUBREADputs("Usage: featureCounts [options] -a <annotation_file> -o <output_file> input_file1 [input_file2] ... \n");
+ SUBREADputs("Required arguments:");
+ SUBREADputs("");
+ SUBREADputs(" -a <string> Name of an annotation file. GTF format by default. See -F ");
+ SUBREADputs(" option for more formats.");
+ SUBREADputs("");
+ SUBREADputs(" -o <string> Name of the output file including read counts. A separate ");
+ SUBREADputs(" file including summary statistics of counting results is ");
+ SUBREADputs(" also included in the output (`<string>.summary')");
+ SUBREADputs("");
+ SUBREADputs(" input_files List of input files in BAM or SAM format. Users do not ");
+ SUBREADputs(" need to specify it is BAM or SAM.");
+ SUBREADputs("");
+ SUBREADputs("Optional arguments:");
+ SUBREADputs("");
+ SUBREADputs(" -A <string> Name of a comma delimited file including chromosome alias ");
+ SUBREADputs(" names used to match chromosome names used in annotation ");
+ SUBREADputs(" with those used in BAM/SAM input, if they are different. ");
+ SUBREADputs(" See Users Guide for file format.");
+ SUBREADputs("");
+ SUBREADputs(" -F <string> Specify format of provided annotation file. Acceptable ");
+ SUBREADputs(" formats include `GTF' and `SAF'. `GTF' by default. See ");
+ SUBREADputs(" Users Guide for description of SAF format.");
+ SUBREADputs("");
+ SUBREADputs(" -t <string> Specify feature type in GTF annotation. `exon' by ");
+ SUBREADputs(" default. Features used for read counting will be ");
+ SUBREADputs(" extracted from annotation using the provided value.");
+ SUBREADputs("");
+ SUBREADputs(" -g <string> Specify attribute type in GTF annotation. `gene_id' by ");
+ SUBREADputs(" default. Meta-features used for read counting will be ");
+ SUBREADputs(" extracted from annotation using the provided value.");
+ SUBREADputs("");
+ SUBREADputs(" -f Perform read counting at feature level (eg. counting ");
+ SUBREADputs(" reads for exons rather than genes).");
+ SUBREADputs("");
+ SUBREADputs(" -O Assign reads to all their overlapping meta-features (or ");
+ SUBREADputs(" features if -f is specified).");
+ SUBREADputs("");
+ SUBREADputs(" -s <int> Perform strand-specific read counting. Possible values: ");
+ SUBREADputs(" 0 (unstranded), 1 (stranded) and 2 (reversely stranded). ");
+ SUBREADputs(" 0 by default.");
+ SUBREADputs("");
+ SUBREADputs(" -M Multi-mapping reads will also be counted. For a multi-");
+ SUBREADputs(" mapping read, all its reported alignments will be ");
+ SUBREADputs(" counted. The `NH' tag in BAM/SAM input is used to detect ");
+ SUBREADputs(" multi-mapping reads.");
+ SUBREADputs("");
+ SUBREADputs(" -R Output detailed assignment result for each read. A text ");
+ SUBREADputs(" file will be generated for each input file, including ");
+ SUBREADputs(" names of reads and meta-features/features reads were ");
+ SUBREADputs(" assigned to. See Users Guide for more details.");
+ SUBREADputs("");
+ SUBREADputs(" --largestOverlap Assign reads to a meta-feature/feature that has the ");
+ SUBREADputs(" largest number of overlapping bases.");
+ SUBREADputs("");
+ SUBREADputs(" --minOverlap <int> Specify minimum number of overlapping bases requried ");
+ SUBREADputs(" between a read and a meta-feature/feature that the read ");
+ SUBREADputs(" is assigned to. 1 by default.");
+ SUBREADputs("");
+ SUBREADputs(" --read2pos <5:3> Reduce reads to their 5' most base or 3' most base. Read ");
+ SUBREADputs(" counting is then performed based on the single base the ");
+ SUBREADputs(" read is reduced to.");
+ SUBREADputs("");
+ SUBREADputs(" --fraction Use a fractional count 1/n, instead of 1 (one) count, for ");
+ SUBREADputs(" each reported alignment of a multi-mapping read in read ");
+ SUBREADputs(" counting. n is total number of alignments reported for ");
+ SUBREADputs(" the multi-mapping read. This option must be used together ");
+ SUBREADputs(" with '-M' option.");
+ SUBREADputs("");
+ SUBREADputs(" --primary Count primary alignments only. Primary alignments are ");
+ SUBREADputs(" identified using bit 0x100 in SAM/BAM FLAG field.");
+ SUBREADputs("");
+ SUBREADputs(" --ignoreDup Ignore duplicate reads in read counting. Duplicate reads ");
+ SUBREADputs(" are identified using bit Ox400 in BAM/SAM FLAG field. The ");
+ SUBREADputs(" whole read pair is ignored if one of the reads is a ");
+ SUBREADputs(" duplicate read for paired end data.");
+ SUBREADputs("");
+ SUBREADputs(" --countSplitAlignmentsOnly Count split alignments only (ie. alignments with ");
+ SUBREADputs(" CIGAR string containing `N'). An example of split ");
+ SUBREADputs(" alignments is exon-spanning reads in RNA-seq data.");
+ SUBREADputs("");
+ SUBREADputs(" -p Count fragments (read pairs) instead of individual reads. ");
+ SUBREADputs(" For each read pair, its two reads must be adjacent to ");
+ SUBREADputs(" each other in BAM/SAM input.");
+ SUBREADputs("");
+ SUBREADputs(" -P Check validity of paired-end distance when counting read ");
+ SUBREADputs(" pairs. Use -d and -D to set thresholds.");
+ SUBREADputs("");
+ SUBREADputs(" -B Count read pairs that have both ends successfully aligned ");
+ SUBREADputs(" only.");
+ SUBREADputs("");
+ SUBREADputs(" -S <ff:fr:rf> Specify orientation of two reads from the same pair, 'fr' ");
+ SUBREADputs(" by by default (forward/reverse).");
+ SUBREADputs("");
+ SUBREADputs(" -C Do not count read pairs that have their two ends mapping ");
+ SUBREADputs(" to different chromosomes or mapping to same chromosome ");
+ SUBREADputs(" but on different strands.");
+ SUBREADputs("");
+ SUBREADputs(" --donotsort Do not sort reads in BAM/SAM input. Note that reads from ");
+ SUBREADputs(" the same pair are required to be located next to each ");
+ SUBREADputs(" other in the input.");
+ SUBREADputs("");
}
+/* Comparison callback handed to merge_sort(): treats inptr as an array of
+ * C strings and orders entries i and j with strcmp(). */
+int junckey_sort_compare(void * inptr, int i, int j){
+	char ** keys = inptr;
+	char * left_key = keys[i];
+	char * right_key = keys[j];
+	return strcmp(left_key, right_key);
+}
+/* Exchange callback handed to merge_sort(): swaps the string pointers stored
+ * at positions i and j of the array behind inptr. */
+void junckey_sort_exchange(void * inptr, int i, int j){
-int readSummary_single_file(fc_thread_global_context_t * global_context, read_count_type_t * column_numbers, int nexons, int * geneid, char ** chr, long * start, long * stop, unsigned char * sorted_strand, char * anno_chr_2ch, char ** anno_chrs, long * anno_chr_head, long * block_end_index, long * block_min_start , long * block_max_end, fc_read_counters * my_read_counter);
+	char ** keys = inptr;
+	char * held = keys[j];
+	keys[j] = keys[i];
+	keys[i] = held;
+}
-int readSummary(int argc,char *argv[]){
+/*
+ * Merge callback handed to merge_sort(): combines two adjacent sorted runs of
+ * string pointers, [start, start+items1) and [start+items1, start+items1+items2),
+ * into one sorted run written back over the same range.
+ * An entry from the second run is taken ahead of the first only when it
+ * compares strictly smaller, so equal keys keep their relative order.
+ */
+void junckey_sort_merge(void * inptr, int start, int items1, int items2){
+	char ** keys = (char **) inptr;
+	char ** merged = malloc(sizeof(char *) * (items1+items2));
+	int left = start, left_end = start+items1;
+	int right = start+items1, right_end = start+items1+items2;
+	int filled = 0;
+
+	// both runs still hold entries: compare their heads
+	while(left < left_end && right < right_end){
+		if(junckey_sort_compare(inptr, left, right) > 0)
+			merged[filled++] = keys[right++];
+		else
+			merged[filled++] = keys[left++];
+	}
+
+	// one run is exhausted: copy whatever remains of the other without comparing
+	while(left < left_end)
+		merged[filled++] = keys[left++];
+	while(right < right_end)
+		merged[filled++] = keys[right++];
+
+	memcpy(keys + start, merged, sizeof(char *)*(items1+items2));
+	free(merged);
+}
- /*
- This function counts the number of reads falling into each exon region.
- The order of exons in the output is the same as that of exons included in the annotation.
- The annotation, if provided as a file, should be sorted by chromosome name.
+/* Equality test on gene names: yields 0 when j1 and j2 have the same
+ * gene_name and 1 otherwise. It is not an ordering comparator. */
+int junccmp(fc_junction_gene_t * j1, fc_junction_gene_t * j2){
+	return strcmp( j1 -> gene_name, j2 -> gene_name ) != 0;
+}
- Parameters passed from the featureCounts R function:
- 0: "readSummary"
- 1: ann
- 2: files[i]
- 3: fout
- 4: as.numeric(isPairedEnd)
+
+/*
+ * Write the junction table to "<output_file_name>.junctions".
+ *
+ * Parameters:
+ *   global_context             - shared context; handed to locate_junc_features() and read
+ *                                for the optional fasta_contigs used to call the strand.
+ *   output_file_name           - prefix of the output file; ".junctions" is appended.
+ *   table_columns              - one entry per input file; a NULL entry marks a file that
+ *                                was given up on and is skipped in every loop below.
+ *   input_file_names           - input file names stored back to back, each terminated by
+ *                                '\0' (walked with strlen()+1).
+ *   n_input_files              - number of entries in the per-file arrays.
+ *   junction_global_table_list - per-file tables mapping a junction key to a count that is
+ *                                stored directly in the value pointer.
+ *   splicing_global_table_list - per-file tables whose values (also stored in the value
+ *                                pointer) are summed per key into a merged table that is
+ *                                later queried with "chro\tpos" keys.
+ *
+ * Junction keys are tab-separated "chro1\tpos1\tchro2\tpos2" strings. They are tokenised
+ * in place with strtok_r() and the tabs are put back before the per-file lookups.
+ *
+ * If the output file cannot be created, an error is reported and the function returns
+ * without writing anything.
+ */
+void fc_write_final_junctions(fc_thread_global_context_t * global_context, char * output_file_name, read_count_type_t ** table_columns, char * input_file_names, int n_input_files, HashTable ** junction_global_table_list, HashTable ** splicing_global_table_list){
+	int infile_i;
+
+	// The merged tables reuse the key strings of the per-file tables, hence no deallocators.
+	HashTable * merged_junction_table = HashTableCreate(156679);
+
+	HashTableSetHashFunction(merged_junction_table,HashTableStringHashFunction);
+	HashTableSetDeallocationFunctions(merged_junction_table, NULL, NULL);
+	HashTableSetKeyComparisonFunction(merged_junction_table, fc_strcmp_chro);
+
+	HashTable * merged_splicing_table = HashTableCreate(156679);
+
+	HashTableSetHashFunction(merged_splicing_table,HashTableStringHashFunction);
+	HashTableSetDeallocationFunctions(merged_splicing_table, NULL, NULL);
+	HashTableSetKeyComparisonFunction(merged_splicing_table, fc_strcmp_chro);
+
+
+	// Pass 1: sum the splicing-site values of all usable input files.
+	for(infile_i = 0 ; infile_i < n_input_files ; infile_i ++){
+		if(!table_columns[infile_i]) continue;	// bad input file
+		KeyValuePair * cursor;
+		int bucket;
+		for(bucket=0; bucket < splicing_global_table_list[infile_i] -> numOfBuckets; bucket++)
+		{
+			cursor = splicing_global_table_list[infile_i] -> bucketArray[bucket];
+			while (cursor)
+			{
+				char * ky = (char *)cursor -> key;
+				// counts are encoded as pointer offsets from NULL
+				unsigned int old_supp = HashTableGet(merged_splicing_table, ky) - NULL;
+				old_supp += (cursor -> value - NULL);
+				HashTablePut(merged_splicing_table, ky, NULL+old_supp);
+				cursor = cursor -> next;
+			}
+		}
+	}
+
+	// Pass 2: collect the union of junction keys seen in any usable input file.
+	for(infile_i = 0 ; infile_i < n_input_files ; infile_i ++){
+		if(!table_columns[infile_i]) continue;	// bad input file
+		KeyValuePair * cursor;
+		int bucket;
+		for(bucket=0; bucket < junction_global_table_list[infile_i] -> numOfBuckets; bucket++)
+		{
+			cursor = junction_global_table_list[infile_i] -> bucketArray[bucket];
+			while (cursor)
+			{
+				char * ky = (char *)cursor -> key;
+
+				if(HashTableGet(merged_junction_table, ky)==NULL)
+					HashTablePut(merged_junction_table, ky, NULL+1);
+				cursor = cursor -> next;
+			}
+		}
+	}
+
+	// Flatten the merged key set into an array and sort it with strcmp() ordering.
+	char ** key_list;
+	key_list = malloc(sizeof(char *) * merged_junction_table -> numOfElements);
+
+	KeyValuePair * cursor;
+	int bucket, ky_i = 0;
+	for(bucket=0; bucket < merged_junction_table -> numOfBuckets; bucket++){
+		cursor = merged_junction_table -> bucketArray[bucket];
+		while (cursor){
+			char * ky = (char *)cursor -> key;
+
+			key_list[ky_i ++] = ky;
+			cursor = cursor -> next;
+		}
+	}
+
+	merge_sort(key_list, merged_junction_table -> numOfElements , junckey_sort_compare, junckey_sort_exchange, junckey_sort_merge);
+
+	// Build the output name in a buffer sized for the actual prefix (a fixed 300-byte
+	// buffer overflows for long paths), and refuse to go on if the file cannot be created.
+	size_t outfname_size = strlen(output_file_name) + strlen(".junctions") + 1;
+	char * outfname = malloc(outfname_size);
+	FILE * ofp = NULL;
+	if(outfname){
+		snprintf(outfname, outfname_size, "%s.junctions", output_file_name);
+		ofp = fopen(outfname, "w");
+	}
+	free(outfname);
+
+	if(ofp == NULL){
+		SUBREADprintf("ERROR: unable to create the junction file '%s.junctions'.\n", output_file_name);
+		free(key_list);
+		HashTableDestroy(merged_junction_table);
+		HashTableDestroy(merged_splicing_table);
+		return;
+	}
+
+	int max_junction_genes = 3000;
+	char * gene_names = malloc(max_junction_genes * FEATURE_NAME_LENGTH), * gene_name_tail;
+	fc_junction_gene_t ** ret_juncs_small = malloc(sizeof(fc_junction_gene_t *) * max_junction_genes);
+	fc_junction_gene_t ** ret_juncs_large = malloc(sizeof(fc_junction_gene_t *) * max_junction_genes);
+	fc_junction_gene_t ** junction_key_list = malloc(sizeof(fc_junction_gene_t *)* max_junction_genes * 2);
+	unsigned int * junction_support_list = malloc(sizeof(int)* max_junction_genes * 2);
+	unsigned char * junction_source_list = malloc(sizeof(char)* max_junction_genes * 2 );
+
+	int ky_i1, ky_i2;
+	char * tmpp = NULL;
+	char * next_fn = input_file_names;
+
+	fprintf(ofp, "#PrimaryGene\tSecondaryGenes\tChro1\tSplicePoint1\tStrand1\tChro2\tSplicePoint2\tStrand2");
+
+	// One count column per usable input file, in input order.
+	for(infile_i=0; infile_i < n_input_files; infile_i++)
+	{
+		if(!next_fn||strlen(next_fn)<1) break;
+		if(table_columns[infile_i])
+			fprintf(ofp,"\t%s", next_fn);
+
+		next_fn += strlen(next_fn)+1;
+	}
+	fprintf(ofp, "\n");
+
+	for(ky_i = 0; ky_i < merged_junction_table -> numOfElements ; ky_i ++){
+
+		//SUBREADprintf("KY=%s\n", key_list[ky_i]);
+
+		// Split the key in place; the separators are restored further down.
+		int unique_junctions = 0;
+		char * chro_small = strtok_r( key_list[ky_i] , "\t", &tmpp);
+		char * pos_small_str = strtok_r( NULL, "\t", &tmpp);
+		char * chro_large = strtok_r( NULL, "\t", &tmpp);
+		char * pos_large_str = strtok_r( NULL, "\t", &tmpp);
+
+		unsigned int pos_small = atoi(pos_small_str);
+		unsigned int pos_large = atoi(pos_large_str);
+
+		int found_features_small = locate_junc_features(global_context, chro_small, pos_small, ret_juncs_small , max_junction_genes);
+		int found_features_large = locate_junc_features(global_context, chro_large, pos_large, ret_juncs_large , max_junction_genes);
+
+		// Strand is called from the two bases at each splice point: GT/AG is '+', CT/AC is '-'.
+		char strand = '?';
+		if(global_context -> fasta_contigs){
+			char donor[3], receptor[3];
+			donor[2]=receptor[2]=0;
+			int has = !get_contig_fasta(global_context -> fasta_contigs, chro_small, pos_small, 2, donor);
+			has = has && !get_contig_fasta(global_context -> fasta_contigs, chro_large, pos_large-3, 2, receptor);
+			if(has){
+				if(donor[0]=='G' && donor[1]=='T' && receptor[0]=='A' && receptor[1]=='G') strand = '+';
+				else if(donor[0]=='C' && donor[1]=='T' && receptor[0]=='A' && receptor[1]=='C') strand = '-';
+			}
+		}
+
+		//SUBREADprintf("FOUND=%d, %d\n", found_features_small, found_features_large);
+
+		gene_name_tail = gene_names;
+		gene_names[0]=0;
+
+		// rules to choose the primary gene:
+		// (1) if some genes have one support but the other have multiple supporting reads: remove the lowly supported genes
+		// (2) if all genes have only one support but from different ends of the fragment, then remove the genes that are assigned to the end having lower supporting fragments
+		// (3) choose the gene that have the smallest coordinate.
+
+		// De-duplicate the genes found at the two ends by gene name.
+		// junction_source_list bit 1 = seen at the small end, bit 2 = seen at the large end.
+		// NOTE(review): max_supp is only updated when a gene is first appended (support == 1),
+		// so it never exceeds 1 and rule (1) is not applied here -- confirm intent.
+		int max_supp = 0;
+		for(ky_i1 = 0; ky_i1 < found_features_small + found_features_large; ky_i1++){
+			int is_duplicate = 0;
+			fc_junction_gene_t * tested_key = (ky_i1 < found_features_small)?ret_juncs_small[ky_i1] :ret_juncs_large[ky_i1 - found_features_small];
+			for(ky_i2 = 0; ky_i2 < unique_junctions; ky_i2 ++){
+				if(junccmp( tested_key, junction_key_list[ky_i2] )==0){
+					junction_support_list[ ky_i2 ] ++;
+					junction_source_list[ky_i2] |= ( (ky_i1 < found_features_small)? 1 : 2 );
+					is_duplicate = 1;
+					break;
+				}
+			}
+
+			if(!is_duplicate){
+				junction_key_list[unique_junctions] = tested_key;
+				junction_support_list[unique_junctions] = 1;
+				junction_source_list[unique_junctions] = ( (ky_i1 < found_features_small)? 1 : 2 );
+				max_supp = max(junction_support_list[unique_junctions], max_supp);
+				unique_junctions++;
+			}
+		}
+
+		if(1 == max_supp){
+			if(found_features_small > 0 && found_features_large > 0){
+				char junc_key [FEATURE_NAME_LENGTH + 15];
+				sprintf(junc_key, "%s\t%u", chro_small, pos_small);
+				unsigned int supp_small = HashTableGet(merged_splicing_table, junc_key) - NULL;
+				sprintf(junc_key, "%s\t%u", chro_large, pos_large);
+				unsigned int supp_large = HashTableGet(merged_splicing_table, junc_key) - NULL;
+
+				// NOTE(review): this drops genes seen only at the end with the HIGHER
+				// support, which reads opposite to rule (2) above -- confirm.
+				if(supp_small !=supp_large){
+					for(ky_i2 = 0; ky_i2 < unique_junctions; ky_i2 ++){
+						if(supp_small > supp_large && junction_source_list[ky_i2] == 1) junction_key_list[ky_i2] = NULL;
+						else if(supp_small < supp_large && junction_source_list[ky_i2] == 2) junction_key_list[ky_i2] = NULL;
+					}
+				}
+			}
+		}
+
+		// Rule (3): among the surviving genes pick the one with the smallest first base.
+		int smallest_coordinate_gene = 0x7fffffff;
+		fc_junction_gene_t * primary_gene = NULL;
+
+		for(ky_i2 = 0; ky_i2 < unique_junctions; ky_i2 ++){
+			fc_junction_gene_t * tested_key = junction_key_list[ky_i2];
+			if(tested_key != NULL && tested_key -> pos_first_base < smallest_coordinate_gene){
+				primary_gene = tested_key;
+				smallest_coordinate_gene = tested_key -> pos_first_base;
+			}
+		}
+
+		if(primary_gene == NULL){
+			strcpy(gene_names, "NA");
+		}else{
+			strcpy(gene_names, primary_gene -> gene_name);
+		}
+
+		// Put back the tabs between each chromosome name and its position, so that
+		// chro_small and chro_large now read "chro\tpos".
+		*(pos_small_str-1)='\t';
+		*(pos_large_str-1)='\t';
+
+		fprintf(ofp, "%s", gene_names);
+
+		// Secondary genes: every surviving gene except the primary one, comma separated.
+		gene_name_tail = gene_names;
+		gene_names[0]=0;
+		for(ky_i2 = 0; ky_i2 < unique_junctions; ky_i2 ++){
+			fc_junction_gene_t * tested_key = junction_key_list[ky_i2];
+			if(tested_key && tested_key != primary_gene)
+				gene_name_tail += sprintf(gene_name_tail, "%s,", tested_key -> gene_name);
+		}
+		if( gene_names[0] ) gene_name_tail[-1]=0;	// strip the trailing comma
+		else strcpy(gene_names, "NA");
+		fprintf(ofp, "\t%s", gene_names);
+
+		fprintf(ofp, "\t%s\t%c\t%s\t%c", chro_small, strand, chro_large, strand);
+
+		// Restore the last separator: key_list[ky_i] is the full key again for the lookups below.
+		chro_large[-1]='\t';
+
+		for(infile_i = 0 ; infile_i < n_input_files ; infile_i ++){
+			if(!table_columns[infile_i]) continue;
+			unsigned long count = HashTableGet(junction_global_table_list[infile_i] , key_list[ky_i]) - NULL;
+			fprintf(ofp,"\t%lu", count);
+		}
+		fprintf(ofp, "\n");
+	}
+	fclose(ofp);
+	free(junction_key_list);
+	free(gene_names);
+	free(ret_juncs_small);
+	free(ret_juncs_large);
+	free(junction_support_list);
+	free(key_list);
+	free(junction_source_list);
+	HashTableDestroy(merged_junction_table);
+	HashTableDestroy(merged_splicing_table);
+}
+
+void sort_bucket_table(fc_thread_global_context_t * global_context);
+int readSummary_single_file(fc_thread_global_context_t * global_context, read_count_type_t * column_numbers, int nexons, int * geneid, char ** chr, long * start, long * stop, unsigned char * sorted_strand, char * anno_chr_2ch, char ** anno_chrs, long * anno_chr_head, long * block_end_index, long * block_min_start , long * block_max_end, fc_read_counters * my_read_counter, HashTable * junc_glob_tab, HashTable * splicing_glob_tab);
+
+int readSummary(int argc,char *argv[]){
+
+ /*
+ This function counts the number of reads falling into each exon region.
+ The order of exons in the output is the same as that of exons included in the annotation.
+ The annotation, if provided as a file, should be sorted by chromosome name.
+
+ Parameters passed from the featureCounts R function:
+ 0: "readSummary"
+ 1: ann
+ 2: files[i]
+ 3: fout
+ 4: as.numeric(isPairedEnd)
5: min.distance
6: max.distance
7: as.numeric(tolower(file.type)=="sam")
@@ -2991,6 +3732,8 @@ int readSummary(int argc,char *argv[]){
33: as.numeric(fractionMultiMapping) # 1 = calculate fraction numbers if a read overlaps with multiple features or meta-features. "-M" must be specified when fractions are caculated.
34: as.numeric(useOverlappingBreakTie) # 1 = Select features or meta-features with a longer overlapping length; 0 = just use read-voting strategy: one overlapping read = 1 vote
35: Pair_Orientations # FF, FR, RF or RR. This parameter matters only if "-s" option is 1 or 2.
+ 36: as.numeric(doJunctionCounting) # 1 = count the number of junction reads spanning each exon-exon pair; 0 = do not.
+ 37: file name of a FASTA file (used to determine the strandedness of junctions by looking for GT/AG or CT/AC).
*/
int isStrandChecked, isCVersion, isChimericDisallowed, isPEDistChecked, minMappingQualityScore=0, isInputFileResortNeeded, feature_block_size = 20, reduce_5_3_ends_to_one;
@@ -3005,15 +3748,15 @@ int readSummary(int argc,char *argv[]){
long * anno_chr_head, * block_min_start, *block_max_end, *block_end_index;
char ** anno_chrs, * anno_chr_2ch;
long curchr, curpos;
- char * curchr_name;
+ char * curchr_name, * fasta_contigs_fname;
unsigned char * sorted_strand;
curchr = 0;
curpos = 0;
curchr_name = "";
- int isPE, minPEDistance, maxPEDistance, isReadSummaryReport, isBothEndRequired, isMultiMappingAllowed, fiveEndExtension, threeEndExtension, minFragmentOverlap, isSplitAlignmentOnly, is_duplicate_ignored, doNotSort, fractionMultiMapping, useOverlappingBreakTie;
+ int isPE, minPEDistance, maxPEDistance, isReadSummaryReport, isBothEndRequired, isMultiMappingAllowed, fiveEndExtension, threeEndExtension, minFragmentOverlap, isSplitAlignmentOnly, is_duplicate_ignored, doNotSort, fractionMultiMapping, useOverlappingBreakTie, doJuncCounting;
- int isSAM, isGTF, n_input_files=0;
+ int isGTF, n_input_files=0;
char * alias_file_name = NULL, * cmd_rebuilt = NULL;
int isMultiOverlapAllowed, isGeneLevel;
@@ -3024,7 +3767,7 @@ int readSummary(int argc,char *argv[]){
minPEDistance = atoi(argv[5]);
maxPEDistance = atoi(argv[6]);
- isSAM = atoi(argv[7]);
+ // isSAM = atoi(argv[7]);
isMultiOverlapAllowed = atoi(argv[8]);
isGeneLevel = atoi(argv[9]);
unsigned short thread_number;
@@ -3141,11 +3884,20 @@ int readSummary(int argc,char *argv[]){
pair_orientations = argv[35];
else pair_orientations = "FR";
+ if(argc>36)
+ doJuncCounting = atoi(argv[36]);
+ else doJuncCounting = 0;
+
+ fasta_contigs_fname = NULL;
+ if(argc>37)
+ if(argv[37][0] != 0 && argv[37][0]!=' ')
+ fasta_contigs_fname = argv[37];
+
unsigned int buffer_size = 1024*1024*6;
fc_thread_global_context_t global_context;
- fc_thread_init_global_context(& global_context, buffer_size, thread_number, MAX_LINE_LENGTH, isPE, minPEDistance, maxPEDistance,isGeneLevel, isMultiOverlapAllowed, isStrandChecked, (char *)argv[3] , isReadSummaryReport, isBothEndRequired, isChimericDisallowed, isPEDistChecked, nameFeatureTypeColumn, nameGeneIDColumn, minMappingQualityScore,isMultiMappingAllowed, isSAM, alias_file_name, cmd_rebuilt, isInputFileResortNeeded, feature_block_size, isCVersion, fiveEndExtension, threeEndExtens [...]
+ fc_thread_init_global_context(& global_context, buffer_size, thread_number, MAX_LINE_LENGTH, isPE, minPEDistance, maxPEDistance,isGeneLevel, isMultiOverlapAllowed, isStrandChecked, (char *)argv[3] , isReadSummaryReport, isBothEndRequired, isChimericDisallowed, isPEDistChecked, nameFeatureTypeColumn, nameGeneIDColumn, minMappingQualityScore,isMultiMappingAllowed, 0, alias_file_name, cmd_rebuilt, isInputFileResortNeeded, feature_block_size, isCVersion, fiveEndExtension, threeEndExtension [...]
if( global_context.is_multi_mapping_allowed != ALLOW_ALL_MULTI_MAPPING && global_context.use_fraction_multi_mapping)
{
@@ -3174,11 +3926,25 @@ int readSummary(int argc,char *argv[]){
}
sort_feature_info(&global_context, nexons, loaded_features, &chr, &geneid, &start, &stop, &sorted_strand, &anno_chr_2ch, &anno_chrs, &anno_chr_head, & block_end_index, & block_min_start, & block_max_end);
+ if(global_context.do_junction_counting){
+ sort_bucket_table(&global_context);
+ }
print_in_box(80,0,0," Meta-features : %d", global_context . gene_name_table -> numOfElements);
print_in_box(80,0,0," Chromosomes/contigs : %d", global_context . exontable_nchrs);
print_in_box(80,0,0,"");
+
+ if(fasta_contigs_fname){
+ print_in_box(80,0,0,"Loading FASTA contigs : %s", fasta_contigs_fname);
+ global_context.fasta_contigs = malloc(sizeof(fasta_contigs_t));
+ read_contig_fasta(global_context.fasta_contigs, fasta_contigs_fname);
+ print_in_box(80,0,0," %lu contigs were loaded", global_context.fasta_contigs -> contig_table -> numOfElements);
+ print_in_box(80,0,0,"");
+ }else global_context.fasta_contigs = NULL;
+
+
+
global_context.exontable_exons = nexons;
unsigned int * nreads = (unsigned int *) calloc(nexons,sizeof(int));
@@ -3196,37 +3962,61 @@ int readSummary(int argc,char *argv[]){
char * next_fn = strtok_r(file_list_used,";", &tmp_pntr);
read_count_type_t ** table_columns = calloc( n_input_files , sizeof(read_count_type_t *)), i_files=0;
fc_read_counters * read_counters = calloc(n_input_files , sizeof(fc_read_counters));
+ HashTable ** junction_global_table_list = NULL;
+ HashTable ** splicing_global_table_list = NULL;
+
+ if(global_context.do_junction_counting){
+ junction_global_table_list = calloc(n_input_files, sizeof(HashTable *));
+ splicing_global_table_list = calloc(n_input_files, sizeof(HashTable *));
+ }
for(;;){
- int redoing, original_sorting = global_context.is_input_file_resort_needed, orininal_isPE = global_context.is_paired_end_mode_assign;
+ int orininal_isPE = global_context.is_paired_end_mode_assign;
if(next_fn==NULL || strlen(next_fn)<1) break;
read_count_type_t * column_numbers = calloc(nexons, sizeof(read_count_type_t));
+ HashTable * junction_global_table = NULL;
+ HashTable * splicing_global_table = NULL;
strcpy(global_context.input_file_name, next_fn);
strcpy(global_context.raw_input_file_name, next_fn);
global_context.redo=0;
- for(redoing = 0; redoing < 1 + !original_sorting; redoing++)
- {
- fc_read_counters * my_read_counter = &(read_counters[i_files]);
- memset(my_read_counter, 0, sizeof(fc_read_counters));
- int ret_int = readSummary_single_file(& global_context, column_numbers, nexons, geneid, chr, start, stop, sorted_strand, anno_chr_2ch, anno_chrs, anno_chr_head, block_end_index, block_min_start, block_max_end, my_read_counter);
- if(ret_int!=0 || (global_context.redo && redoing)){
- table_columns[i_files] = NULL;
- free(column_numbers);
- break;
- }
- else table_columns[i_files] = column_numbers;
+ if(global_context.do_junction_counting){
+ junction_global_table = HashTableCreate(156679);
+ splicing_global_table = HashTableCreate(156679);
- if(redoing || !global_context.redo) break;
-
- global_context.is_input_file_resort_needed = 1;
- memset(column_numbers, 0, nexons * sizeof(read_count_type_t));
+ HashTableSetHashFunction(junction_global_table,HashTableStringHashFunction);
+ HashTableSetDeallocationFunctions(junction_global_table, free, NULL);
+ HashTableSetKeyComparisonFunction(junction_global_table, fc_strcmp_chro);
+
+ HashTableSetHashFunction(splicing_global_table,HashTableStringHashFunction);
+ HashTableSetDeallocationFunctions(splicing_global_table, free, NULL);
+ HashTableSetKeyComparisonFunction(splicing_global_table, fc_strcmp_chro);
+ }
+
+ fc_read_counters * my_read_counter = &(read_counters[i_files]);
+ memset(my_read_counter, 0, sizeof(fc_read_counters));
+
+ int ret_int = readSummary_single_file(& global_context, column_numbers, nexons, geneid, chr, start, stop, sorted_strand, anno_chr_2ch, anno_chrs, anno_chr_head, block_end_index, block_min_start, block_max_end, my_read_counter, junction_global_table, splicing_global_table);
+ if(ret_int!=0){
+ // give up this file.
+
+ table_columns[i_files] = NULL;
+ if(global_context.do_junction_counting){
+ HashTableDestroy(junction_global_table);
+ HashTableDestroy(splicing_global_table);
+ }
+ free(column_numbers);
+ } else {
+ // finished
+ table_columns[i_files] = column_numbers;
+ if(global_context.do_junction_counting){
+ junction_global_table_list[ i_files ] = junction_global_table;
+ splicing_global_table_list[ i_files ] = splicing_global_table;
+ }
}
- global_context.is_SAM_file = isSAM;
- global_context.is_input_file_resort_needed = original_sorting;
global_context.is_paired_end_mode_assign = orininal_isPE;
i_files++;
@@ -3240,13 +4030,22 @@ int readSummary(int argc,char *argv[]){
else
fc_write_final_results(&global_context, argv[3], nexons, table_columns, argv[2], n_input_files ,loaded_features, isCVersion);
+ if(global_context.do_junction_counting)
+ fc_write_final_junctions(&global_context, argv[3], table_columns, argv[2], n_input_files , junction_global_table_list, splicing_global_table_list);
+
fc_write_final_counts(&global_context, argv[3], n_input_files, argv[2], table_columns, read_counters, isCVersion);
int total_written_coulmns = 0;
for(i_files=0; i_files<n_input_files; i_files++)
if(table_columns[i_files]){
free(table_columns[i_files]);
+ if(global_context.do_junction_counting){
+ HashTableDestroy(junction_global_table_list[i_files]);
+ HashTableDestroy(splicing_global_table_list[i_files]);
+ }
+
total_written_coulmns++;
+
}
free(table_columns);
@@ -3274,8 +4073,18 @@ int readSummary(int argc,char *argv[]){
free(global_context.gene_name_array);
HashTableDestroy(global_context.exontable_chro_table);
+ if(global_context.fasta_contigs){
+ destroy_contig_fasta(global_context.fasta_contigs);
+ free(global_context.fasta_contigs);
+ }
if(global_context.annot_chro_name_alias_table)
HashTableDestroy(global_context.annot_chro_name_alias_table);
+ if(global_context.do_junction_counting){
+ HashTableDestroy(global_context.junction_bucket_table);
+ HashTableDestroy(global_context.junction_features_table);
+ free(junction_global_table_list);
+ free(splicing_global_table_list);
+ }
free(global_context.unistr_buffer_space);
free(loaded_features);
@@ -3296,7 +4105,55 @@ int readSummary(int argc,char *argv[]){
return total_written_coulmns?0:-1;
}
+/*
+ * Register every gene stored in gene_feature_table under each position bucket
+ * it spans on chromosome chro_name.
+ * Buckets are JUNCTION_BUCKET_STEP wide and are keyed "<chro_name>:<bucket start>"
+ * in global_context -> junction_bucket_table; each bucket holds a growable
+ * gene_info_list_t of gene pointers.
+ */
+void register_buckets(fc_thread_global_context_t * global_context , HashTable * gene_feature_table, char * chro_name){
+	int bucket_no;
+	for(bucket_no = 0; bucket_no < gene_feature_table -> numOfBuckets; bucket_no++){
+		KeyValuePair * entry;
+		for(entry = gene_feature_table -> bucketArray[bucket_no]; entry; entry = entry -> next){
+			fc_junction_gene_t * gene = (fc_junction_gene_t *) entry -> value;
+			// start from the bucket boundary at or below the first base of the gene
+			unsigned int bucket_start = gene -> pos_first_base - gene -> pos_first_base % JUNCTION_BUCKET_STEP;
+
+			for(; bucket_start <= gene -> pos_last_base ; bucket_start += JUNCTION_BUCKET_STEP){
+				char bucket_key[CHROMOSOME_NAME_LENGTH + 20];
+				sprintf(bucket_key, "%s:%u", chro_name, bucket_start);
+
+				gene_info_list_t * gene_list = HashTableGet(global_context -> junction_bucket_table, bucket_key);
+				if(!gene_list){
+					// first gene in this bucket: create the list and a heap copy of the key
+					gene_list = malloc(sizeof(gene_info_list_t));
+					gene_list -> space = 3;
+					gene_list -> used = 0;
+					gene_list -> genes = malloc(sizeof(void *) * gene_list -> space);
+
+					char * stored_key = malloc(strlen(bucket_key) + 1);
+					strcpy(stored_key , bucket_key);
+					HashTablePut(global_context -> junction_bucket_table, stored_key , gene_list);
+				}
+
+				if(gene_list -> used == gene_list -> space){
+					// full: grow by 3 slots or 30%, whichever is larger
+					gene_list -> space = max(gene_list -> space + 3, gene_list -> space * 1.3);
+					gene_list -> genes = realloc(gene_list -> genes , gene_list -> space * sizeof(void *));
+				}
+				gene_list -> genes[gene_list -> used++] = gene;
+			}
+		}
+	}
+}
+/*
+ * Walk global_context -> junction_features_table, whose keys are used as
+ * chromosome names and whose values are per-chromosome gene tables, and hand
+ * each pair to register_buckets().
+ */
+void sort_bucket_table(fc_thread_global_context_t * global_context){
+	HashTable * features_by_chro = global_context -> junction_features_table;
+	int bucket_no;
+	for(bucket_no = 0; bucket_no < global_context -> junction_features_table -> numOfBuckets; bucket_no++){
+		KeyValuePair * entry = features_by_chro -> bucketArray[bucket_no];
+		for(; entry; entry = entry -> next){
+			char * chro_name = (char *)entry -> key;
+			HashTable * gene_feature_table = entry -> value;
+			register_buckets(global_context , gene_feature_table, chro_name);
+		}
+	}
+}
@@ -3308,7 +4165,7 @@ int readSummary(int argc,char *argv[]){
-int readSummary_single_file(fc_thread_global_context_t * global_context, read_count_type_t * column_numbers, int nexons, int * geneid, char ** chr, long * start, long * stop, unsigned char * sorted_strand, char * anno_chr_2ch, char ** anno_chrs, long * anno_chr_head, long * block_end_index, long * block_min_start , long * block_max_end, fc_read_counters * my_read_counter)
+int readSummary_single_file(fc_thread_global_context_t * global_context, read_count_type_t * column_numbers, int nexons, int * geneid, char ** chr, long * start, long * stop, unsigned char * sorted_strand, char * anno_chr_2ch, char ** anno_chrs, long * anno_chr_head, long * block_end_index, long * block_min_start , long * block_max_end, fc_read_counters * my_read_counter, HashTable * junction_global_table, HashTable * splicing_global_table)
{
FILE *fp_in = NULL;
int read_length = 0;
@@ -3319,7 +4176,6 @@ int readSummary_single_file(fc_thread_global_context_t * global_context, read_co
if(strcmp( global_context->input_file_name,"STDIN")!=0)
{
int file_probe = is_certainly_bam_file(global_context->input_file_name, &is_first_read_PE);
-
global_context -> is_paired_end_input_file = is_first_read_PE;
// a Singel-end SAM/BAM file cannot be assigned as a PE SAM/BAM file;
@@ -3327,10 +4183,8 @@ int readSummary_single_file(fc_thread_global_context_t * global_context, read_co
if(is_first_read_PE==0)
global_context -> is_paired_end_mode_assign = 0;
- if(file_probe == 1){
- global_context->is_SAM_file = 0;
- }
- else if(file_probe == 0) global_context->is_SAM_file = 1;
+ global_context->is_SAM_file = 1;
+ if(file_probe == 1) global_context->is_SAM_file = 0;
global_context -> start_time = miltime();
@@ -3349,8 +4203,6 @@ int readSummary_single_file(fc_thread_global_context_t * global_context, read_co
}
- int isInputFileResortNeeded = global_context->is_input_file_resort_needed;
-
if(strcmp( global_context->input_file_name,"STDIN")!=0)
{
FILE * exist_fp = f_subr_open( global_context->input_file_name,"r");
@@ -3364,12 +4216,11 @@ int readSummary_single_file(fc_thread_global_context_t * global_context, read_co
fclose(exist_fp);
}
+ /*
if(strcmp(global_context->input_file_name,"STDIN")!=0)
if(warning_file_type(global_context->input_file_name, global_context->is_SAM_file?FILE_TYPE_SAM:FILE_TYPE_BAM))
global_context->is_unpaired_warning_shown=1;
- if(strcmp(global_context->input_file_name,"STDIN")!=0 && isInputFileResortNeeded)
- if(resort_input_file( global_context)) return -1;
- int isSAM = global_context->is_SAM_file;
+ */
// Open the SAM/BAM file
// Nothing is done if the file does not exist.
@@ -3395,389 +4246,13 @@ int readSummary_single_file(fc_thread_global_context_t * global_context, read_co
print_in_box(80,0,0," Assign reads to features...");
}
-
-
fc_thread_start_threads(global_context, nexons, geneid, chr, start, stop, sorted_strand, anno_chr_2ch, anno_chrs, anno_chr_head, block_end_index, block_min_start , block_max_end, read_length);
- int buffer_pairs = global_context -> thread_number>1?512:1;
- int isPE = global_context->is_paired_end_mode_assign;
- char * preload_line = malloc(sizeof(char) * (2+MAX_LINE_LENGTH)*(isPE?2:1)*buffer_pairs);
- int preload_line_ptr;
- int current_thread_id = 0;
- fc_thread_thread_context_t * one_thread_context = global_context->thread_contexts;
-
- SamBam_Reference_Info * sb_header_tab = NULL;
-
- unsigned long long int chunk_id = 0;
- int binary_remainder = 0, binary_read_ptr = 0;
- char * chunk_in_buff = malloc(70000);
- char * binary_in_buff = malloc(80000 * 2);
-
- if(!isSAM)
- {
- int remainder_read_data_len = 0;
-
- PBum_load_header(fp_in, &sb_header_tab, binary_in_buff, & remainder_read_data_len);
- //printf("RMD=%d\n", remainder_read_data_len);
-
- if(remainder_read_data_len)
- {
- binary_remainder = remainder_read_data_len;
- }
- global_context->sambam_chro_table = sb_header_tab;
- }
-
- while (1){
- int pair_no;
- int is_second_read;
- int fresh_read_no = 0;
- preload_line[0] = 0;
- preload_line_ptr = 0;
-
- char * ret = NULL;
-
- // one-thread BAM is not supported.
- if( isSAM && global_context->thread_number==1)
- {
- int is_second_read;
-
- for(is_second_read=0;is_second_read<(isPE?2:1);is_second_read++)
- {
- char * lbuf = is_second_read?one_thread_context -> line_buffer2:one_thread_context -> line_buffer1;
- while(1)
- {
- ret = fgets(lbuf, MAX_LINE_LENGTH, fp_in);
- if(global_context -> redo) ret = NULL;
- if(!ret) break;
- if(lbuf[0] == '@')
- {
- int retlen = strlen(ret);
- if(ret[retlen-1]!='\n')
- {
- while(1){
- int nch = getc(fp_in);
- if(nch == EOF || nch == '\n') break;
- }
- }
- }
- else break;
- }
-
- if(!ret) break;
- if(read_length < 1)
- {
- int tab_no = 0;
- int read_len_tmp=0, read_cursor;
- int curr_line_len = strlen(lbuf);
- for(read_cursor=0; read_cursor<curr_line_len; read_cursor++)
- {
- if(lbuf[read_cursor] == '\t')
- tab_no++;
- else
- {
- if(tab_no == 9) // SEQ
- read_len_tmp++;
- }
- }
- read_length = read_len_tmp;
- global_context->read_length = read_length;
- }
- lbuf[strlen(lbuf)+1]=0;
- }
-
- //printf("RRR=%d\n",ret);
-
- //one_thread_context -> current_read_length1 = global_context->read_length;
- //one_thread_context -> current_read_length2 = global_context->read_length;
-
- if(is_second_read == 1 && isPE){
- print_in_box(85,0,0," %c[31mThere are odd number of reads in the paired-end data.", CHAR_ESC);
- print_in_box(80,0,0," Please make sure that the format is correct.");
- }
-
- if(ret)
- {
- global_context->all_reads ++;
- process_line_buffer(global_context, one_thread_context);
- }
-
- if(!ret)break;
- }
- else if(!isSAM)
- {
- int no_of_reads = 0;
- unsigned int real_len = 0;
- // most of the data must have been given out before this step.
-
- int cdata_size = 0;
-
- if(binary_remainder > 70000)
- SUBREADprintf("SOMETHING IS WRONG!\n");
-
- if(global_context -> redo)
- cdata_size = -1;
- else{
- if(binary_remainder<10000)
- cdata_size = PBam_get_next_zchunk(fp_in, chunk_in_buff, 65537, & real_len);
- }
-
- if(cdata_size>0 || binary_remainder>0)
- {
- int x1;
-
-
- if(binary_read_ptr>0)
- {
- for(x1=0; x1< binary_remainder; x1++)
- binary_in_buff[x1] = binary_in_buff [x1 + binary_read_ptr];
- binary_read_ptr = 0;
- }
-
- //fprintf(stderr,"NBN=%d, OBN=%d\n", cdata_size , binary_remainder);
- if(cdata_size>0)
- {
- int new_binary_bytes = SamBam_unzip(binary_in_buff + binary_remainder , chunk_in_buff , cdata_size);
- if(new_binary_bytes>=0)
- binary_remainder += new_binary_bytes;
- else SUBREADprintf("ERROR: BAM GZIP FORMAT ERROR.\n");
- // fprintf(stderr,"BBN=%d\n", new_binary_bytes);
- }
-
- while(binary_remainder>4)
- {
- unsigned int binary_read_len = 0;
- memcpy(& binary_read_len , binary_in_buff + binary_read_ptr , 4);
- //printf("RLEN=%d; PTR=%d; RMD=%d\n", binary_read_len , binary_read_ptr, binary_remainder);
- if(binary_read_len > 10000)
- {
- binary_remainder = -1;
- //SUBREADprintf("FATAL ERROR: BAM RECORD SIZE = %u ; PTR=%d ; REM=%d.\n", binary_read_len, binary_read_ptr , binary_remainder);
- print_in_box(80,0,0," A format error was detected in this BAM file.");
- print_in_box(80,0,0," The remaining part in the file is skipped.");
- print_in_box(80,0,0," Please check the file format using samtools.");
- print_in_box(80,0,0,"");
- break;
- }
- // if the program runs on PE mode, no_of_reads must be even.
-
- if(isPE)
- {
- if(4 + binary_read_len + 4 < binary_remainder)
- {
- int binary_read2_len=0;
- memcpy(&binary_read2_len , binary_in_buff + binary_read_ptr + 4 + binary_read_len, 4);
- if(4 + binary_read_len + 4 + binary_read2_len <= binary_remainder)
- {
- no_of_reads +=2;
- binary_read_ptr += 4 + binary_read_len + 4 + binary_read2_len;
- binary_remainder -= 4 + binary_read_len + 4 + binary_read2_len;
- }
- else break;
- }
- else break;
- }
- else
- {
- if(binary_read_len + 4<= binary_remainder)
- {
- no_of_reads ++;
- binary_read_ptr += 4 + binary_read_len;
- binary_remainder -= 4 + binary_read_len;
- }
- else break;
- }
- }
- }
-
- if(binary_remainder <0)break;
-
- if(no_of_reads>0)
- {
- while(1)
- {
- int is_finished = 0;
-
- fc_thread_thread_context_t * thread_context = global_context->thread_contexts+current_thread_id;
-
- pthread_spin_lock(&thread_context->input_buffer_lock);
-
- // the number of bytes can be utilised given the two_chunk_len.
- int empty_bytes = global_context->input_buffer_max_size - thread_context->input_buffer_remainder;
- int tail_bytes = global_context->input_buffer_max_size - thread_context->input_buffer_write_ptr;
-
- if(thread_context->input_buffer_remainder > global_context->input_buffer_max_size)
- {
- SUBREADprintf("RMD=%d\n", thread_context->input_buffer_remainder );
- assert(0);
- }
-
- if(tail_bytes < binary_read_ptr + 4)
- empty_bytes -= tail_bytes;
-
- // copy the new buffer to thread buffer.
- // format: read_number=n, read_chunk1, read_chunk2, ..., read_chunk_n
- if(empty_bytes >= binary_read_ptr + 4)
- {
-
- if(tail_bytes < binary_read_ptr + 4)
- {
- if(tail_bytes>=4)
- memset(thread_context->input_buffer + thread_context->input_buffer_write_ptr, 0, 4);
- thread_context->input_buffer_write_ptr = 0;
- thread_context->input_buffer_remainder += tail_bytes;
- }
-
- memcpy( thread_context->input_buffer + thread_context->input_buffer_write_ptr, & binary_read_ptr, 4);
- memcpy( thread_context->input_buffer + thread_context->input_buffer_write_ptr + 4, binary_in_buff , binary_read_ptr);
- thread_context->input_buffer_write_ptr += 4 + binary_read_ptr;
- thread_context->input_buffer_remainder += 4 + binary_read_ptr;
- is_finished = 1;
- }
-
- pthread_spin_unlock(&thread_context->input_buffer_lock);
- current_thread_id++;
- if(current_thread_id >= global_context->thread_number) current_thread_id = 0;
-
- if(is_finished) break;
- else usleep(tick_time);
-
- }
-
- chunk_id++;
- }
- else if(cdata_size<0)
- break;
-
- }
- else
- {
- for(pair_no=0; pair_no < buffer_pairs; pair_no++)
- {
- for(is_second_read=0;is_second_read<(isPE?2:1);is_second_read++)
- {
- while(1)
- {
- ret = fgets(preload_line+preload_line_ptr, MAX_LINE_LENGTH, fp_in);
- if(global_context -> redo ) ret = NULL;
- if(!ret) break;
- if(preload_line[preload_line_ptr] == '@'){
- int retlen = strlen(ret);
- if(ret[retlen-1]!='\n')
- {
- while(1){
- int nch = getc(fp_in);
- if(nch == EOF || nch == '\n') break;
- }
- }
- }else break;
- }
-
- if(!ret) break;
-
- int curr_line_len = strlen(preload_line+preload_line_ptr);
- if(curr_line_len >= MAX_LINE_LENGTH || preload_line[preload_line_ptr + curr_line_len-1]!='\n')
- {
- print_in_box(80,0,0,"ERROR: the lines are too long. Please check the input format!!\n");
- ret = NULL;
- preload_line_ptr = 0;
- break;
- }
- preload_line_ptr += curr_line_len;
-
- fresh_read_no++;
- }
- if(!ret) break;
- else
- global_context->all_reads ++;
- }
-
- int line_length = preload_line_ptr;
- if(line_length >= global_context->input_buffer_max_size-1)
- {
- SUBREADprintf("ERROR: the lines are too long. Please check the input format!!\n");
- break;
- }
- if(isPE && (fresh_read_no%2>0))
- {
- // Safegarding -- it should not happen if the SAM file has a correct format.
- //line_length = 0;
- if( (!global_context -> redo)){
- print_in_box(85,0,0," %c[31mThere are odd number of reads in the paired-end data.", CHAR_ESC);
- print_in_box(80,0,0," Please make sure that the format is correct.");
- }
- if(line_length > 0){
- int xx1, enters = 0;
- for(xx1 = line_length; xx1 >=0; xx1--){
- if( preload_line[xx1]=='\n' ) enters ++;
- if(2 == enters){
- line_length = xx1+1;
- break;
- }
- }
- if(xx1 <= 0) line_length = 0;
- }
- }
-
- //printf("FRR=%d\n%s\n", fresh_read_no, preload_line);
-
- if(line_length > 0)
- {
- while(1)
- {
- int is_finished = 0;
- fc_thread_thread_context_t * thread_context = global_context->thread_contexts+current_thread_id;
- //printf("WRT_THR_IBUF_REM [%d]=%d\n", current_thread_id , thread_context->input_buffer_remainder);
-
- pthread_spin_lock(&thread_context->input_buffer_lock);
- unsigned int empty_bytes = global_context->input_buffer_max_size - thread_context->input_buffer_remainder;
- if(empty_bytes > line_length)
- {
- unsigned int tail_bytes = global_context->input_buffer_max_size - thread_context->input_buffer_write_ptr;
- unsigned int write_p1_len = (tail_bytes > line_length)?line_length:tail_bytes;
- unsigned int write_p2_len = (tail_bytes > line_length)?0:(line_length - tail_bytes);
- memcpy(thread_context->input_buffer + thread_context->input_buffer_write_ptr, preload_line, write_p1_len);
- if(write_p2_len)
- {
- memcpy(thread_context->input_buffer, preload_line + write_p1_len, write_p2_len);
- thread_context->input_buffer_write_ptr = write_p2_len;
- }
- else thread_context->input_buffer_write_ptr += write_p1_len;
- if(thread_context->input_buffer_write_ptr == global_context->input_buffer_max_size)
- thread_context->input_buffer_write_ptr=0;
-
-
- thread_context->input_buffer_remainder += line_length;
- //printf("WRT_THR_IBUF_REM [%d] + %d =%d\n", current_thread_id, line_length , thread_context->input_buffer_remainder);
- is_finished = 1;
- }
-
- pthread_spin_unlock(&thread_context->input_buffer_lock);
-
- current_thread_id++;
- if(current_thread_id >= global_context->thread_number) current_thread_id = 0;
-
- if(is_finished) break;
- else usleep(tick_time);
- }
- }
- if(!ret) break;
- }
- }
-
- free(chunk_in_buff);
- free(binary_in_buff);
- free(preload_line);
global_context->is_all_finished = 1;
-
- if(global_context->thread_number > 1 || !isSAM)
- fc_thread_wait_threads(global_context);
+ fc_thread_wait_threads(global_context);
unsigned long long int nreads_mapped_to_exon = 0;
-
-
- if(!global_context->redo)
- fc_thread_merge_results(global_context, column_numbers , &nreads_mapped_to_exon, my_read_counter);
-
+ fc_thread_merge_results(global_context, column_numbers , &nreads_mapped_to_exon, my_read_counter, junction_global_table, splicing_global_table);
fc_thread_destroy_thread_context(global_context);
//global_context .read_counters.assigned_reads = nreads_mapped_to_exon;
@@ -3787,9 +4262,9 @@ int readSummary_single_file(fc_thread_global_context_t * global_context, read_co
#endif
fclose(fp_in);
- if(sb_header_tab) free(sb_header_tab);
- if(strcmp(global_context->input_file_name,"STDIN")!=0 && isInputFileResortNeeded)
- unlink(global_context->input_file_name);
+ if(global_context -> sambam_chro_table) free(global_context -> sambam_chro_table);
+ global_context -> sambam_chro_table = NULL;
+
free(line);
return 0;
}
@@ -3801,9 +4276,10 @@ int main(int argc, char ** argv)
int feature_count_main(int argc, char ** argv)
#endif
{
- char * Rargv[36];
+ char * Rargv[38];
char annot_name[300];
char * out_name = malloc(300);
+ char * fasta_contigs_name = malloc(300);
char * alias_file_name = malloc(300);
int cmd_rebuilt_size = 200;
char * cmd_rebuilt = malloc(cmd_rebuilt_size);
@@ -3835,6 +4311,7 @@ int feature_count_main(int argc, char ** argv)
int is_Split_Alignment_Only = 0;
int is_duplicate_ignored = 0;
int do_not_sort = 0;
+ int do_junction_cnt = 0;
int reduce_5_3_ends_to_one = 0;
int use_fraction_multimapping = 0;
int threads = 1;
@@ -3848,6 +4325,7 @@ int feature_count_main(int argc, char ** argv)
char strFiveEndExtension[11], strThreeEndExtension[11], strMinFragmentOverlap[11];
very_long_file_names = malloc(very_long_file_names_size);
very_long_file_names [0] = 0;
+ fasta_contigs_name[0]=0;
alias_file_name[0]=0;
debug_command[0] = 0;
@@ -3872,7 +4350,9 @@ int feature_count_main(int argc, char ** argv)
opterr=1;
optopt=63;
- while ((c = getopt_long (argc, argv, "A:g:t:T:o:a:d:D:L:Q:pbF:fs:S:CBPMORv?", long_options, &option_index)) != -1)
+ strcpy(Pair_Orientations,"fr");
+
+ while ((c = getopt_long (argc, argv, "A:g:t:T:o:a:d:D:L:Q:pbF:fs:S:CBJPMORv?", long_options, &option_index)) != -1)
switch(c)
{
case 'S':
@@ -3886,6 +4366,9 @@ int feature_count_main(int argc, char ** argv)
Pair_Orientations[2]=0;
break;
+ case 'J':
+ do_junction_cnt = 1;
+ break;
case 'A':
strcpy(alias_file_name, optarg);
break;
@@ -4001,6 +4484,10 @@ int feature_count_main(int argc, char ** argv)
use_fraction_multimapping = 1;
}
+ if(strcmp("fasta", long_options[option_index].name)==0)
+ {
+ strcpy(fasta_contigs_name , optarg);
+ }
if(strcmp("read2pos", long_options[option_index].name)==0)
{
if(optarg[0]=='3')
@@ -4108,11 +4595,14 @@ int feature_count_main(int argc, char ** argv)
Rargv[33] = use_fraction_multimapping?"1":"0";
Rargv[34] = use_overlapping_length_break_tie?"1":"0";
Rargv[35] = Pair_Orientations;
- int retvalue = readSummary(36, Rargv);
+ Rargv[36] = do_junction_cnt?"1":"0";
+ Rargv[37] = fasta_contigs_name;
+ int retvalue = readSummary(38, Rargv);
free(very_long_file_names);
free(out_name);
free(alias_file_name);
+ free(fasta_contigs_name);
free(cmd_rebuilt);
return retvalue;
diff --git a/src/removeDupReads.c b/src/removeDupReads.c
index 4a78d72..c03e77b 100644
--- a/src/removeDupReads.c
+++ b/src/removeDupReads.c
@@ -219,7 +219,28 @@ int repeated_read_removal(char * in_SAM_file, int threshold, char * out_SAM_file
void print_usage_rrr(char * myname)
{
- SUBREADprintf("Usage: %s -i <input_SAM_file> -o <output_SAM_file> {-r threshold} {-t temp_path} {-c max_read_number}\n\n", myname);
+ SUBREADprintf("\nremoveDup Version %s\n\n", SUBREAD_VERSION); // usage banner; the new text no longer uses myname
+ SUBREADputs(" Remove duplicated reads.");
+ SUBREADputs("");
+ SUBREADputs("Usage:");
+ SUBREADputs(" ./removeDup [options] -i <input_file> -o <output_file>\n");
+ SUBREADputs("Required arguments:");
+ SUBREADputs("");
+ SUBREADputs(" -i <string> Name of input file in SAM format.");
+ SUBREADputs("");
+ SUBREADputs(" -o <string> Name of output SAM file including filtered reads.");
+ SUBREADputs("");
+ SUBREADputs("Optional arguments:");
+ SUBREADputs("");
+ SUBREADputs(" -r <int> Specify the duplication cutoff. All the reads mapped to a location");
+ SUBREADputs(" are removed from the output if the number of reads mapped to this");
+ SUBREADputs(" location is equal or higher than the cutoff. 10 by default.");
+ SUBREADputs("");
+ SUBREADputs(" -t <string> A directory storing temporary files generated by the program.");
+ SUBREADputs("");
+ SUBREADputs(" -c <int> The maximum number of reads the input file can have. 40 million by");
+ SUBREADputs(" default.");
+ SUBREADputs("");
}
struct option rem_long_options[]={{0,0,0,0}};
diff --git a/src/samMappedBases.c b/src/samMappedBases.c
new file mode 100644
index 0000000..e51e3f6
--- /dev/null
+++ b/src/samMappedBases.c
@@ -0,0 +1,124 @@
+/***************************************************************
+
+ The Subread and Rsubread software packages are free
+ software packages:
+
+ you can redistribute it and/or modify it under the terms
+ of the GNU General Public License as published by the
+ Free Software Foundation, either version 3 of the License,
+ or (at your option) any later version.
+
+ Subread is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty
+ of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ See the GNU General Public License for more details.
+
+ Authors: Drs Yang Liao and Wei Shi
+
+ ***************************************************************/
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <zlib.h>
+#include <getopt.h>
+#include <ctype.h>
+#include "subread.h"
+#include "core.h"
+#include "sambam-file.h"
+#include "HelperFunctions.h"
+#include "input-files.h"
+
+typedef struct{ // run-time options shared by main and countBases()
+ int is_BAM; // non-zero: input files are opened as BAM; zero: opened as SAM
+ // (set to 1 by -9/--BAMinput and to 0 by -8/--SAMinput)
+}countbases_context_t;
+
+static struct option sumb_long_options[] = // getopt_long() table: each long name yields the short code in its last field
+{
+ {"BAMinput", no_argument, 0, '9'}, // --BAMinput is handled as -9
+ {"SAMinput", no_argument, 0, '8'}, // --SAMinput is handled as -8
+ {0, 0, 0, 0} // all-zero terminator entry
+};
+
+/* Sums the CIGAR section lengths of every mapped read in `fn` and prints
+ * "<fn>\t<total>". Lines with fewer than six columns are ignored. */
+void countBases(char * fn, countbases_context_t * context)
+{
+	char fline[2999];
+	unsigned long long int allbases = 0;
+	SamBam_FILE * in_fp = SamBam_fopen(fn,context->is_BAM?SAMBAM_FILE_BAM:SAMBAM_FILE_SAM);
+	if(!in_fp){	// never hand a NULL handle to SamBam_fgets()
+		SUBREADprintf("ERROR: unable to open '%s'\n", fn);
+		return;
+	}
+	while(1)
+	{
+		char * tok_tmp = NULL;
+		char * is_ret = SamBam_fgets(in_fp, fline, 2999, 0);
+		if(!is_ret) break;
+		if('@' == fline[0]) continue;	// header line
+
+		strtok_r(fline, "\t", &tok_tmp);	// column 1: read name
+		char * flags_str = strtok_r(NULL, "\t", &tok_tmp);	// column 2: FLAG
+		strtok_r(NULL, "\t", &tok_tmp);	// column 3
+		strtok_r(NULL, "\t", &tok_tmp);	// column 4
+		strtok_r(NULL, "\t", &tok_tmp);	// column 5
+		char * cigar = strtok_r(NULL, "\t", &tok_tmp);	// column 6: CIGAR
+		// strtok_r() returns NULL once a line runs out of columns.
+		if(!flags_str || !cigar) continue;
+
+		if(4 & atoi(flags_str)) continue;	// FLAG bit 0x4: read is unmapped
+
+		unsigned int Staring_Points[6];
+		unsigned short Section_Length[6];
+		int i, retv = RSubread_parse_CIGAR_string(cigar, Staring_Points, Section_Length);
+		for(i=0;i<retv;i++)
+			allbases += Section_Length[i];
+	}
+	SamBam_fclose(in_fp);
+	SUBREADprintf("%s\t%llu\n", fn, allbases);
+}
+
+/* Entry point: `[--BAMinput|--SAMinput] file...`; runs countBases() on each
+ * named file. Files are read as SAM unless -9/--BAMinput is given. */
+#ifdef MAKE_STANDALONE
+int main(int argc, char ** argv)
+#else
+int main_mappedBases(int argc, char ** argv)
+#endif
+{
+	int ret = 0;
+	int c;
+	int option_index = 0 ;
+	countbases_context_t * context = calloc(1, sizeof(countbases_context_t));
+	if(!context) return -1;	// out of memory
+
+	optind = 0;
+	opterr = 1;
+	optopt = 63;
+
+	while((c = getopt_long (argc, argv, "98", sumb_long_options, &option_index)) != -1)
+	{
+		switch(c){
+			case '9':
+				context -> is_BAM = 1;
+				break;
+			case '8':
+				context -> is_BAM = 0;
+				break;
+			default:
+				// Unknown option: release the context before leaving.
+				free(context);
+				return 0;
+		}
+	}
+
+	int input_file;
+	for(input_file = optind; input_file < argc; input_file++)
+		countBases(argv[input_file], context);
+	free(context);
+	return ret;
+}
+
diff --git a/src/sambam-file.c b/src/sambam-file.c
index c57cb81..3fa77e3 100644
--- a/src/sambam-file.c
+++ b/src/sambam-file.c
@@ -31,6 +31,7 @@
#include <assert.h>
#include <ctype.h>
#include "subread.h"
+#include "core.h"
#include "gene-algorithms.h"
#include "sambam-file.h"
@@ -274,8 +275,8 @@ char * SamBam_fgets(SamBam_FILE * fp, char * buff , int buff_len, int seq_needed
SB_RINC(fp,1);
//printf("NNCH=%c\n", nch);
-
- if(nch == '\r'||nch=='\n' || nch <0) break;
+ if(nch == '\r')continue;
+ if(nch == '\n' || nch <0) break;
if(xk1 < buff_len-2)
{
buff[xk1]=nch;
@@ -365,6 +366,7 @@ int PBam_get_next_zchunk(FILE * bam_fp, char * buffer, int buffer_length, unsign
fseeko(bam_fp, 4, SEEK_CUR);
fread(&real_len, 4, 1, bam_fp);
+ // SUBREADprintf("read_data=%u\n", CDATA_LEN);
return CDATA_READING;
}
else
@@ -577,12 +579,13 @@ int PBam_chunk_gets(char * chunk, int *chunk_ptr, int chunk_limit, SamBam_Refere
memcpy( aln-> buff_for_seq, chunk+(*chunk_ptr), seq_qual_bytes);
(*chunk_ptr) += seq_qual_bytes;
- int nh_val = -1, hi_val = -1;
+ char extra_tags [CORE_ADDITIONAL_INFO_LENGTH];
+ extra_tags[0]=0;
while( (*chunk_ptr) < next_start)
{
char extag[2];
char extype;
- int delta;
+ int delta, need_tag = 1;
memcpy(extag, chunk+(*chunk_ptr), 2);
extype = chunk[2+(*chunk_ptr)];
(*chunk_ptr)+=3;
@@ -591,8 +594,8 @@ int PBam_chunk_gets(char * chunk, int *chunk_ptr, int chunk_limit, SamBam_Refere
{
delta = 0;
// 'Z' columns are NULL-terminated.
- while(chunk[*chunk_ptr]) (*chunk_ptr)++;
- (*chunk_ptr)++;
+ while(chunk[delta + (*chunk_ptr)]) delta++;
+ delta += 1;
}
else if(extype == 'A' || extype == 'c' || extype=='C') delta=1;
else if(extype == 'i' || extype=='I' || extype == 'f') delta=4;
@@ -610,6 +613,7 @@ int PBam_chunk_gets(char * chunk, int *chunk_ptr, int chunk_limit, SamBam_Refere
else break;
int array_len;
+ need_tag = 0;
memcpy(&array_len, chunk+(*chunk_ptr), 4);
(*chunk_ptr)+=4;
delta *= array_len;
@@ -618,18 +622,22 @@ int PBam_chunk_gets(char * chunk, int *chunk_ptr, int chunk_limit, SamBam_Refere
// fprintf(stderr, "NO_EXTYPE: %c\n", extype);
break;
}
- if(memcmp(extag,"HI",2)==0 && delta<=4)
- {
- hi_val=0;
- memcpy(&hi_val, chunk+(*chunk_ptr),delta);
- }
-
- if(memcmp(extag,"NH",2)==0 && delta<=4)
- {
- nh_val=0;
- memcpy(&nh_val, chunk+(*chunk_ptr),delta);
- // printf("NH=%d\n", nh_val);
+
+ if(need_tag){
+ if(extype == 'c' || extype=='C' || extype == 'i' || extype=='I' || extype == 's' || extype=='S'){
+ int tmpi = 0;
+ memcpy(&tmpi, chunk+(*chunk_ptr),delta);
+ if(tmpi >= 0)
+ sprintf(extra_tags + strlen(extra_tags), "\t%c%c:i:%d", extag[0], extag[1], tmpi);
+ }else if(extype == 'Z'){
+ sprintf(extra_tags + strlen(extra_tags), "\t%c%c:Z:", extag[0], extag[1]);
+ *(extra_tags + strlen(extra_tags)+delta-1) = 0;
+ memcpy(extra_tags + strlen(extra_tags), chunk + (*chunk_ptr), delta - 1);
+ }else if(extype == 'A'){
+ sprintf(extra_tags + strlen(extra_tags), "\t%c%c:A:%c", extag[0], extag[1], *(chunk + *chunk_ptr) );
+ }
}
+
if((*chunk_ptr) + delta > chunk_limit) return -1;
(*chunk_ptr)+=delta;
@@ -694,19 +702,10 @@ int PBam_chunk_gets(char * chunk, int *chunk_ptr, int chunk_limit, SamBam_Refere
long long int templete_length = aln -> templete_length;
- char nh_tag [20];
- char hi_tag [20];
- nh_tag[0]=0;
- hi_tag[0]=0;
-
- if(nh_val>=0)
- sprintf(nh_tag, "\tNH:i:%d",nh_val);
- if(hi_val>=0)
- sprintf(hi_tag, "\tHI:i:%d",hi_val);
//fprintf(stderr, "HN_TAG=%d\n", nh_val );
- int plen = snprintf(buff, buff_len-1, "%s\t%u\t%s\t%u\t%d\t%s\t%s\t%u\t%lld\t%s\t%s%s%s\n", aln -> read_name, aln -> flags , chro_name, chro_offset, aln -> mapping_quality, cigar, mate_chro_name, mate_chro_offset, templete_length, aln -> sequence , aln -> seq_quality, nh_tag, hi_tag);
+ int plen = snprintf(buff, buff_len-1, "%s\t%u\t%s\t%u\t%d\t%s\t%s\t%u\t%lld\t%s\t%s%s\n%c", aln -> read_name, aln -> flags , chro_name, chro_offset, aln -> mapping_quality, cigar, mate_chro_name, mate_chro_offset, templete_length, aln -> sequence , aln -> seq_quality, extra_tags, 0);
//fprintf(stderr,"%s", buff);
@@ -1387,11 +1386,13 @@ int SamBam_unzip(char * out , char * in , int inlen)
if(ret != Z_STREAM_END)
{
inflateEnd(&strm);
+ SUBREADprintf("DATA ERROR! code=%d\n", ret);
return -1;
}
int have = unzip_out_max_len - strm.avail_out;
inflateEnd(&strm);
+ //SUBREADprintf("DECOMPRESS GENERATED=%d\n", have);
return have;
}
diff --git a/src/sambam-file.h b/src/sambam-file.h
index 502f48c..80ad4cf 100644
--- a/src/sambam-file.h
+++ b/src/sambam-file.h
@@ -177,4 +177,7 @@ int is_badBAM(char * fn);
int SamBam_unzip(char * out , char * in , int inlen);
int SamBam_fetch_next_chunk(SamBam_FILE *fp);
+
+int SamBam_compress_cigar(char * cigar, int * cigar_int, int * ret_coverage);
+char cigar_op_char(int ch);
#endif
diff --git a/src/seek-zlib.c b/src/seek-zlib.c
new file mode 100644
index 0000000..7ab4428
--- /dev/null
+++ b/src/seek-zlib.c
@@ -0,0 +1,338 @@
+#include <assert.h>
+#include "core.h"
+#include "seek-zlib.h"
+
+#define SEEKGZ_INIT_TEXT_SIZE (1024*1024)
+#define SEEKGZ_BINBUFF_SIZE (1*1024*1024)
+
+unsigned long long seekgz_ftello(seekable_zfile_t * fp){
+	/* ftello() reports where the raw file read stopped; the avail_in bytes */
+	/* still waiting in the input buffer are subtracted from that offset. */
+	return (unsigned long long)ftello(fp -> gz_fp) - fp -> stem.avail_in;
+}
+
+unsigned int crc_pos(char * bin, int len){
+	/* CRC-32 over `len` bytes of `bin`, seeded with crc32(0, NULL, 0). */
+	unsigned int seed = crc32(0, NULL, 0);
+	return crc32(seed, (unsigned char *) bin, len);
+}
+
+/* Refills the compressed-input buffer from gz_fp. Nothing happens at end of
+ * file or while the buffer still holds at least half of its capacity. */
+void seekgz_binreadmore(seekable_zfile_t * fp){
+	if(feof(fp->gz_fp)) return;
+	if(fp -> stem.avail_in >= SEEKGZ_BINBUFF_SIZE / 2) return;
+
+	/* Slide the unread tail down to offset 0; source and destination may
+	 * overlap, hence memmove. */
+	if(fp -> in_pointer > 0 && fp -> stem.avail_in > 0)
+		memmove(fp -> current_chunk_bin, fp -> current_chunk_bin + fp -> in_pointer, fp -> stem.avail_in);
+	fp -> in_pointer = 0;
+
+	/* Append fresh bytes behind the unread tail, up to the buffer capacity. */
+	int fetched = fread(fp -> current_chunk_bin + fp -> stem.avail_in, 1 , SEEKGZ_BINBUFF_SIZE - fp -> stem.avail_in , fp -> gz_fp);
+	if(fetched > 0)
+		fp -> stem.avail_in += fetched;
+
+	/* zlib resumes reading from the start of the compacted buffer. */
+	fp -> stem.next_in = (unsigned char *)fp -> current_chunk_bin;
+}
+
+/* Returns the next compressed byte as a value in 0..255, or -1 when no
+ * input is available even after trying to refill the buffer. */
+int seekgz_bingetc(seekable_zfile_t * fp){
+	seekgz_binreadmore(fp);
+	if(fp -> stem.avail_in == 0) return -1;
+
+	int byte = fp -> current_chunk_bin [ fp -> in_pointer ++];
+	/* keep zlib's view (next_in / avail_in) in step with our own cursor */
+	fp -> stem.next_in = (unsigned char *)(fp -> current_chunk_bin + fp -> in_pointer);
+	fp -> stem.avail_in --;
+
+	/* a negative value means the buffer element was read as a signed char */
+	return (byte < 0) ? 256 + byte : byte;
+}
+
+/* Discards `tail_size` bytes (presumably the previous member's trailer), then
+ * parses one gzip member header as laid out in RFC 1952. Returns 0 on success,
+ * 1 if the magic bytes are missing or the input ends inside the header. */
+int seekgz_skip_header(seekable_zfile_t * fp, int tail_size){
+	int id1, id2;
+	for(id1=0; id1<tail_size; id1++)
+		seekgz_bingetc(fp);
+	id1 = seekgz_bingetc(fp);
+	id2 = seekgz_bingetc(fp);
+
+	if(id1 != 31 || id2 != 139){	// gzip magic bytes 0x1f 0x8b
+		//SUBREADprintf("header:%d,%d\n", id1, id2);
+		return 1;
+	}
+
+	seekgz_bingetc(fp); // CM
+	int FLG= seekgz_bingetc(fp); // FLG
+	seekgz_bingetc(fp); // MTIME, four bytes
+	seekgz_bingetc(fp);
+	seekgz_bingetc(fp);
+	seekgz_bingetc(fp);
+	seekgz_bingetc(fp); // XFL
+	seekgz_bingetc(fp); // OS
+	if(FLG < 0) return 1;	// input ended inside the fixed part of the header
+
+	// FLG bits: 0=FTEXT, 1=FHCRC, 2=FEXTRA, 3=FNAME, 4=FCOMMENT.
+	if(FLG & (1<<2)){ // FEXTRA: two-byte little-endian XLEN, then XLEN bytes
+		unsigned short XLEN=0;
+		XLEN = seekgz_bingetc(fp);
+		XLEN += seekgz_bingetc(fp)*256;
+		for(; XLEN>0; XLEN--){
+			seekgz_bingetc(fp);
+		}
+	}
+
+	for(id1 = 3; id1 <=4; id1++){
+		if(FLG & (1<<id1)){ // FNAME or FCOMMENT: zero-terminated string
+			while(1){
+				int namec = seekgz_bingetc(fp);
+				if(namec < 0) return 1;	// EOF: do not spin forever
+				if(0==namec) break;
+			}
+		}
+	}
+	if(FLG & (1<<1)){ // FHCRC: two-byte header CRC
+		seekgz_bingetc(fp);
+		seekgz_bingetc(fp);
+	}
+
+	fp -> next_block_file_offset = seekgz_ftello(fp);
+	if(fp -> block_start_in_file_offset<1)
+		fp -> block_start_in_file_offset = fp -> next_block_file_offset;
+	fp -> next_block_file_bits = 0;
+	fp -> dict_window_used = 0;
+	fp -> dict_window_pointer = 0;
+	fp -> is_the_last_chunk = 2;
+	return 0;
+}
+
+int seekgz_decompress_next_chunk(seekable_zfile_t * fp);
+/* Opens `fname` for seekable gzip reading: 0 = ok, -1 = cannot open, 1 = any other failure (nothing left allocated). */
+int seekgz_open(const char * fname, seekable_zfile_t * fp){
+	memset(fp, 0, sizeof(seekable_zfile_t));
+	fp -> gz_fp = f_subr_open(fname, "rb");
+	if(NULL==fp -> gz_fp)return -1;
+	fp -> current_chunk_bin = malloc(SEEKGZ_BINBUFF_SIZE);
+	fp -> current_chunk_txt = malloc(SEEKGZ_INIT_TEXT_SIZE);
+	fp -> txt_buffer_size = SEEKGZ_INIT_TEXT_SIZE;
+	fp -> stem.zalloc = Z_NULL;
+	fp -> stem.zfree = Z_NULL;
+	fp -> stem.opaque = Z_NULL;
+	fp -> stem.next_in = Z_NULL;	// stem.avail_in is already 0 from the memset above
+	int ok = fp -> current_chunk_bin && fp -> current_chunk_txt && 0 == seekgz_skip_header(fp,0);
+	if(ok && Z_OK == inflateInit2(&(fp -> stem), -15)) return 0;	// -15: raw deflate; the gzip header was skipped by hand
+	free(fp -> current_chunk_bin);	// failure: release everything acquired above
+	free(fp -> current_chunk_txt);
+	fclose(fp -> gz_fp);
+	memset(fp, 0, sizeof(seekable_zfile_t));	// leave no dangling pointers behind
+	return 1;
+}
+
+void seekgz_tell(seekable_zfile_t * fp, seekable_position_t * pos){	/* copies the current block's restart state into *pos */
+	pos -> in_block_text_offset = fp -> in_block_offset;
+	pos -> block_dict_window_size = fp -> block_dict_window_size;
+	memcpy(pos -> dict_window, fp -> block_dict_window, fp -> block_dict_window_size);
+	pos -> block_gzfile_bits = fp -> block_start_in_file_bits;
+	pos -> block_gzfile_offset = fp -> block_start_in_file_offset;
+}
+
+void seekgz_seek(seekable_zfile_t * fp, seekable_position_t * pos){ // Repositions fp at a saved position (the fields seekgz_tell() fills in).
+ //#warning "COMMENT THIS LINE !!!!!"
+ //fprintf(stderr, "SEEK => %llu[%d] + %u ; WIN=%d CRC=%u\n", pos -> block_gzfile_offset, pos -> block_gzfile_bits, pos -> in_block_text_offset, pos -> block_dict_window_size, crc_pos( pos -> dict_window, pos -> block_dict_window_size));
+ fseeko(fp->gz_fp, pos -> block_gzfile_offset - (pos -> block_gzfile_bits?1:0), SEEK_SET); // when the block starts mid-byte, back up one byte so it can be fetched below
+
+ if(Z_OK!=inflateReset(&fp->stem)) // the failure is only reported; execution continues
+ SUBREADprintf("FATAL: UNABLE TO INIT STREAM!\n\n\n");
+ if(pos -> block_dict_window_size>0){ // bit priming and dictionary restore happen only when a window was saved
+ if(pos -> block_gzfile_bits){
+ char nch = fgetc(fp->gz_fp); // NOTE(review): fgetc() returns int; EOF is not checked here
+ //fprintf(stderr, "SEEK 2 FPPOS:%llu, NCH=%d\n", ftello(fp->gz_fp) , nch);
+ inflatePrime(&fp->stem, pos -> block_gzfile_bits, nch>>(8-pos -> block_gzfile_bits)); // hand zlib the top block_gzfile_bits bits of that byte
+ }
+ if(Z_OK != inflateSetDictionary(&fp->stem, (unsigned char *)pos -> dict_window, pos -> block_dict_window_size)) // give zlib the saved window as its dictionary
+ SUBREADprintf("FATAL: UNABLE TO RESET STREAM!\n\n\n");
+ }
+
+ fp -> stem.avail_in = 0; // drop buffered compressed input; the next refill reads from the new file position
+ fp -> in_pointer = 0;
+ fp -> txt_buffer_used = 0;
+ fp -> in_chunk_offset = 0;
+ memcpy(fp -> block_dict_window, pos -> dict_window, pos -> block_dict_window_size); // both window copies start from the saved window
+ memcpy(fp -> dict_window, pos -> dict_window, pos -> block_dict_window_size);
+ fp -> block_dict_window_size = fp -> dict_window_used = pos -> block_dict_window_size;
+ fp -> dict_window_pointer = (pos -> block_dict_window_size<SEEKGZ_ZLIB_WINDOW_SIZE)?pos -> block_dict_window_size:0; // a completely full window restarts its write pointer at 0
+ fp -> in_block_offset = 0;
+ fp -> block_start_in_file_offset = pos -> block_gzfile_offset;
+ fp -> block_start_in_file_bits = pos -> block_gzfile_bits;
+
+ unsigned int chunk_end_block_offset=0; // running total of text bytes produced since the block start
+ while(1){ // decompress chunk by chunk until the total reaches the saved in-block text offset
+ seekgz_decompress_next_chunk(fp);
+ if(fp -> internal_error) break;
+ chunk_end_block_offset += fp -> txt_buffer_used;
+
+ if(chunk_end_block_offset >= pos -> in_block_text_offset){
+ fp -> in_chunk_offset = fp -> txt_buffer_used - (chunk_end_block_offset - pos -> in_block_text_offset); // buffer length minus the overshoot past the target
+ fp -> in_block_offset = pos -> in_block_text_offset;
+ break;
+ }
+ assert(chunk_end_block_offset < SEEKGZ_INIT_TEXT_SIZE && !feof(fp->gz_fp)); // aborts if the target is still not reached within SEEKGZ_INIT_TEXT_SIZE bytes or the file has ended
+ fp -> txt_buffer_used=0; // discard this chunk's text before decoding the next one
+ }
+}
+
+
+/*
+ * Inflate more compressed input into fp->current_chunk_txt, appending after
+ * the fp->txt_buffer_used bytes already stored there, and mirror every byte
+ * produced into the circular history buffer fp->dict_window.
+ *
+ * inflate() is called with Z_BLOCK, so (per the zlib manual) it returns at
+ * deflate block boundaries. When output was produced and inflate stopped at
+ * the end of a non-final block, fp->is_the_last_chunk is set and the position
+ * of the following block is stored in fp->next_block_file_offset /
+ * fp->next_block_file_bits.
+ *
+ * The loop ends on Z_STREAM_END, or as soon as at least 10 bytes of text are
+ * buffered and either a block boundary was reached or the input is used up.
+ *
+ * Returns 0 on success. On any inflate result other than Z_OK / Z_STREAM_END
+ * it logs the error, sets fp->internal_error and returns -1.
+ */
+int seekgz_decompress_next_chunk(seekable_zfile_t * fp){
+ unsigned int this_chunk_size = 0; // updated below but never read in this function
+ while(1){
+ seekgz_binreadmore(fp); // helper defined outside this view; presumably tops up the compressed input — TODO confirm
+ assert(fp -> txt_buffer_used < SEEKGZ_INIT_TEXT_SIZE * 7 / 8); // the text buffer must be less than 7/8 full before inflating
+
+ fp -> stem.avail_out = SEEKGZ_INIT_TEXT_SIZE - fp -> txt_buffer_used; // room left in the text buffer
+ int out_start = fp -> txt_buffer_used; // offset in current_chunk_txt where this round's output starts
+ fp -> stem.next_out = (unsigned char *)(fp -> current_chunk_txt + out_start);
+
+ int inlen = fp -> stem.avail_in ; // input available before the call, used to work out how much was consumed
+ //fprintf(stderr,"INFLATING_0 : LEN=%u, CRC=%u\n", fp -> stem.avail_in , crc_pos( fp -> stem.next_in , fp -> stem.avail_in ));
+
+ int ret = inflate(&(fp -> stem), Z_BLOCK);
+ int have = (SEEKGZ_INIT_TEXT_SIZE - fp -> txt_buffer_used) - fp -> stem.avail_out; // bytes produced by this call
+ int is_chunk_end = 0;
+
+ //#warning "COMMENT NEXT LINE!!!!!!"
+ //fprintf(stderr,"INFLATING: INLEN=%d , OLEN=%d, POS=%lld, RET=%d, TOOL=%s\n", inlen , have, seekgz_ftello(fp), ret, zlibVersion());
+ if(ret != Z_OK && ret != Z_STREAM_END){ //any error
+ SUBREADprintf("FATAL: INFLATE-ERROR=%d POS=%lld\n", ret, seekgz_ftello(fp));
+ fp -> internal_error = 1;
+ return -1;
+ }
+
+ fp -> in_pointer += inlen - fp -> stem.avail_in ; // advance by the number of input bytes inflate consumed
+
+ if(have > 0){
+ fp -> txt_buffer_used += have;
+ int one_length = 0, one_src_start = 0, one_dst_start = 0; // copy "one": the part written at the start of dict_window
+ int two_length = 0, two_src_start = 0, two_dst_start = 0; // copy "two": the part written at dict_window_pointer
+ int new_pntr = 0;
+ if(have <= SEEKGZ_ZLIB_WINDOW_SIZE - fp -> dict_window_pointer){ // case 1: the new bytes fit before the end of the window
+ one_length = 0;
+ two_src_start = out_start;
+ two_dst_start = fp -> dict_window_pointer;
+ two_length = have;
+ new_pntr = two_dst_start + two_length;
+ }else if(have > SEEKGZ_ZLIB_WINDOW_SIZE - fp -> dict_window_pointer && have <= SEEKGZ_ZLIB_WINDOW_SIZE){ // case 2: the new bytes wrap around once
+ one_src_start = out_start + SEEKGZ_ZLIB_WINDOW_SIZE - fp -> dict_window_pointer;
+ one_dst_start = 0;
+ one_length = have - SEEKGZ_ZLIB_WINDOW_SIZE + fp -> dict_window_pointer;
+ two_src_start = out_start;
+ two_dst_start = fp -> dict_window_pointer;
+ two_length = SEEKGZ_ZLIB_WINDOW_SIZE - fp -> dict_window_pointer;
+ new_pntr = one_dst_start + one_length;
+ }else{ // case 3: more than a whole window was produced; only its last SEEKGZ_ZLIB_WINDOW_SIZE bytes are kept
+ one_src_start = out_start + have - fp -> dict_window_pointer;
+ one_dst_start = 0;
+ one_length = fp -> dict_window_pointer;
+ two_src_start = out_start + have - SEEKGZ_ZLIB_WINDOW_SIZE;
+ two_dst_start = fp -> dict_window_pointer;
+ two_length = SEEKGZ_ZLIB_WINDOW_SIZE - fp -> dict_window_pointer;
+ new_pntr = fp -> dict_window_pointer;
+ }
+
+ if(one_length > 0)memcpy(fp -> dict_window + one_dst_start, fp -> current_chunk_txt + one_src_start, one_length);
+ //fprintf(stderr,"CPY: %d -> %d [%d] ; PNTR=%d, NEWPNTR=%d, have=%d\n", two_src_start, two_dst_start, two_length, fp -> dict_window_pointer, new_pntr, have);
+ memcpy(fp -> dict_window + two_dst_start, fp -> current_chunk_txt + two_src_start, two_length);
+ fp -> dict_window_pointer = new_pntr;
+ fp -> dict_window_used = min(fp -> dict_window_used + have, SEEKGZ_ZLIB_WINDOW_SIZE); // valid history is capped at one window
+
+ is_chunk_end = (fp -> stem.data_type & 128) && !(fp -> stem.data_type & 64); // zlib manual: bit 128 = returned right after an end-of-block code, bit 64 = decoding the final block
+ if(is_chunk_end){
+ fp -> is_the_last_chunk = 1;
+ unsigned long long file_pos_after_avail = seekgz_ftello(fp); // helper defined outside this view; presumably the current position in the compressed file — TODO confirm
+ fp -> next_block_file_offset = file_pos_after_avail;
+ fp -> next_block_file_bits = fp->stem.data_type & 7; // zlib manual: low bits = number of unused bits in the last input byte
+ }
+ this_chunk_size += have;
+ }
+
+ if( 0 == fp -> stem.avail_in ) this_chunk_size = 0;
+
+ if(Z_STREAM_END == ret || ((is_chunk_end || 0 == fp -> stem.avail_in) && fp -> txt_buffer_used >=10)){
+ if(Z_STREAM_END == ret){
+ seekgz_skip_header(fp, 8); // NOTE(review): presumably skips the 8-byte gzip trailer and the next member's header — confirm against seekgz_skip_header
+ inflateReset(&fp->stem); // make the stream ready to decode another deflate stream
+ }
+ break;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Return the next byte of decompressed text.
+ *
+ * When the current text chunk is exhausted, another chunk is decompressed
+ * first. When the byte returned is the last one of a chunk that ended on a
+ * deflate block boundary, the reader's block bookkeeping is advanced to the
+ * next block and a linearised copy of the history window is saved in
+ * fp->block_dict_window.
+ *
+ * Returns the byte as a value in 0..255 (like fgetc), EOF when the file is
+ * at its end and fewer than 10 compressed bytes remain buffered, or -1 when
+ * the reader is in an error state or decompression fails.
+ */
+int seekgz_next_char(seekable_zfile_t * fp){
+    if(fp -> internal_error) return -1;
+    if(fp -> in_chunk_offset >= fp -> txt_buffer_used){
+        if(feof(fp -> gz_fp) && fp -> stem.avail_in < 10 )
+            return EOF;
+        else {
+            fp -> txt_buffer_used = 0;
+            fp -> in_chunk_offset = 0;
+            int decompress_ret = seekgz_decompress_next_chunk(fp);
+            if(decompress_ret) return -1;
+        }
+    }
+    fp -> in_block_offset ++;
+    /* Convert through unsigned char: where plain char is signed, a byte
+     * >= 0x80 would otherwise be returned as a negative int and be mistaken
+     * for EOF or the -1 error code by callers. */
+    int retc = (unsigned char) fp -> current_chunk_txt[fp -> in_chunk_offset++];
+
+    if(fp -> is_the_last_chunk && fp -> in_chunk_offset == fp -> txt_buffer_used){
+        //fprintf(stderr,"BLOCK_END_POINT ; POS=%llu ; BITS=%u\n", fp -> block_start_in_file_offset, fp -> block_start_in_file_bits);
+        /* The block is fully consumed: the next character belongs to the
+         * block whose start was recorded during decompression. */
+        fp -> in_block_offset = 0;
+        fp -> block_start_in_file_offset = fp -> next_block_file_offset;
+        fp -> block_start_in_file_bits = fp -> next_block_file_bits;
+
+        if(1 == fp -> is_the_last_chunk){
+            fp -> block_dict_window_size = fp -> dict_window_used;
+
+            if(fp -> dict_window_used < SEEKGZ_ZLIB_WINDOW_SIZE)
+                /* The ring has not wrapped yet: it is already in order. */
+                memcpy(fp -> block_dict_window , fp -> dict_window, fp -> dict_window_used);
+            else{
+                /* Unroll the ring: the oldest byte sits at dict_window_pointer. */
+                memcpy(fp -> block_dict_window , fp -> dict_window + fp -> dict_window_pointer, SEEKGZ_ZLIB_WINDOW_SIZE - fp -> dict_window_pointer);
+                memcpy(fp -> block_dict_window + SEEKGZ_ZLIB_WINDOW_SIZE - fp -> dict_window_pointer, fp -> dict_window, fp -> dict_window_pointer);
+            }
+        }else
+            fp -> block_dict_window_size = 0;
+
+        fp -> is_the_last_chunk = 0;
+    }
+
+    return retc;
+}
+
+/*
+ * Read one line of decompressed text into buf.
+ *
+ * Characters are stored until a line break or a negative value from
+ * seekgz_next_char() is seen, or until buf_size - 1 characters are stored.
+ * A line that ends before the buffer is full is always terminated with
+ * '\n' followed by a NUL, even when it was ended by a negative value rather
+ * than a real line break.
+ *
+ * Returns the number of characters stored (counting the '\n' when present),
+ * or 0 when the very first read already yields a negative value.
+ */
+int seekgz_gets(seekable_zfile_t * fp, char * buf, int buf_size){
+    int used;
+
+    buf[0] = 0;
+    for(used = 0; used < buf_size - 1; used++){
+        int ch = seekgz_next_char(fp);
+
+        if(ch >= 0 && ch != '\n'){
+            buf[used] = ch;
+            continue;
+        }
+
+        /* End of line, end of file or error. */
+        if(ch < 0 && used < 1) return 0;
+        buf[used] = '\n';
+        buf[used + 1] = 0;
+        return used + 1;
+    }
+
+    /* The buffer is full before a line break was seen. */
+    buf[used] = 0;
+    return used;
+}
+
+/*
+ * Release the resources held by a seekable gzip reader: the underlying file
+ * handle, the zlib inflate state and the two chunk buffers.
+ * The seekable_zfile_t structure itself is not freed.
+ */
+void seekgz_close(seekable_zfile_t * fp){
+    fclose(fp -> gz_fp);
+    /* fp->stem is driven through inflate()/inflateReset() in this file, so
+     * it owns zlib-allocated state that only inflateEnd() releases. */
+    inflateEnd(&fp -> stem);
+    free(fp -> current_chunk_txt);
+    free(fp -> current_chunk_bin);
+}
diff --git a/src/seek-zlib.h b/src/seek-zlib.h
new file mode 100644
index 0000000..ff10135
--- /dev/null
+++ b/src/seek-zlib.h
@@ -0,0 +1,23 @@
+#ifndef __SEEK_ZLIB_H_
+#define __SEEK_ZLIB_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <zlib.h>
+#include "subread.h"
+
+// returns 0 if OK; returns 1 if the file is not indexable; returns -1 if file doesn't exist.
+int seekgz_open(const char * fname, seekable_zfile_t * fp);
+
+// returns length in bytes if OK (length includes the line break at the end); returns 0 if EOF
+int seekgz_gets(seekable_zfile_t * fp, char * buf, int buf_size);
+// NOTE(review): definition not in view; presumably stores the current read position in *pos so that seekgz_seek() can return to it.
+void seekgz_tell(seekable_zfile_t * fp, seekable_position_t * pos);
+// restores the block start and dictionary window saved in *pos, then decompresses forward to pos->in_block_text_offset.
+void seekgz_seek(seekable_zfile_t * fp, seekable_position_t * pos);
+// returns the next decompressed character; returns EOF at the end of the file; returns -1 on a decompression error.
+int seekgz_next_char(seekable_zfile_t * fp);
+// closes the underlying file and frees the two chunk buffers; fp itself is not freed.
+void seekgz_close(seekable_zfile_t * fp);
+#endif
diff --git a/src/sorted-hashtable.c b/src/sorted-hashtable.c
index b39b35b..49d908e 100644
--- a/src/sorted-hashtable.c
+++ b/src/sorted-hashtable.c
@@ -33,6 +33,7 @@
#include"core.h"
#define _gehash_hash(k) ((unsigned int)(k))
+#define WITHOUT_CLUSTER_ORDERING 0
int gehash_create(gehash_t * the_table, size_t expected_size, char is_small_table)
{
@@ -698,7 +699,7 @@ size_t gehash_go_q(gehash_t * the_table, gehash_key_t raw_key, int offset, int r
continue;
for (i=0;i<datalen;i++)
{
- if (dat[i] == kv)
+ if (dat[i] == kv && (WITHOUT_CLUSTER_ORDERING || subread_number + 1 > vote -> last_subread_cluster[offsetX][i]))
{
gene_vote_number_t test_max = (vote->votes[offsetX][i]);
test_max += 1;
@@ -709,6 +710,8 @@ size_t gehash_go_q(gehash_t * the_table, gehash_key_t raw_key, int offset, int r
if (offset_from_5 +16 > vote->coverage_end [offsetX][i])
vote->coverage_end [offsetX][i] = offset_from_5+16;
+ vote -> last_subread_cluster[offsetX][i] = subread_number + 1;
+
vote->max_vote = max(vote->max_vote , test_max);
i = 9999999;
}
@@ -728,6 +731,7 @@ size_t gehash_go_q(gehash_t * the_table, gehash_key_t raw_key, int offset, int r
vote->masks[offsetX][i]= (is_reversed?IS_NEGATIVE_STRAND:0);
vote->coverage_start [offsetX][i] = offset_from_5;
vote->coverage_end [offsetX][i] = offset_from_5+16;
+ vote -> last_subread_cluster[offsetX][i] = subread_number + 1;
if(vote->max_vote==0)
vote->max_vote = 1;
@@ -755,6 +759,10 @@ size_t gehash_go_q(gehash_t * the_table, gehash_key_t raw_key, int offset, int r
for (i=0;i<datalen;i++)
{
+
+ if((!WITHOUT_CLUSTER_ORDERING ) && subread_number + 1 <= vote -> last_subread_cluster[offsetX][i])
+ continue;
+
int di = dat[i];
int dist0 = kv-di;
if( dist0 >= -indel_tolerance && dist0 <= indel_tolerance )
@@ -796,6 +804,8 @@ size_t gehash_go_q(gehash_t * the_table, gehash_key_t raw_key, int offset, int r
else
vote -> indel_recorder[offsetX][i][toli+1] = subread_number+1;
+ vote -> last_subread_cluster[offsetX][i] = subread_number + 1;
+
vote->max_vote = max(vote->max_vote , test_max);
i = 9999999;
}
@@ -823,6 +833,7 @@ size_t gehash_go_q(gehash_t * the_table, gehash_key_t raw_key, int offset, int r
vote -> quality[offsetX2][datalen2]=quality;
vote -> votes[offsetX2][datalen2]=1;
vote -> toli[offsetX2][datalen2]=0;
+ vote -> last_subread_cluster[offsetX2][datalen2] = subread_number + 1;
// data structure of recorder:
// {unsigned char subread_start; unsigned char subread_end, char indel_offset_from_start}
@@ -849,6 +860,7 @@ size_t gehash_go_q(gehash_t * the_table, gehash_key_t raw_key, int offset, int r
{
// VER_1
+ // VER_2
struct gehash_bucket * current_bucket;
int i = 0, items;
@@ -910,6 +922,8 @@ size_t gehash_go_q(gehash_t * the_table, gehash_key_t raw_key, int offset, int r
unsigned int * dat2, *dat;
dat = dat2 = vote -> pos[offsetX2];
+ //SUBREADprintf("You can find KV at %u\n", kv);
+
for(iix = 0; iix<=ii_end; iix = iix>0?-iix:(-iix+INDEL_SEGMENT_SIZE))
{
if(iix)
@@ -924,10 +938,12 @@ size_t gehash_go_q(gehash_t * the_table, gehash_key_t raw_key, int offset, int r
for (i=0;i<datalen;i++)
{
+ if((!WITHOUT_CLUSTER_ORDERING ) && subread_number + 1 <= vote -> last_subread_cluster[offsetX][i]) continue;
int di = dat[i];
int dist0 = kv-di;
if( dist0 >= -indel_tolerance && dist0 <= indel_tolerance )
{
+ //SUBREADprintf("IIX = %d, BASE=%u, change_dist=%d, subread=#%d\n", iix, dat[i], dist0, subread_number);
if(is_reversed == (0!=(vote -> masks[offsetX][i]&IS_NEGATIVE_STRAND)))
{
@@ -958,6 +974,7 @@ size_t gehash_go_q(gehash_t * the_table, gehash_key_t raw_key, int offset, int r
else
vote -> indel_recorder[offsetX][i][toli+1] = subread_number+1;
+ vote -> last_subread_cluster[offsetX][i] = subread_number + 1;
vote->max_vote = max(vote->max_vote , test_max);
i = 9999999;
}
@@ -993,6 +1010,7 @@ size_t gehash_go_q(gehash_t * the_table, gehash_key_t raw_key, int offset, int r
vote->current_indel_cursor [offsetX2][datalen2] = 0;
vote->coverage_start [offsetX2][datalen2] = offset;
vote->coverage_end [offsetX2][datalen2] = offset+16;
+ vote -> last_subread_cluster[offsetX2][datalen2] = subread_number + 1;
if (vote->max_vote==0)
vote->max_vote = 1;
diff --git a/src/subfilter.c b/src/subfilter.c
new file mode 100644
index 0000000..68903f6
--- /dev/null
+++ b/src/subfilter.c
@@ -0,0 +1,48 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include "subread.h"
+#include "core.h"
+#include "sambam-file.h"
+#include "input-files.h"
+
+
+static struct option long_options[] =
+{
+ {"in", required_argument, 0, 'i'},
+ {"filter", required_argument, 0, 'F'},
+ {0, 0, 0, 0}
+};
+
+/*
+ * Entry point of subfilter. At this stage the program only parses its
+ * command line and exits:
+ *   -i / --in <file>       input file name
+ *   -F / --filter <fmt>    the value is compared with "SAM" to choose the
+ *                          output format
+ * Returns 0 on success and 1 when the input file name is too long.
+ */
+int main(int argc, char ** argv)
+{
+    int c;
+    int option_index = 0;
+
+    optind = 1;
+    opterr = 1;
+    optopt = 63;
+
+    char in_name[300];
+    int out_SAM = 1;
+
+    in_name[0] = 0;
+
+    while ((c = getopt_long (argc, argv, "i:F:", long_options, &option_index)) != -1)
+    {
+        switch(c){
+
+            case 'F':
+                out_SAM = (strcmp(optarg, "SAM")==0);
+                break;
+            case 'i':
+                /* The name comes straight from the command line, so its
+                 * length must be checked before copying it into the
+                 * fixed-size buffer. */
+                if(strlen(optarg) >= sizeof(in_name)){
+                    fprintf(stderr, "The input file name is too long (at most %d characters).\n", (int)sizeof(in_name) - 1);
+                    return 1;
+                }
+                strcpy(in_name, optarg);
+                break;
+        }
+    }
+
+    /* Nothing consumes the parsed options yet. */
+    (void) in_name;
+    (void) out_SAM;
+    return 0;
+}
diff --git a/src/subread.h b/src/subread.h
index 5955d9c..6145836 100644
--- a/src/subread.h
+++ b/src/subread.h
@@ -23,6 +23,7 @@
#include <stdlib.h>
#include <pthread.h>
#include <stdio.h>
+#include <zlib.h>
#ifndef MAKE_STANDALONE
#ifndef RUNNING_ENV
@@ -32,8 +33,8 @@
#include "hashtable.h"
-#define PRINT_BOX_NOCOLOR_FOR_COLON 2
-#define PRINT_BOX_CENTER 1
+#define INPUT_BUFFER_SIZE (8*1024*1024)
+#define OUTPUT_BUFFER_SIZE (32*1024*1024)
#define SAM_FLAG_PAIRED_TASK 0x01
#define SAM_FLAG_FIRST_READ_IN_PAIR 0x40
@@ -55,18 +56,26 @@
-#define MAX_PIECE_JUNCTION_READ 7
-#define MAX_READ_LENGTH 1210
+#define MAX_THREADS 40
+#define MAX_EVENTS_IN_READ 8
+
+//#warning "============== REMOVE '* 15' FROM THE NEXT LINE ================"
+#define MAX_READ_LENGTH ( 1210 )
#define MAX_READ_NAME_LEN 100
#define MAX_CHROMOSOME_NAME_LEN 100
#define MAX_FILE_NAME_LENGTH 300
+#define MULTI_THREAD_OUTPUT_ITEMS 4096
+
+//#warning "============ CHANGE THE NEXT LINE TO 120 ========"
#define EXON_LONG_READ_LENGTH 120
#define EXON_MAX_CIGAR_LEN 48
+#define FC_CIGAR_PARSER_ITEMS 11
+
#define MAX_INDEL_SECTIONS 7
//#define XBIG_MARGIN_RECORD_SIZE 24
#define MAX_INSERTION_LENGTH 200
-#define FC_CIGAR_PARSER_ITEMS 9
+#define MAX_DELETION_LENGTH 1000
//#define BASE_BLOCK_LENGTH 15000000
//#define NEED_SUBREAD_STATISTIC
@@ -89,7 +98,9 @@
#define IS_BREAKEVEN_READ (8192*4)
#define IS_R1R2_EQUAL_LEN 1024
-#if defined(MACOS) || defined(FREEBSD)
+#define USE_POSIX_MUTEX_LOCK
+
+#if defined(MACOS) || defined(FREEBSD) || defined(USE_POSIX_MUTEX_LOCK)
typedef pthread_mutex_t subread_lock_t;
#define pthread_spinlock_t pthread_mutex_t
#define pthread_spin_lock pthread_mutex_lock
@@ -144,7 +155,7 @@ typedef pthread_spinlock_t subread_lock_t;
//#define QUALITY_KILL_SUBREAD 150
-
+typedef long long subread_read_number_t;
typedef unsigned int gehash_key_t;
typedef unsigned int gehash_data_t;
//typedef float gene_quality_score_t;
@@ -156,6 +167,7 @@ typedef short gene_vote_number_t;
#define XOFFSET_TABLE_SIZE 250000
#define ANCHORS_NUMBER 259
+#define MAX_ALIGNMENT_PER_ANCHOR 2
#define BEXT_RESULT_LIMIT 16
@@ -165,11 +177,11 @@ typedef short gene_vote_number_t;
//#define LARGE_GENE_VOTE_TABLE
#ifdef LARGE_GENE_VOTE_TABLE
#warning "Using LARGE_GENE_VOTE_TABLE"
-#define GENE_VOTE_SPACE 32
-#define GENE_VOTE_TABLE_SIZE 109
+#define GENE_VOTE_SPACE 173
+#define GENE_VOTE_TABLE_SIZE 331
#else
-#define GENE_VOTE_SPACE 8
-#define GENE_VOTE_TABLE_SIZE 61
+#define GENE_VOTE_SPACE 24
+#define GENE_VOTE_TABLE_SIZE 30
#endif
#define MAX_ANNOTATION_EXONS 30000
@@ -276,7 +288,7 @@ typedef struct {
gene_vote_number_t votes [GENE_VOTE_TABLE_SIZE][GENE_VOTE_SPACE];
gene_quality_score_t quality [GENE_VOTE_TABLE_SIZE][GENE_VOTE_SPACE];
short masks [GENE_VOTE_TABLE_SIZE][GENE_VOTE_SPACE];
- short last_offset [GENE_VOTE_TABLE_SIZE][GENE_VOTE_SPACE];
+ gene_vote_number_t last_subread_cluster [GENE_VOTE_TABLE_SIZE][GENE_VOTE_SPACE];
gene_vote_number_t indel_recorder [GENE_VOTE_TABLE_SIZE][GENE_VOTE_SPACE][MAX_INDEL_TOLERANCE*3];
char current_indel_cursor[GENE_VOTE_TABLE_SIZE][GENE_VOTE_SPACE];
char toli[GENE_VOTE_TABLE_SIZE][GENE_VOTE_SPACE];
@@ -351,15 +363,62 @@ struct thread_input_buffer {
};
+#define SEEKGZ_ZLIB_WINDOW_SIZE (32*1024)
+
+
+typedef struct { // State of one gzip input file read through the seekgz_* functions.
+ FILE * gz_fp; // underlying compressed file; tested with feof() and closed by seekgz_close()
+ char * current_chunk_txt; // decompressed text buffer: inflate() writes into it, seekgz_next_char() reads from it
+ char * current_chunk_bin; // freed by seekgz_close(); presumably the compressed-input buffer — TODO confirm
+ z_stream stem; // zlib inflate stream
+ unsigned int in_pointer; // advanced by the number of input bytes each inflate() call consumed
+ unsigned int in_chunk_offset; // index in current_chunk_txt of the next character to return
+ unsigned int in_block_offset; // characters returned since the start of the current block
+ unsigned int txt_buffer_size; // NOTE(review): not touched by the reader code visible in this patch section; presumably the allocated size of current_chunk_txt
+ unsigned int txt_buffer_used; // number of decompressed bytes currently held in current_chunk_txt
+ unsigned long long block_start_in_file_offset; // position in the compressed file of the block being read
+ unsigned int block_start_in_file_bits; // bit-level part of that position (taken from stem.data_type & 7)
+
+ unsigned long long next_block_file_offset; // position of the following block, recorded when inflate stops at a block end
+ unsigned int next_block_file_bits; // stem.data_type & 7 at that moment
+
+ int is_the_last_chunk; // set to 1 when the buffered chunk ends on a block boundary; cleared by seekgz_next_char()
+ int internal_error; // set to 1 after an inflate error; seekgz_next_char() then returns -1
+
+ unsigned int dict_window_pointer; // next write position in the circular dict_window
+ unsigned int dict_window_used; // number of valid bytes in dict_window, at most SEEKGZ_ZLIB_WINDOW_SIZE
+ char dict_window[SEEKGZ_ZLIB_WINDOW_SIZE]; // circular buffer holding the most recent decompressed output
+
+ unsigned int block_dict_window_size; // number of valid bytes in block_dict_window
+ char block_dict_window[SEEKGZ_ZLIB_WINDOW_SIZE]; // oldest-first copy of dict_window taken when the current block started
+} seekable_zfile_t;
+
+typedef struct{ // A saved read position in a seekable gzip file, as consumed by seekgz_seek().
+ char dict_window[SEEKGZ_ZLIB_WINDOW_SIZE]; // dictionary handed to inflateSetDictionary() and copied back into the reader on seek
+ unsigned long long block_gzfile_offset; // position in the compressed file of the block containing the saved position
+ unsigned int block_gzfile_bits; // bit-level part of that block position
+ unsigned int block_dict_window_size; // number of valid bytes in dict_window
+
+ unsigned int in_block_text_offset; // number of decompressed characters between the block start and the saved position
+} seekable_position_t;
+
+
typedef struct {
char filename [300];
int space_type ;
int file_type ;
- FILE * input_fp;
- unsigned int read_chunk_start;
+ void * input_fp; // can be system (FILE * sam or fastq or fasta), (seekable_zfile_t *)
+ unsigned long long read_chunk_start;
} gene_input_t;
+typedef struct{ // A saved position in an input file; the two members share storage.
+ union{
+ unsigned long long simple_file_position; // NOTE(review): presumably a byte offset in a plain (uncompressed) file — confirm against callers
+ seekable_position_t seekable_gzip_position; // position in a gzip file, in the form used by seekgz_tell()/seekgz_seek()
+ };
+} gene_inputfile_position_t;
+
typedef struct{
unsigned int small_key;
diff --git a/src/subtools.c b/src/subtools.c
index 569a469..6b19bbb 100644
--- a/src/subtools.c
+++ b/src/subtools.c
@@ -139,9 +139,10 @@ int main(int argc, char ** argv)
unsigned long long int added_lines = 0;
+ double t0 = miltime();
while(1)
{
- char * is_ret = SamBam_fgets(in_fp, fline, 2999, 1);
+ char * is_ret = SamBam_fgets(in_fp, fline, 2999, 0);
if(!is_ret) break;
int ret = sort_SAM_add_line(&writer, fline, strlen(fline));
if(ret<0)
@@ -155,7 +156,13 @@ int main(int argc, char ** argv)
}
//printf("N1=%llu\n", writer.unpaired_reads);
+ double t1 = miltime();
sort_SAM_finalise(&writer);
+
+ double t2 = miltime();
+
+ SUBREADprintf("Loading time: %.2f, Sorting time: %.2f\n", t1-t0, t2-t1);
+
if(writer.unpaired_reads && 0)
SUBREADprintf("%llu reads were re-ordered.\n", writer.written_reads);
diff --git a/src/t.c b/src/t.c
new file mode 100644
index 0000000..55dc9c5
--- /dev/null
+++ b/src/t.c
@@ -0,0 +1,4 @@
+main() // Minimal program whose only statement stores the int 1 at address 0.
+{
+ *(int *)0=1; // NOTE(review): write through a null pointer — presumably a deliberate crash used to test fault handling; confirm intent
+}
diff --git a/src/test-seek-zlib.c b/src/test-seek-zlib.c
new file mode 100644
index 0000000..ab3cc6b
--- /dev/null
+++ b/src/test-seek-zlib.c
@@ -0,0 +1,101 @@
+#include <assert.h>
+#include "seek-zlib.h"
+
+
+/*
+ * Round-trip test for the seekable gzip reader.
+ *
+ * Arguments: <gz_file> <text_size> <rand_seed> <tested_cells>
+ *
+ * Pass 1 reads the file line by line; roughly every text_size/tested_cells
+ * characters it saves the current position with seekgz_tell() and remembers
+ * the line that follows it. Pass 2 seeks back to every saved position and
+ * checks that the same line is read again, printing details for every
+ * mismatch and a summary at the end.
+ *
+ * Returns 0 when the test ran, 1 on bad arguments or when the file cannot
+ * be opened.
+ */
+int main(int argc, char ** argv){
+    if(argc < 5){
+        fprintf(stderr, "Usage: test-seek-zlib <gz_file> <text_size> <rand_seed> <tested_cells>\n");
+        return 1;
+    }
+
+    unsigned int tested_cell_total = atoi(argv[4]);
+    if(tested_cell_total < 1){
+        /* A zero count would divide by zero when the step is computed. */
+        fprintf(stderr, "ERROR: the number of tested cells must be positive.\n");
+        return 1;
+    }
+    unsigned int tested_inc ;
+    tested_inc = 337;
+
+    seekable_zfile_t * fp = malloc(sizeof(seekable_zfile_t));
+    seekable_position_t * pos = malloc(tested_cell_total*sizeof(seekable_position_t));
+    if(NULL == fp || NULL == pos){
+        fprintf(stderr, "ERROR: out of memory.\n");
+        free(fp);
+        free(pos);
+        return 1;
+    }
+    /* seekgz_open returns 0 only when the file was opened and is indexable. */
+    if(seekgz_open(argv[1], fp)){
+        fprintf(stderr, "ERROR: unable to open '%s' as a seekable gzip file.\n", argv[1]);
+        free(fp);
+        free(pos);
+        return 1;
+    }
+
+    char buf[1002];
+    char should[tested_cell_total][1002];
+    long long int text_pos[tested_cell_total];
+    long long int alll = 0;
+    long long int full_size = atoll(argv[2]);
+    long long int tested_cell_no = 0;
+    long long int step = full_size / tested_cell_total;
+
+    unsigned int rand_seed = atoi(argv[3]);
+    //step = 10;
+
+    /* Index of the cell whose expected line is the next line read; -1 if none. */
+    int write_cell = -1;
+
+    long long int first_all = 0;
+    while(1){
+        int rl = seekgz_gets(fp, buf, 1000);
+
+        if(rl<1) break;
+        //fprintf(stdout, "%s", buf);
+        alll += rl;
+
+        if(write_cell >=0){
+            strcpy(should[write_cell], buf);
+            write_cell = -1;
+        }
+
+        if(1&& alll - rand_seed > tested_cell_no * step )
+        {
+            if(tested_cell_no == 0)
+                first_all = alll;
+            if(tested_cell_no < tested_cell_total){
+                /* Scatter the cells so that pass 2 visits them out of file order. */
+                write_cell = ((1+tested_cell_no) * tested_inc) % tested_cell_total;
+                seekgz_tell(fp, pos+write_cell);
+                //assert(pos[write_cell].block_gzfile_offset > 10);
+                text_pos[write_cell] = alll;
+                if(alll==344780683){
+                    char * quickview = malloc(100000);
+                    quickview[0]=0;
+                    //memcpy(quickview, fp -> block_dict_window, fp -> block_dict_window_size);
+                    quickview[fp -> block_dict_window_size]=0;
+                    fprintf(stderr, "----------------------------------------------------\n%s\n=====================================================\n\n", quickview);
+                    free(quickview);
+                }
+            }
+            tested_cell_no++;
+        }
+    }
+
+    fprintf(stderr, "TOTAL=%lld\n",alll);
+    assert(tested_cell_no >= tested_cell_total);
+
+    int i, valid=0;
+    long long int last_all = 0;
+    for(i=0;i< tested_cell_total ;i++){
+        //assert(pos[i].block_gzfile_offset > 10);
+        seekgz_seek(fp, pos+i);
+        unsigned int inchunk = fp -> in_chunk_offset, chunk_size = fp -> txt_buffer_used;
+        //fprintf(stderr, "JUMPTO=%u\n", pos[i].block_gzfile_offset);
+        int rl = seekgz_gets(fp, buf, 1000);
+        if(rl <= 0) break;
+        if(strcmp(should[i], buf)!=0)
+        {
+            char * quickshow = malloc(1000000);
+            quickshow[0]=0;
+            //memcpy(quickshow , pos[i].dict_window, pos[i].block_dict_window_size);
+            quickshow[ pos[i].block_dict_window_size ] = 0;
+
+            fprintf(stderr, "=================================================\nMATCH:LEN=%d; TOTAL=%lld; GZFP=%lld; INBLOCK=%u; INCHUNK=%u/%u\t\tMATCH=%d; \nREAD=%s\nORGN=%s\n%s\n", rl, text_pos[i], pos[i].block_gzfile_offset, pos[i].in_block_text_offset, inchunk, chunk_size, strcmp(should[i], buf), buf, should[i], quickshow);
+            free(quickshow);
+        }
+        else
+            valid++;
+        last_all = text_pos[i];
+    }
+
+    fprintf(stderr, "FINISHED size=%lld first=%lld [-1]=%lld DOTS=%d/%u rand=%u\n", full_size, first_all, last_all, valid,tested_cell_total , rand_seed);
+
+    seekgz_close(fp);
+    free(fp);
+    free(pos);
+    return 0;
+}
diff --git a/src/test_qs.c b/src/test_qs.c
new file mode 100644
index 0000000..6ff5ec7
--- /dev/null
+++ b/src/test_qs.c
@@ -0,0 +1,106 @@
+/* Double-Click To Select Code */
+
+#include<stdio.h>
+#include<stdlib.h>
+#include<string.h>
+
+void merge_sort_run(void * arr, int start, int items, int compare (void * arr, int l, int r), void exchange(void * arr, int l, int r), void merge(void * arr, int start, int items, int items2));
+void merge_sort(void * arr, int arr_size, int compare (void * arr, int l, int r), void exchange(void * arr, int l, int r), void merge(void * arr, int start, int items, int items2));
+void merge_ints(void * arr, int start, int items1, int items2);
+int compare_ints(void* arr, int l, int r);
+void exchange_ints(void* arr, int l, int r);
+
+/*
+ * Sorts the first 30 values of a fixed test vector with merge_sort() and
+ * prints the result, one "V[index]=value" line per element.
+ */
+int main(void)
+{
+
+    int arr[30];
+    int val [] = {9,1,2,9,6,7,8,9,1,2,3,9,4,1,2,3,4,6,1,9,3,1,4,5,3,2,5,4,2,4,8};
+    int x;
+
+    for(x=0; x<30;x++)arr[x]=val[x];
+
+    merge_sort(arr, 30, compare_ints, exchange_ints, merge_ints);
+
+    for(x=0; x<30;x++)printf("V[%d]=%d\n",x, arr[x]);
+
+    return 0;
+}
+
+/*
+ * Swap elements l and r of the int array arr.
+ * Returns nothing, which matches the "exchange" callback type expected by
+ * merge_sort() and merge_sort_run().
+ */
+void exchange_ints(void* arr, int l, int r)
+{
+    int *arri = arr;
+    int tm;
+    tm=arri[l];
+    arri[l]=arri[r];
+    arri[r]=tm;
+}
+
+/*
+ * Three-way comparison of elements l and r of the int array arr:
+ * returns 1 when arr[l] > arr[r], -1 when arr[l] < arr[r], 0 when equal.
+ */
+int compare_ints(void* arr, int l, int r)
+{
+    const int * values = arr;
+    const int left = values[l];
+    const int right = values[r];
+
+    if(left > right) return 1;
+    return (left == right) ? 0 : -1;
+}
+
+
+
+/*
+ * Merge two adjacent ascending runs of the int array arr in place.
+ * The first run is arr[start .. start+items1-1] and the second run is the
+ * items2 elements that follow it. When both heads are equal, the element of
+ * the second run is taken first.
+ */
+void merge_ints(void * arr, int start, int items1, int items2)
+{
+    int * values = arr;
+    const int total = items1 + items2;
+    const int middle = start + items1;
+    const int stop = middle + items2;
+    int left = start, right = middle, filled = 0;
+    int * scratch = malloc(sizeof(int)*(items1+items2));
+
+    /* Both runs still have elements: take the smaller head. */
+    while(left < middle && right < stop)
+    {
+        if(values[left] >= values[right])
+            scratch[filled++] = values[right++];
+        else
+            scratch[filled++] = values[left++];
+    }
+
+    /* One run is exhausted: append whatever remains of the other. */
+    while(left < middle)
+        scratch[filled++] = values[left++];
+    while(right < stop)
+        scratch[filled++] = values[right++];
+
+    memcpy(values + start, scratch, sizeof(int) * total);
+    free(scratch);
+}
+
+
+/*
+ * Sort the items elements of arr that begin at index start.
+ * Ranges of at most 4 elements are sorted by selection sort through the
+ * compare/exchange callbacks; longer ranges are halved, each half is sorted
+ * recursively and the halves are joined with the merge callback.
+ * After every step the affected range is printed for debugging.
+ */
+void merge_sort_run(void * arr, int start, int items, int compare (void * arr, int l, int r), void exchange(void * arr, int l, int r), void merge(void * arr, int start, int items, int items2))
+{
+    const int stop = start + items;
+    int pos;
+
+    if(items <= 4)
+    {
+        /* Small range: selection sort. */
+        int slot;
+        for(slot = start; slot < stop - 1; slot++)
+        {
+            int smallest = slot;
+            int probe;
+            for(probe = slot + 1; probe < stop; probe++)
+                if(compare(arr, smallest, probe) > 0)
+                    smallest = probe;
+            if(smallest != slot)
+                exchange(arr, slot, smallest);
+        }
+        printf("RD: %d-%d\n", start,stop);
+        for(pos = start; pos < stop; pos++)
+            printf("INRED: %d\n", ((int *)arr)[pos]);
+        return;
+    }
+
+    /* Large range: sort both halves, then merge them. */
+    const int left_items = items/2;
+    const int right_items = items - left_items;
+
+    merge_sort_run(arr, start, left_items, compare, exchange, merge);
+    merge_sort_run(arr, start + left_items, right_items, compare, exchange, merge);
+    merge(arr, start, left_items, right_items);
+    printf("IN: %d-%d-%d\n", start, start + left_items, stop);
+    for(pos = start; pos < stop; pos++)
+        printf("INNER: %d\n", ((int *)arr)[pos]);
+}
+/*
+ * Sort the first arr_size elements of arr using the supplied compare,
+ * exchange and merge callbacks. This is the entry point; the work is done
+ * by merge_sort_run() over the whole range.
+ */
+void merge_sort(void * arr, int arr_size, int compare (void * arr, int l, int r), void exchange(void * arr, int l, int r), void merge(void * arr, int start, int items, int items2))
+{
+    const int first_index = 0;
+
+    merge_sort_run(arr, first_index, arr_size, compare, exchange, merge);
+}
diff --git a/src/zlib_test.c b/src/zlib_test.c
new file mode 100644
index 0000000..9d96820
--- /dev/null
+++ b/src/zlib_test.c
@@ -0,0 +1,88 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include "zlib.h"
+
+
+/*
+ * Scratch program exercising zlib's deflate/inflate:
+ *  1. deflates the 7-byte string "ABCABC\n" at level 0 and writes the result
+ *     to "tt.gz";
+ *  2. inflates that result again and prints it;
+ *  3. reads "t.bin" (at most 999998 bytes), deflates it at level 1 and writes
+ *     the result to "tt.bin.gz".
+ * Returns 0 on success and 1 when memory, a zlib stream or a file cannot be
+ * set up.
+ */
+int main(void)
+{
+    char * tdata = "ABCABC\n";
+    int tdata_len = 7, ret;
+
+    char * out_buff = malloc(999999);
+    char * in_buff = malloc(999999);
+    if(NULL == out_buff || NULL == in_buff){
+        printf("Out of memory!\n");
+        free(out_buff);
+        free(in_buff);
+        return 1;
+    }
+
+    z_stream strm;
+    strm.zalloc = Z_NULL;
+    strm.zfree = Z_NULL;
+    strm.opaque = Z_NULL;
+    strm.avail_in = 0;
+    strm.next_in = Z_NULL;
+    ret = deflateInit(&strm, 0);
+    if (ret != Z_OK){
+        printf("Ohh!\n");
+        free(out_buff);
+        free(in_buff);
+        return 1;
+    }
+
+    strm.avail_out = 99999;
+    strm.next_out = (Bytef *)out_buff;
+    strm.next_in = (Bytef *)tdata;
+    strm.avail_in = tdata_len;
+    ret = deflate(&strm, Z_FINISH);
+
+    int have = 99999 - strm.avail_out;
+    /* The stream is initialised again further down, so release it first. */
+    deflateEnd(&strm);
+
+    printf("RET=%d; LEN=%d\n",ret, have);
+
+    FILE * ofp = fopen("tt.gz","wb");
+    if(NULL == ofp){
+        printf("Unable to create tt.gz\n");
+        free(out_buff);
+        free(in_buff);
+        return 1;
+    }
+    fwrite(out_buff,1,have,ofp);
+    fclose(ofp);
+
+    z_stream strmx;
+    strmx.zalloc = Z_NULL;
+    strmx.zfree = Z_NULL;
+    strmx.opaque = Z_NULL;
+    strmx.avail_in = 0;
+    strmx.next_in = Z_NULL;
+    ret = inflateInit(&strmx);
+    if (ret != Z_OK){
+        printf("Ohh!\n");
+        free(out_buff);
+        free(in_buff);
+        return 1;
+    }
+
+    strmx.avail_out = 99999;
+    strmx.next_out = (Bytef *)in_buff;
+    strmx.next_in = (Bytef *)out_buff;
+    strmx.avail_in = have;
+    ret = inflate(&strmx, Z_FINISH);
+    have = 99999 - strmx.avail_out;
+    inflateEnd(&strmx);
+
+    /* inflate() does not terminate its output; %s below needs a NUL. */
+    in_buff[have] = 0;
+
+    printf("XRET=%d; LEN=%d; RES=%s\n",ret, have, in_buff);
+    FILE * fbig = fopen("t.bin","rb");
+    if(NULL == fbig){
+        printf("Unable to open t.bin\n");
+        free(out_buff);
+        free(in_buff);
+        return 1;
+    }
+    int fpos = 0;
+    /* Stop one byte before the end of the buffer to leave room for a NUL. */
+    while(fpos < 999998)
+    {
+        int nch = fgetc(fbig);
+        if(nch<0) break;
+        in_buff[fpos++]=nch;
+    }
+    in_buff[fpos] = 0;
+    fclose(fbig);
+
+    strm.zalloc = Z_NULL;
+    strm.zfree = Z_NULL;
+    strm.opaque = Z_NULL;
+    strm.avail_in = 0;
+    strm.next_in = Z_NULL;
+
+    ret = deflateInit(&strm, 1);
+    if (ret != Z_OK){
+        printf("Ohh!\n");
+        free(out_buff);
+        free(in_buff);
+        return 1;
+    }
+
+    strm.next_in = (Bytef *)in_buff;
+    strm.avail_in = fpos;
+    strm.next_out = (Bytef *)out_buff;
+    strm.avail_out = 999999;
+
+    ret = deflate(&strm, Z_FINISH);
+
+    /* The compressed length comes from the deflate stream (strm), not from
+     * the inflate stream (strmx) used earlier. */
+    have = 999999 - strm.avail_out;
+    deflateEnd(&strm);
+
+    printf("XRET=%d; LEN=%d; RES=%s\n",ret, have, in_buff);
+    FILE * fbigo = fopen("tt.bin.gz","wb");
+    if(NULL == fbigo){
+        printf("Unable to create tt.bin.gz\n");
+        free(out_buff);
+        free(in_buff);
+        return 1;
+    }
+    fwrite(out_buff, 1, have, fbigo);
+    fclose(fbigo);
+
+    free(out_buff);
+    free(in_buff);
+    return 0;
+}
diff --git a/src/zpipe.c b/src/zpipe.c
deleted file mode 100644
index 0265b07..0000000
--- a/src/zpipe.c
+++ /dev/null
@@ -1,192 +0,0 @@
-/* zpipe.c: example of proper use of zlib's inflate() and deflate()
- Not copyrighted -- provided to the public domain
- Version 1.2 9 November 2004 Mark Adler */
-
-/* Version history:
- 1.0 30 Oct 2004 First version
- 1.1 8 Nov 2004 Add void casting for unused return values
- Use switch statement for inflate() return values
- 1.2 9 Nov 2004 Add assertions to document zlib guarantees
- 1.3 6 Apr 2005 Remove incorrect assertion in inf()
- */
-
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-#include "zlib.h"
-
-#define CHUNK 16384
-
-/* Compress from file source to file dest until EOF on source.
- def() returns Z_OK on success, Z_MEM_ERROR if memory could not be
- allocated for processing, Z_STREAM_ERROR if an invalid compression
- level is supplied, Z_VERSION_ERROR if the version of zlib.h and the
- version of the library linked do not match, or Z_ERRNO if there is
- an error reading or writing the files. */
-int def(FILE *source, FILE *dest, int level)
-{
- int ret, flush;
- unsigned have;
- z_stream strm;
- char in[CHUNK];
- char out[CHUNK];
-
- /* allocate deflate state */
- strm.zalloc = Z_NULL;
- strm.zfree = Z_NULL;
- strm.opaque = Z_NULL;
- ret = deflateInit(&strm, level);
- if (ret != Z_OK)
- return ret;
-
- /* compress until end of file */
- do {
- strm.avail_in = fread(in, 1, CHUNK, source);
- if (ferror(source)) {
- (void)deflateEnd(&strm);
- return Z_ERRNO;
- }
- flush = feof(source) ? Z_FINISH : Z_NO_FLUSH;
- strm.next_in = in;
-
- /* run deflate() on input until output buffer not full, finish
- compression if all of source has been read in */
- do {
- strm.avail_out = CHUNK;
- strm.next_out = out;
- ret = deflate(&strm, flush); /* no bad return value */
- assert(ret != Z_STREAM_ERROR); /* state not clobbered */
- have = CHUNK - strm.avail_out;
- if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
- (void)deflateEnd(&strm);
- return Z_ERRNO;
- }
- } while (strm.avail_out == 0);
- assert(strm.avail_in == 0); /* all input will be used */
-
- /* done when last data in file processed */
- } while (flush != Z_FINISH);
- assert(ret == Z_STREAM_END); /* stream will be complete */
-
- /* clean up and return */
- (void)deflateEnd(&strm);
- return Z_OK;
-}
-
-/* Decompress from file source to file dest until stream ends or EOF.
- inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be
- allocated for processing, Z_DATA_ERROR if the deflate data is
- invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and
- the version of the library linked do not match, or Z_ERRNO if there
- is an error reading or writing the files. */
-int inf(FILE *source, FILE *dest)
-{
- int ret;
- unsigned have;
- z_stream strm;
- char in[CHUNK];
- char out[CHUNK];
-
- /* allocate inflate state */
- strm.zalloc = Z_NULL;
- strm.zfree = Z_NULL;
- strm.opaque = Z_NULL;
- strm.avail_in = 0;
- strm.next_in = Z_NULL;
- ret = inflateInit(&strm);
- if (ret != Z_OK)
- return ret;
-
- /* decompress until deflate stream ends or end of file */
- do {
- strm.avail_in = fread(in, 1, CHUNK, source);
- if (ferror(source)) {
- (void)inflateEnd(&strm);
- return Z_ERRNO;
- }
- if (strm.avail_in == 0)
- break;
- strm.next_in = in;
-
- /* run inflate() on input until output buffer not full */
- do {
- strm.avail_out = CHUNK;
- strm.next_out = out;
- ret = inflate(&strm, Z_NO_FLUSH);
- assert(ret != Z_STREAM_ERROR); /* state not clobbered */
- switch (ret) {
- case Z_NEED_DICT:
- ret = Z_DATA_ERROR; /* and fall through */
- case Z_DATA_ERROR:
- case Z_MEM_ERROR:
- (void)inflateEnd(&strm);
- return ret;
- }
- have = CHUNK - strm.avail_out;
- if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
- (void)inflateEnd(&strm);
- return Z_ERRNO;
- }
- } while (strm.avail_out == 0);
-
- /* done when inflate() says it's done */
- } while (ret != Z_STREAM_END);
-
- /* clean up and return */
- (void)inflateEnd(&strm);
- return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR;
-}
-
-/* report a zlib or i/o error */
-void zerr(int ret)
-{
- fputs("zpipe: ", stderr);
- switch (ret) {
- case Z_ERRNO:
- if (ferror(stdin))
- fputs("error reading stdin\n", stderr);
- if (ferror(stdout))
- fputs("error writing stdout\n", stderr);
- break;
- case Z_STREAM_ERROR:
- fputs("invalid compression level\n", stderr);
- break;
- case Z_DATA_ERROR:
- fputs("invalid or incomplete deflate data\n", stderr);
- break;
- case Z_MEM_ERROR:
- fputs("out of memory\n", stderr);
- break;
- case Z_VERSION_ERROR:
- fputs("zlib version mismatch!\n", stderr);
- }
-}
-
-/* compress or decompress from stdin to stdout */
-int main(int argc, char **argv)
-{
- int ret;
-
- /* do compression if no arguments */
- if (argc == 1) {
- ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION);
- if (ret != Z_OK)
- zerr(ret);
- return ret;
- }
-
- /* do decompression if -d specified */
- else if (argc == 2 && strcmp(argv[1], "-d") == 0) {
- ret = inf(stdin, stdout);
- if (ret != Z_OK)
- zerr(ret);
- return ret;
- }
-
- /* otherwise, report usage */
- else {
- fputs("zpipe usage: zpipe [-d] < source > dest\n", stderr);
- return 1;
- }
-}
-
diff --git a/test/featureCounts/data/DEL4-1385690721.FC b/test/featureCounts/data/DEL4-1385690721.FC
deleted file mode 100644
index 745aff7..0000000
--- a/test/featureCounts/data/DEL4-1385690721.FC
+++ /dev/null
@@ -1,9 +0,0 @@
-# Program:featureCounts v1.4.3; Command:"../../bin/featureCounts" "-F" "SAF" "-p" "-A" "data/test-chralias.txt" "-o" "data/DEL4-1385690721.FC" "-a" "data/test-chralias.SAF" "data/test-chralias.sam"
-Geneid Chr Start End Strand Length data/test-chralias.sam
-simu_gene1 chr3;chr3;chr3 100;20000;40000 10000;30000;89000 +;+;+ 68903 16
-simu_gene2 chr3;chr3 100010;102000 101000;131000 +;+ 29992 5
-simu_gene3 chr3;chr3;chr3;chr3 500010;502000;504000;600000 501000;503000;529000;669000 -;-;-;- 95994 8
-simu_gene4 chr3;chr3;chr3 602000;672000;702000 631000;699000;719000 +;+;+ 73003 6
-simu_gene5 chr4;chr4;chr4;chr4 20000;120000;200000;220000 100000;190000;210000;300000 -;-;-;- 240004 48
-simu_gene6 chr4;chr4 420000;500000 490000;560000 -;- 130002 22
-simu_gene7 chr5;chr5;chr5 120000;500000;970000 490000;960000;1000000 -;-;- 860003 169
diff --git a/test/featureCounts/data/DEL4-1385690721.FC.summary b/test/featureCounts/data/DEL4-1385690721.FC.summary
deleted file mode 100644
index 893999d..0000000
--- a/test/featureCounts/data/DEL4-1385690721.FC.summary
+++ /dev/null
@@ -1,9 +0,0 @@
-Status data/test-chralias.sam
-Assigned 274
-Unassigned_Ambiguity 4
-Unassigned_MultiMapping 0
-Unassigned_NoFeatures 322
-Unassigned_Unmapped 0
-Unassigned_MappingQuality 0
-Unassigned_FragementLength 0
-Unassigned_Chimera 0
diff --git a/test/featureCounts/data/corner-BINS.SAF b/test/featureCounts/data/corner-BINS.SAF
new file mode 100644
index 0000000..b8974db
--- /dev/null
+++ b/test/featureCounts/data/corner-BINS.SAF
@@ -0,0 +1,7 @@
+GeneID Chr Start End Strand
+bin1 chrX 10000 10049 +
+bin2 chrX 10050 10099 +
+bin3 chrX 10100 10149 +
+bin4 chrX 10150 10199 +
+bin5 chrX 10200 10249 +
+bin6 chrX 10250 10299 +
diff --git a/test/featureCounts/data/corner-BINS.sam b/test/featureCounts/data/corner-BINS.sam
new file mode 100644
index 0000000..546d001
--- /dev/null
+++ b/test/featureCounts/data/corner-BINS.sam
@@ -0,0 +1,9 @@
+R1.1 0 chrX 10000 40 7M * * * AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+R1.1 0 chrX 10040 40 7M * * * AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+R1.1 0 chrX 10080 40 7M * * * AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+R1.1 0 chrX 10120 40 7M * * * AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+R1.1 0 chrX 10160 40 7M * * * AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+R1.1 0 chrX 10200 40 7M * * * AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+R1.1 0 chrX 10240 40 7M * * * AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+R1.1 0 chrX 10280 40 7M * * * AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+R1.1 0 chrX 10320 40 7M * * * AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
diff --git a/test/featureCounts/data/corner-JUNC.sam b/test/featureCounts/data/corner-JUNC.sam
index a4e8bc9..85f20fd 100644
--- a/test/featureCounts/data/corner-JUNC.sam
+++ b/test/featureCounts/data/corner-JUNC.sam
@@ -5,14 +5,14 @@
@PG ID:subread PN:subread VN:1.4.0b4
chr3_271603_272081_0:0:0_0:0:0_2 99 chr3 271603 199 30M1000N70M = 271982 479 CAGGGAAAAGCAGGTGGAAAAACAGAAATCGAACATAAAGATGGTAGACTCCAACCCAAACACTCTAACACCTACATTAAATACAAATGGTTAAATTAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:0
chr3_271603_272081_0:0:0_0:0:0_2 147 chr3 271982 199 10M500N90M = 271603 -479 GAAGAACTGAAAACCAGGACTCCAAGAGACATTTGTACAACCATATCTTAGCAGCATTGTTCACCACAACCACCATGTCTTAGTGAAAGGTGACAACATG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:0
-chr3_827607_828091_0:0:0_0:0:0_3 83 chr3 827992 199 100M = 827607 -485 CCCACAGAGCTGGGATCATAGGCGTGAGCCACCACACCCAGATGAAATATTTTTAAGTAAATTACAGGTATCATGACATCTCACCCCTGAGTACTTCAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:1
+chr3_827607_828091_0:0:0_0:0:0_3 1107 chr3 827992 199 100M = 827607 -485 CCCACAGAGCTGGGATCATAGGCGTGAGCCACCACACCCAGATGAAATATTTTTAAGTAAATTACAGGTATCATGACATCTCACCCCTGAGTACTTCAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:1
chr3_827607_828091_0:0:0_0:0:0_3 163 chr3 827607 199 100M = 827992 485 TGGGTCTGGAGGCTGGGTGGGGTTGGGGGACTCAGCGTCACGGTGACATCAGCCCTGCGGCCAGCAGCTCGGCTGACCCCGGGTCTGGAGGCCAGGATGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:1
chr3_720214_720690_0:0:0_0:0:0_4 99 chr3 720214 199 100M = 720591 477 CTGGCCACAGGCACTGGAGCCACGAAAGCAACAGCCCTGGGCAGCCCAGCACCATCCTGGGTTCCCTGCTGCCGGCGCCAGCCCCACGTACCCCCGACCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:2
-chr3_720214_720690_0:0:0_0:0:0_4 147 chr3 720591 199 100M = 720214 -477 CAGCCATGCCCCTGCCACACACACAGAAGACTCCCCACATCAGAGGGGAGGTCAGAGGTCTCAAAGGTCAGGTTAGAGCTGGGTCAATCCGTTTCCATGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:2
+chr3_720214_720690_0:0:0_0:0:0_4 1171 chr3 720591 199 100M = 720214 -477 CAGCCATGCCCCTGCCACACACACAGAAGACTCCCCACATCAGAGGGGAGGTCAGAGGTCTCAAAGGTCAGGTTAGAGCTGGGTCAATCCGTTTCCATGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:2
chr3_447928_448462_0:0:0_0:0:0_5 99 chr3 447928 199 10M2000N90M = 448363 535 GACAGTGAGGCCACCTGGATATCTAGGGTCCCACAGTAGACAGGGATGGGGTGGTCCTGGGGGACAGGGACACCTGCCTTCCACACAACCGCACTGGGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:3
chr3_447928_448462_0:0:0_0:0:0_5 147 chr3 448363 199 100M = 447928 -535 CACCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCGTGTTAGGCAGGATGGTCTCAAACTCCTGACCTTGTGATCCGCCCGCCTCGGCCTCCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:3
-chr3_260025_260490_0:0:0_0:0:0_6 83 chr3 260391 199 100M = 260025 -466 CATTTCTTTCCTTATGTATAAACAGTTGCTAAAAAGACTTTTCTTTCCATGTGGAATTACGTTGACATCTTCATTGAAAATCAATGGACTATAGAGGTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:4
-chr3_260025_260490_0:0:0_0:0:0_6 163 chr3 260025 199 100M = 260391 466 TATTTTTAGTAGAGACGGGGTTGCACCATGTTGGCCAGGATGGTCTCGATCTCCTGACTTTGTGATCCGCCTGCCTTGGCCTCCCAAAGTGCTAGGATTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:4
+chr3_260025_260490_0:0:0_0:0:0_6 1107 chr3 260391 199 100M = 260025 -466 CATTTCTTTCCTTATGTATAAACAGTTGCTAAAAAGACTTTTCTTTCCATGTGGAATTACGTTGACATCTTCATTGAAAATCAATGGACTATAGAGGTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:4
+chr3_260025_260490_0:0:0_0:0:0_6 1207 chr3 260025 199 100M = 260391 466 TATTTTTAGTAGAGACGGGGTTGCACCATGTTGGCCAGGATGGTCTCGATCTCCTGACTTTGTGATCCGCCTGCCTTGGCCTCCCAAAGTGCTAGGATTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:4
chr3_665069_665608_0:0:0_0:0:0_7 99 chr3 665069 199 100M = 665509 540 AGTTTTGCCTTGTAGCCCAGGCTGGAGTGCAGTGGCGCAATCTCTGGTCACTGCAACCTCCGCCTGCCGGGTTCAAGCGATTCTCCTTCCTCAGCCTTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:5
chr3_665069_665608_0:0:0_0:0:0_7 147 chr3 665509 199 40M400N60M = 665069 -540 GGCTAAGTTTTTGTATTTTAGTAGAGACGGGGTTTCACCATGTTACCAAGGCTGGTTGCAAACTCCTGAGCTCAGGCGATCCACCCGCCTCAGCCTCCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:5
chr3_107489_107962_0:0:0_0:0:0_8 83 chr3 107863 199 100M = 107489 -474 ACGGCTTCCTGCCCCCCGCGCAGGCGGAGATGTTCGCCTGGCAGCAGGAGCTCCTGCGGAAGCAGAACCTGGCCCGGTAGGTGCGGGGAGGCGGGCGGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:6
diff --git a/test/featureCounts/data/test-chrname.sam b/test/featureCounts/data/test-chrname.sam
index 29c8c1a..71df299 100644
--- a/test/featureCounts/data/test-chrname.sam
+++ b/test/featureCounts/data/test-chrname.sam
@@ -3,17 +3,17 @@
@SQ SN:4 LN:999950
@SQ SN:chr5 LN:999950
@PG ID:subread PN:subread VN:1.4.0b4
-chr3_271603_272081_0:0:0_0:0:0_2 99 chr3 271603 199 100M = 271982 479 CAGGGAAAAGCAGGTGGAAAAACAGAAATCGAACATAAAGATGGTAGACTCCAACCCAAACACTCTAACACCTACATTAAATACAAATGGTTAAATTAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:0 NH:i:1
+chr3_271603_272081_0:0:0_0:0:0_2 1123 chr3 271603 199 100M = 271982 479 CAGGGAAAAGCAGGTGGAAAAACAGAAATCGAACATAAAGATGGTAGACTCCAACCCAAACACTCTAACACCTACATTAAATACAAATGGTTAAATTAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:0 NH:i:1
chr3_271603_272081_0:0:0_0:0:0_2 147 chr3 271982 199 100M = 271603 -479 GAAGAACTGAAAACCAGGACTCCAAGAGACATTTGTACAACCATATCTTAGCAGCATTGTTCACCACAACCACCATGTCTTAGTGAAAGGTGACAACATG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:0 NH:i:1
chr3_827607_828091_0:0:0_0:0:0_3 83 chr3 827992 199 100M = 827607 -485 CCCACAGAGCTGGGATCATAGGCGTGAGCCACCACACCCAGATGAAATATTTTTAAGTAAATTACAGGTATCATGACATCTCACCCCTGAGTACTTCAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:1 NH:i:1
chr3_827607_828091_0:0:0_0:0:0_3 163 chr3 827607 199 100M = 827992 485 TGGGTCTGGAGGCTGGGTGGGGTTGGGGGACTCAGCGTCACGGTGACATCAGCCCTGCGGCCAGCAGCTCGGCTGACCCCGGGTCTGGAGGCCAGGATGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:1 NH:i:1
-chr3_720214_720690_0:0:0_0:0:0_4 99 chr3 720214 199 100M = 720591 477 CTGGCCACAGGCACTGGAGCCACGAAAGCAACAGCCCTGGGCAGCCCAGCACCATCCTGGGTTCCCTGCTGCCGGCGCCAGCCCCACGTACCCCCGACCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:2 NH:i:1
+chr3_720214_720690_0:0:0_0:0:0_4 1123 chr3 720214 199 100M = 720591 477 CTGGCCACAGGCACTGGAGCCACGAAAGCAACAGCCCTGGGCAGCCCAGCACCATCCTGGGTTCCCTGCTGCCGGCGCCAGCCCCACGTACCCCCGACCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:2 NH:i:1
chr3_720214_720690_0:0:0_0:0:0_4 147 chr3 720591 199 100M = 720214 -477 CAGCCATGCCCCTGCCACACACACAGAAGACTCCCCACATCAGAGGGGAGGTCAGAGGTCTCAAAGGTCAGGTTAGAGCTGGGTCAATCCGTTTCCATGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:2 NH:i:1
-chr3_447928_448462_0:0:0_0:0:0_5 99 chr3 447928 199 100M = 448363 535 GACAGTGAGGCCACCTGGATATCTAGGGTCCCACAGTAGACAGGGATGGGGTGGTCCTGGGGGACAGGGACACCTGCCTTCCACACAACCGCACTGGGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:3 NH:i:1
+chr3_447928_448462_0:0:0_0:0:0_5 1123 chr3 447928 199 100M = 448363 535 GACAGTGAGGCCACCTGGATATCTAGGGTCCCACAGTAGACAGGGATGGGGTGGTCCTGGGGGACAGGGACACCTGCCTTCCACACAACCGCACTGGGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:3 NH:i:1
chr3_447928_448462_0:0:0_0:0:0_5 147 chr3 448363 199 100M = 447928 -535 CACCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCGTGTTAGGCAGGATGGTCTCAAACTCCTGACCTTGTGATCCGCCCGCCTCGGCCTCCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:3 NH:i:1
chr3_260025_260490_0:0:0_0:0:0_6 83 chr3 260391 199 100M = 260025 -466 CATTTCTTTCCTTATGTATAAACAGTTGCTAAAAAGACTTTTCTTTCCATGTGGAATTACGTTGACATCTTCATTGAAAATCAATGGACTATAGAGGTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:4 NH:i:1
chr3_260025_260490_0:0:0_0:0:0_6 163 chr3 260025 199 100M = 260391 466 TATTTTTAGTAGAGACGGGGTTGCACCATGTTGGCCAGGATGGTCTCGATCTCCTGACTTTGTGATCCGCCTGCCTTGGCCTCCCAAAGTGCTAGGATTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:4 NH:i:1
-chr3_665069_665608_0:0:0_0:0:0_7 99 chr3 665069 199 100M = 665509 540 AGTTTTGCCTTGTAGCCCAGGCTGGAGTGCAGTGGCGCAATCTCTGGTCACTGCAACCTCCGCCTGCCGGGTTCAAGCGATTCTCCTTCCTCAGCCTTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:5 NH:i:1
+chr3_665069_665608_0:0:0_0:0:0_7 1123 chr3 665069 199 100M = 665509 540 AGTTTTGCCTTGTAGCCCAGGCTGGAGTGCAGTGGCGCAATCTCTGGTCACTGCAACCTCCGCCTGCCGGGTTCAAGCGATTCTCCTTCCTCAGCCTTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:5 NH:i:1
chr3_665069_665608_0:0:0_0:0:0_7 147 chr3 665509 199 100M = 665069 -540 GGCTAAGTTTTTGTATTTTAGTAGAGACGGGGTTTCACCATGTTACCAAGGCTGGTTGCAAACTCCTGAGCTCAGGCGATCCACCCGCCTCAGCCTCCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:5 NH:i:1
chr3_107489_107962_0:0:0_0:0:0_8 83 chr3 107863 199 100M = 107489 -474 ACGGCTTCCTGCCCCCCGCGCAGGCGGAGATGTTCGCCTGGCAGCAGGAGCTCCTGCGGAAGCAGAACCTGGCCCGGTAGGTGCGGGGAGGCGGGCGGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:6 NH:i:1
chr3_107489_107962_0:0:0_0:0:0_8 163 chr3 107489 199 100M = 107863 474 GGGGCCGGCAATTAGCGGAGGCGGCGGGGGAGGGGCGCCGGGGCCTTTACGGGAACGGGGGCGGGGGGGACGCCGCTCATTGCGCTGCCGTCCACAGGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:6 NH:i:1
@@ -21,29 +21,29 @@ chr3_552773_553208_0:0:0_0:0:0_9 83 chr3 553109 199 100M = 552773 -436 CAAGACACA
chr3_552773_553208_0:0:0_0:0:0_9 163 chr3 552773 199 100M = 553109 436 TGTAGTAATGACTTTTCTTCTTGTATTTTCCCGGATTATCCGCCCGCTCCCGTGACCTGCTTCGAGAACGGGAAGAACTCCGGCTCCGAGACTTGTGTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:7 NH:i:1
chr3_692297_692829_0:0:0_0:0:0_a 83 chr3 692730 199 100M = 692297 -533 GGTCCTTCCCAGAGAGGCAAGGCTGGGGCCCTGCTGAGCCTCCAGTGAACCCGGGCCCCTGAGGTCCTGCTCCTGGCACGTGTGGGCGACTTCTCGACAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:8 NH:i:1
chr3_692297_692829_0:0:0_0:0:0_a 163 chr3 692297 199 100M = 692730 533 ATCGTGTGAGGCTCTGCTGGGTCTCCCTTGGAGGGTGTGTGTGCCCTGGGGTGGGAGATGGAGACAAGTTTGCTCCCACGGGAAGTTGGACACCAGCAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:8 NH:i:1
-chr3_888926_889477_0:0:0_0:0:0_b 99 chr3 888926 199 100M = 889378 552 CAAGAATCATAGACAGCTACTACCACGGCTGCTTCGTTTGGACAAAAATAACCAGGAGGCATCCACGGGATTAGTTACACGGTATCAACTTACCACCACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:9 NH:i:1
+chr3_888926_889477_0:0:0_0:0:0_b 1123 chr3 888926 199 100M = 889378 552 CAAGAATCATAGACAGCTACTACCACGGCTGCTTCGTTTGGACAAAAATAACCAGGAGGCATCCACGGGATTAGTTACACGGTATCAACTTACCACCACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:9 NH:i:1
chr3_888926_889477_0:0:0_0:0:0_b 147 chr3 889378 199 100M = 888926 -552 AAAAAGGACTGCCAGGCGGAACAGTTTCCAACCGAGTTTTCGTTGAGTGAGGATCCAGCAGCCATCAAACTCAAACATAGGGGCCCGCAGGGAAACTGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:9 NH:i:1
-chr3_246890_247339_0:0:0_0:0:0_c 99 chr3 246890 199 100M = 247240 450 CACCCCCACACTGCTGCCCCCCCAGATGGCCATGGCAGCCCTCCCGGGGCCCGTGTCTGCAGCCCCCACGCAGCCCGGCCGGCCGATGGAAACGCACACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:10 NH:i:1
+chr3_246890_247339_0:0:0_0:0:0_c 1123 chr3 246890 199 100M = 247240 450 CACCCCCACACTGCTGCCCCCCCAGATGGCCATGGCAGCCCTCCCGGGGCCCGTGTCTGCAGCCCCCACGCAGCCCGGCCGGCCGATGGAAACGCACACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:10 NH:i:1
chr3_246890_247339_0:0:0_0:0:0_c 147 chr3 247240 199 100M = 246890 -450 CATCCTACGGGAGAGAGCAGCTCGGAACGCAGCTTGAGTAATGCCGACTTTATATCAGCACACCCAGTGCCCCCACGTTCCCGCTGGCCCAGGTCCCGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:10 NH:i:1
chr3_843867_844485_0:0:0_0:0:0_d 81 chr3 844386 199 100M = 843867 -619 GACCAATCTGGCCAACAAGGTGAAACCCCGTCTCTACTAAAAATATAAAAATTAGCCAGGTGTGGTTGTGGGCACCTGTAATCCCACCTACTCAAGAGGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:11 NH:i:1
chr3_843867_844485_0:0:0_0:0:0_d 161 chr3 843867 199 100M = 844386 619 GCAAACACCAAAAGAATGCCAACCTACGTGCCTGTCAACAGTAGAAACAAACACCTTATTGATGGAGCTGAAGCTACGATGGAAATGAGGGGCTCAGTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:11 NH:i:1
-chr3_614905_615430_0:0:0_0:0:0_e 99 chr3 614905 199 100M = 615331 526 CTTCTACAGGCTTATTTTTCTAGTTCTTTCTTTTGTAATTTGTGGTTAATATCTGGCCAACTGTTAGTGACAAATTCCTTGCCCAAGAGGATCCTCGACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:12 NH:i:1
+chr3_614905_615430_0:0:0_0:0:0_e 1123 chr3 614905 199 100M = 615331 526 CTTCTACAGGCTTATTTTTCTAGTTCTTTCTTTTGTAATTTGTGGTTAATATCTGGCCAACTGTTAGTGACAAATTCCTTGCCCAAGAGGATCCTCGACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:12 NH:i:1
chr3_614905_615430_0:0:0_0:0:0_e 147 chr3 615331 199 100M = 614905 -526 TTCCCCACTAGGAACAAAATGTAACTGAGGACGTTGTCAGATGCTTGTCCTCGTCACCCTGAGCTTGTTGGCTGCTATCTAATTCTTATTCTTTCATTTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:12 NH:i:1
-chr3_133529_134066_0:0:0_0:0:0_f 99 chr3 133529 199 100M = 133967 538 TGAGCTGCGCTGCAGGGCCAGCTGTGTGGCCCTTCCTGGTCTGTGGCCTATTTTTCATGGGTGCCAACCCGGCATCAGTTCCCACGCTGGGTGTGGGTAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:13 NH:i:1
+chr3_133529_134066_0:0:0_0:0:0_f 1123 chr3 133529 199 100M = 133967 538 TGAGCTGCGCTGCAGGGCCAGCTGTGTGGCCCTTCCTGGTCTGTGGCCTATTTTTCATGGGTGCCAACCCGGCATCAGTTCCCACGCTGGGTGTGGGTAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:13 NH:i:1
chr3_133529_134066_0:0:0_0:0:0_f 147 chr3 133967 199 100M = 133529 -538 ATCCCAGGCCCTGTCTTAGGCTCTGGGGACACAGCAGTGAACGGAATAGACAACCCCTGTTCTGCTAGTGCTCCCATTCCCACGGCAGAGACACACGACG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:13 NH:i:1
-chr3_604517_605016_0:0:0_0:0:0_10 99 chr3 604517 199 100M = 604917 500 TGCACCACCCACAGACGCGATGCGGCCGCAGCAGCTCCATGCCACGGAGATCACGTCCAGCGGCTTCCGCCTGGCCTGGCCACCCCTGCTGACCGCAGAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:14 NH:i:1
+chr3_604517_605016_0:0:0_0:0:0_10 1123 chr3 604517 199 100M = 604917 500 TGCACCACCCACAGACGCGATGCGGCCGCAGCAGCTCCATGCCACGGAGATCACGTCCAGCGGCTTCCGCCTGGCCTGGCCACCCCTGCTGACCGCAGAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:14 NH:i:1
chr3_604517_605016_0:0:0_0:0:0_10 147 chr3 604917 199 100M = 604517 -500 ATCTCCCACGCCCGGCCGCGCAGCCTCCGCGTGAGTTGGGCCCCAGCGCTGGGCTCAGCCGCGGCGCTCGGCTACCACGTGCAGTTCGGGCCGCTGCGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:14 NH:i:1
chr3_918655_919138_0:0:0_0:0:0_11 83 chr3 919039 199 100M = 918655 -484 CATCACACGCAGGCTCAGGCCCACAACCCCCACCCGGGCACCACGCTGACCCAAGTGCACGCACCACAGCCCTTCCCAGCCCCCGGCCACTTGCCTGCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:15 NH:i:1
chr3_918655_919138_0:0:0_0:0:0_11 163 chr3 918655 199 100M = 919039 484 GCACCCCAGGCCCCCTTCCCCCCTGCCCCGCGTGCCTCCATGAGGTGCGTGCAGAGCTCCTTGAACGGCTGCAGTAGGCTGGCATCTCTCATCTTCTTGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:15 NH:i:1
-chr3_267776_268278_0:0:0_0:0:0_12 99 chr3 267776 199 100M = 268179 503 TCCCAGAGGCTCTGTTCATGGTTTTGTTTTATTTTCCTCCAATCACTTCCACCCTTTTTTTTTTTTCCAGCTTGGATGATTTCTATTCCTCTGTCTTTGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:16 NH:i:1
+chr3_267776_268278_0:0:0_0:0:0_12 1123 chr3 267776 199 100M = 268179 503 TCCCAGAGGCTCTGTTCATGGTTTTGTTTTATTTTCCTCCAATCACTTCCACCCTTTTTTTTTTTTCCAGCTTGGATGATTTCTATTCCTCTGTCTTTGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:16 NH:i:1
chr3_267776_268278_0:0:0_0:0:0_12 147 chr3 268179 199 100M = 267776 -503 CAGAAGTTGCAGTGAGACGAGATCGTGCCACTGAACTCCAGCCTGGGTGACAGAGTAGAACTGTCTCAAAAATAAATAAATAAAATAAAATTTCAGATCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:16 NH:i:1
chr3_729587_730090_0:0:0_0:0:0_13 83 chr3 729991 199 100M = 729587 -504 TGAACCCCACACCGTGTCTGTGCGCCTCACCACGCCTACTGCCATGGGTGGCACTGGAACTAACCCGTGGACGATCCGACGTGCACCAGGAATAGAAACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:17 NH:i:1
chr3_729587_730090_0:0:0_0:0:0_13 163 chr3 729587 199 100M = 729991 504 CCGGGCACTGATTGTCATAACCATTATGCAACTGGTGTTGCGTCCATCAGAATCTAGTTTAAGAATACTCTTCTCTCTATAGGAGTCTTCGCGGCAGACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:17 NH:i:1
chr3_630579_631075_0:0:0_0:0:0_14 83 chr3 630976 199 100M = 630579 -497 GCTTGCAGTGAGCCGAGATCGTGCCACTGCACTCCAGCCTGGGCGACAGAGCAAGACTGTCTTTAAAAAAAAAAAAAAAAAAAAAAAAAGATTATTCATT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:8 SD:i:0 SN:i:18 NH:i:1
chr3_630579_631075_0:0:0_0:0:0_14 163 chr3 630579 199 100M = 630976 497 GCCCGCCTCAGCCTCCCAAAGTGCTGGGATTCATTCAGGTGTGGGCCACGGAGCCCGGTCAGTTTTCACTGTGCAAGTCTTTCACCTTCTCGGTCAAGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:18 NH:i:1
-chr3_553408_553865_0:0:0_0:0:0_15 99 chr3 553408 199 100M = 553766 458 TCACGGGGCTGTCCGCCTTGGCTTTCTTGGCGCCCTCCAGCCTCCTCTTGGTGTTCTTCACAGACAGTGGGGAAGGCTTGCTCCCTTTACCTTCTTTGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:19 NH:i:1
+chr3_553408_553865_0:0:0_0:0:0_15 1123 chr3 553408 199 100M = 553766 458 TCACGGGGCTGTCCGCCTTGGCTTTCTTGGCGCCCTCCAGCCTCCTCTTGGTGTTCTTCACAGACAGTGGGGAAGGCTTGCTCCCTTTACCTTCTTTGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:19 NH:i:1
chr3_553408_553865_0:0:0_0:0:0_15 147 chr3 553766 199 100M = 553408 -458 ATTGAGACCATCCTGCCCAACATGGTGAAACCCCATCTCTACTAAAACTACAAAAATTAGCTGGGCTTGGTGGCACGCGCCTGTAGTCCCAGCTACTCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:19 NH:i:1
-chr3_748616_749140_0:0:0_0:0:0_16 99 chr3 748616 199 100M = 749041 525 GACAAATACGTGACCTGTTTTTGCTGCCAAATTTTGGGATGGTCTGTTACACAGTGAAGGCTAGCTGATACGCAGTTCCTCATTTCTCCTCTGGTGATGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:20 NH:i:1
+chr3_748616_749140_0:0:0_0:0:0_16 1123 chr3 748616 199 100M = 749041 525 GACAAATACGTGACCTGTTTTTGCTGCCAAATTTTGGGATGGTCTGTTACACAGTGAAGGCTAGCTGATACGCAGTTCCTCATTTCTCCTCTGGTGATGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:20 NH:i:1
chr3_748616_749140_0:0:0_0:0:0_16 147 chr3 749041 199 100M = 748616 -525 TGCAGCTACCGGTAGGCTGCGTGCCAATAACAGCCGCCAAAGGAGTAGAAGTAATCACCAACCTGGGTCCCTTTGATCCAGGACATCCGTGGGGCACACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:20 NH:i:1
chr3_584048_584548_0:0:0_0:0:0_17 83 chr3 584449 199 100M = 584048 -501 CCTCCGGAAGCCTCTTCCCTGATGTCCCCTCCAGGCAGGCAGCCTCAGCCAGAGAGCCTGGAAATCACTGGGGCGGTGGAGCCTGGAGCCTGCTGTCTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:21 NH:i:1
chr3_584048_584548_0:0:0_0:0:0_17 163 chr3 584048 199 100M = 584449 501 CGGCGGTCAGAGAGAAACAGAACACGGCAGGGAGTTTCACAAATGTTCTTCTATACAATGTCTGGAATCTATGAATAACATCGGTTTCTAAGTTATGAAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:21 NH:i:1
@@ -51,7 +51,7 @@ chr3_790246_790711_0:0:0_0:0:0_18 83 chr3 790612 199 100M = 790246 -466 ACAAGGAC
chr3_790246_790711_0:0:0_0:0:0_18 163 chr3 790246 199 100M = 790612 466 GTCACACTGAGTCCCCGCCAGGGCCTCCCGAGGATCCCACTAAGGGGCATCTTCCAGGGAGCGAAGGTGGTGCGAGGCCCCGACTGGGAGTGGGGCTCAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:22 NH:i:1
chr3_218192_218639_0:0:0_0:0:0_19 83 chr3 218540 199 100M = 218192 -448 CTGGGGCATCCAAGCCCCTGGTCTCCACTCCATACCACCCTACGCCTACCTCCTTGATCTCTGCGCCCAGCCTTGGCTGTGCTCCCCTGCTGTCTGCACG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:23 NH:i:1
chr3_218192_218639_0:0:0_0:0:0_19 163 chr3 218192 199 100M = 218540 448 AGTGTGTGTGCATGGGTCCATGTGTGTATAGTGTGTGCACATGGGTCCATGTATGTGTGTGTATATGAGGGAGACACGCAGGTGTGTGTCCGAGTGTGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:23 NH:i:1
-chr3_912528_913023_0:0:0_0:0:0_1a 99 chr3 859918 199 100M = 860314 496 GTTTGAGTCCCTTCTGTCTACCCCTACCCCCGCCAGGGCACTGCCCCCTTGCCCGGAAGAGGCAGCGGCACCCCCAGCCCCTTGGGGAGGATGCCCTGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:24 NH:i:1
+chr3_912528_913023_0:0:0_0:0:0_1a 1123 chr3 859918 199 100M = 860314 496 GTTTGAGTCCCTTCTGTCTACCCCTACCCCCGCCAGGGCACTGCCCCCTTGCCCGGAAGAGGCAGCGGCACCCCCAGCCCCTTGGGGAGGATGCCCTGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:24 NH:i:1
chr3_912528_913023_0:0:0_0:0:0_1a 147 chr3 860314 199 100M = 859918 -496 ATGAGCGGCTGGAATTCTGAACACTGCCGTCTTCCAGCCCTAACGCTGGGCGCTGGTCCCTCTCTCCTAAGCCCACGGCTGGGCTTCCCCTGTGCCCAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:24 NH:i:1
chr3_475245_475772_0:0:0_0:0:0_1b 83 chr3 475673 199 100M = 475245 -528 CTATTGGGGTGAGCCCTGAAGGGGGGAAGAACTTCTGCCAGCCCCAGACTTCTGTGGCAGAGCAGCAGGTACAGGACTGGGGGTCTCTGGCACCCGCACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:25 NH:i:1
chr3_475245_475772_0:0:0_0:0:0_1b 163 chr3 475245 199 100M = 475673 528 CCGGGTCTCCGTTTCCCCAGGCCAAGCCAGCCCCTTGGTCACCCCCGAGGAGAGCAGGTGAGGAAGGGCCCCTGGGCTGTGGCCCTGCCCTCAAGTCACG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:25 NH:i:1
@@ -69,53 +69,53 @@ chr3_470587_471080_0:0:0_0:0:0_21 83 chr3 470981 199 100M = 470587 -494 GGGATGGC
chr3_470587_471080_0:0:0_0:0:0_21 163 chr3 470587 199 100M = 470981 494 GAGCTGGAGCTGCAGGCACCAGGCTGCCCCAGCCAGGGACCTTGAAGGGTCCCAGCCACCCCAGAAGGACACTAGGTCCTCTCTGAGCAGCCCTCTCTTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:31 NH:i:1
chr3_408926_409422_0:0:0_0:0:0_22 83 chr3 409323 199 100M = 408926 -497 AGTGTCAGGCGGTGTGTGTGGTCCCGTCTTGCCTGTGGGGCCCCACCCAACACCCCGCTCTAAGCTCCCGGCTCCACTCACAGCCTGGAAACCATGCAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:32 NH:i:1
chr3_408926_409422_0:0:0_0:0:0_22 163 chr3 408926 199 100M = 409323 497 CCGGACACGGGGGCCGTGAACCGACCCGAGGCCAGGCTCAGACCGGAGCCTCGCAGGAAGGCACCTTGGGCAGCAGGCTGTGAGGGGCAGTGGGTGAGCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:32 NH:i:1
-chr3_754888_755442_0:0:0_0:0:0_23 99 chr3 754888 199 100M = 755343 555 ATTATTCCTAGAGCTAAGTGGTGGTGGGGTGGAAAACCGAGGATGTGGAGGTGACCTCTTTTTTTTTTTTTCGAGACAGAGTCTCGCTCTGTCGTCCAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:33 NH:i:1
+chr3_754888_755442_0:0:0_0:0:0_23 1123 chr3 754888 199 100M = 755343 555 ATTATTCCTAGAGCTAAGTGGTGGTGGGGTGGAAAACCGAGGATGTGGAGGTGACCTCTTTTTTTTTTTTTCGAGACAGAGTCTCGCTCTGTCGTCCAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:33 NH:i:1
chr3_754888_755442_0:0:0_0:0:0_23 147 chr3 755343 199 100M = 754888 -555 AAGTCGTCTACCATTGCTTTTGTGGTATTTTAGAGAAAAAGGCTGAGAACCAGGTGTTGGAAATCTTTAAGGATGCCTGGGGCTTCCTTGGGTGACAGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:33 NH:i:1
-chr3_72591_73147_0:0:0_0:0:0_24 99 chr3 72591 199 100M = 73048 557 CCTTAAAGTGCTGTTACACTCTCCTTTCCCAGGATGCAGCAAGCCAAAACAGTACCACTGCACGTCAGCCTGGGTGACAGAGTGAGACCCTATCTTAAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:34 NH:i:1
+chr3_72591_73147_0:0:0_0:0:0_24 1123 chr3 72591 199 100M = 73048 557 CCTTAAAGTGCTGTTACACTCTCCTTTCCCAGGATGCAGCAAGCCAAAACAGTACCACTGCACGTCAGCCTGGGTGACAGAGTGAGACCCTATCTTAAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:34 NH:i:1
chr3_72591_73147_0:0:0_0:0:0_24 147 chr3 73048 199 100M = 72591 -557 GCCTCTTGCACTGGCCAGAGGGGCTGGGGTCACTGTCTGTTTCCTTGCCCACCCTCAAGGGACAAGGAGGAGAGGTGAGGCTGTGGGCGCACCTGATTTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:34 NH:i:1
chr3_76941_77414_0:0:0_0:0:0_25 83 chr3 77315 199 100M = 76941 -474 GGGCAGCCACTATATGCTGGTCAACATCTAGAATTTATCAACAATTGTACAAGCCAAGTATTTTATCCTCATTTCTACAGAGGATAAAACCGAGAATAAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:35 NH:i:1
chr3_76941_77414_0:0:0_0:0:0_25 163 chr3 76941 199 100M = 77315 474 GCCCTTGGTAAAAGCACACGTGTGCACGATTCTGTGTGCATGTGCTTGCGGGGGCTGGGGGCTGAGGGAGGCCATCTGGGATGTGGGGTCTGGATGTGGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:35 NH:i:1
chr3_820467_821036_0:0:0_0:0:0_26 83 chr3 820937 199 100M = 820467 -570 TGGAGCGCGCTCCATTTACCTGCTGGTTACCTCGTGAGGCGCCTCAGGTCTGTGTGTCTTGTAAAGGCCGATCTCGGAATTTAACTCTGAACCTTACTCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:36 NH:i:1
chr3_820467_821036_0:0:0_0:0:0_26 163 chr3 820467 199 100M = 820937 570 CCGCCGCCGCCGCCGGTCCCGGAGCCAGAGAAGAAACAGCAACCGGCGCGCGCCAAAAGTATCGTCACTTCCTGTATTGGCGCGTAATGATGATATAATA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:36 NH:i:1
-chr3_437104_437619_0:0:0_0:0:0_27 99 chr3 437104 199 100M = 437520 516 GACAGCAGGAAGGACAAGGAAAACAAAAATTCACAAAGGGGCTGGGCGTGGTGGCTCACACCTGTAATCCCAACACTTTGGGAGGCCCAGGCAGGCAGAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:37 NH:i:1
+chr3_437104_437619_0:0:0_0:0:0_27 1123 chr3 437104 199 100M = 437520 516 GACAGCAGGAAGGACAAGGAAAACAAAAATTCACAAAGGGGCTGGGCGTGGTGGCTCACACCTGTAATCCCAACACTTTGGGAGGCCCAGGCAGGCAGAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:37 NH:i:1
chr3_437104_437619_0:0:0_0:0:0_27 147 chr3 437520 199 100M = 437104 -516 CTTGAACTCGGGAGCCGGAGGCTGTAGTGAACTGAGATCATGTCACTGCACTCTAGCTGGGCCAACAGAGACCCTGTATCAAAAAAAAAAAATTCCACAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:37 NH:i:1
-chr3_678781_679282_0:0:0_0:0:0_28 99 chr3 678781 199 100M = 679183 502 CCCACTTCCAGAAGCTGAATCGTAGTGATCAGCCTGCGGCCCCCTGCAGTGGTACAGGCCTGGTTGAGCAGCCTCAGGCCGCCTCCCCATAGTGCAGACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:38 NH:i:1
+chr3_678781_679282_0:0:0_0:0:0_28 1123 chr3 678781 199 100M = 679183 502 CCCACTTCCAGAAGCTGAATCGTAGTGATCAGCCTGCGGCCCCCTGCAGTGGTACAGGCCTGGTTGAGCAGCCTCAGGCCGCCTCCCCATAGTGCAGACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:38 NH:i:1
chr3_678781_679282_0:0:0_0:0:0_28 147 chr3 679183 199 100M = 678781 -502 CCTGGTAGCCCCTCTGCAAGCCCGGCCCCTCCCGACGCTCCTGGCTTCCTCCGTGCCCCTTTCCTGGCAGCGGGTCGCCGAGATTCGGCCGGAGCCCATT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:38 NH:i:1
-chr3_27362_27856_0:0:0_0:0:0_29 99 chr3 27362 199 100M = 27757 495 ATAGATGATAGATAATGATAGATGATAGCTAGATAGTAGATAGATGTGATAGGTAGATGGTAGATAGAGATGATGATAGATGGATGATAGATGGATGGAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:39 NH:i:1
+chr3_27362_27856_0:0:0_0:0:0_29 1123 chr3 27362 199 100M = 27757 495 ATAGATGATAGATAATGATAGATGATAGCTAGATAGTAGATAGATGTGATAGGTAGATGGTAGATAGAGATGATGATAGATGGATGATAGATGGATGGAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:39 NH:i:1
chr3_27362_27856_0:0:0_0:0:0_29 147 chr3 27757 199 100M = 27362 -495 CATGGAAGTGGCAGCTATTCCAGAAGCTGCACCTCTCAGCATGGGCACAGAACCACAGAAGTGGAGCCAGAGTCCTCTCCACATCTCCTCTGGGTCGGCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:39 NH:i:1
-chr3_353185_353740_0:0:0_0:0:0_2a 99 chr3 353185 199 100M = 353641 556 TCTCTACTAAAAATACAAAAATTAGCTGGGTGTGGTGGCGTGCACCTGTAGTCCCAGCTACTCAAGATGCTGAGGCAAGCGAATCACTTGAACCCGGGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:40 NH:i:1
+chr3_353185_353740_0:0:0_0:0:0_2a 1123 chr3 353185 199 100M = 353641 556 TCTCTACTAAAAATACAAAAATTAGCTGGGTGTGGTGGCGTGCACCTGTAGTCCCAGCTACTCAAGATGCTGAGGCAAGCGAATCACTTGAACCCGGGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:40 NH:i:1
chr3_353185_353740_0:0:0_0:0:0_2a 147 chr3 353641 199 100M = 353185 -556 CCATTCCATTAGTGTATATGTCTGTCTTTATGCCAGTACCACATGGTTTTGATTGCTGTAGCTTTGTAGTAAGTCCTGAAATTGGGATATGTGAGTCTTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:40 NH:i:1
-chr3_25571_26096_0:0:0_0:0:0_2b 99 chr3 25571 199 100M = 25997 526 AGGTAGAACCAGGAATCTAAAGAAACCTGAGGAAGGTGCTAGAATCATCACTACCATCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTATGGAGACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:8 SD:i:0 SN:i:41 NH:i:1
+chr3_25571_26096_0:0:0_0:0:0_2b 1123 chr3 25571 199 100M = 25997 526 AGGTAGAACCAGGAATCTAAAGAAACCTGAGGAAGGTGCTAGAATCATCACTACCATCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTATGGAGACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:8 SD:i:0 SN:i:41 NH:i:1
chr3_25571_26096_0:0:0_0:0:0_2b 147 chr3 25997 199 100M = 25571 -526 GGGAACCTAAAGACATGCCCTTCTTTGAAGGCAGAAGTCCTGTCCACGTCAAATGCCCTCCCTCCATCCCTCCCAGCTCCCTAGTCATCCTTCCTATTCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:41 NH:i:1
chr3_126523_127051_0:0:0_0:0:0_2c 83 chr3 126952 199 100M = 126523 -529 ACAAAGTGGTGCTGGCCTCCTGCAGCCCCTACTTCCACGCCATGTTCACAAGCAAGTACCCGCCTGGGCGGCGCTGGGGGCTCCGTGGGTCCCTCGGGTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:42 NH:i:1
chr3_126523_127051_0:0:0_0:0:0_2c 163 chr3 126523 199 100M = 126952 529 GTTCGCGGCTCTGACTACGCCCCAGGGGAGCAGGGGCGCAGCGGAGGCCTGGACACGGCGCGCTCCGGGGCGGGGGTCCTTGGCGGAGGTCAGGCGAGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:42 NH:i:1
-chr3_767232_767761_0:0:0_0:0:0_2d 99 chr3 767232 199 100M = 767662 530 CTGGATGATGGTATATGGGGTGGGGCGGCCCCCAACTATCTTGGCATGTTGGCCTGAGGACAGCTGGGGCCTTCACCCTGTGCCATGACCATTCCAGCTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:43 NH:i:1
+chr3_767232_767761_0:0:0_0:0:0_2d 1123 chr3 767232 199 100M = 767662 530 CTGGATGATGGTATATGGGGTGGGGCGGCCCCCAACTATCTTGGCATGTTGGCCTGAGGACAGCTGGGGCCTTCACCCTGTGCCATGACCATTCCAGCTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:43 NH:i:1
chr3_767232_767761_0:0:0_0:0:0_2d 147 chr3 767662 199 100M = 767232 -530 ACCTCCTGGACTGCAAAGTCATTGTCAACACCGAAACACAGGGTTTCTGACCATTGCAACCCAGGGTCCCGGCGTGTCGTGGCTGCAGACCCTGCAGACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:43 NH:i:1
chr3_752986_753422_0:0:0_0:0:0_2e 83 chr3 753323 199 100M = 752986 -437 TTTGAAATAATTATCCTTGGCTGCACTTTGGGAGGCCGAGGCGGGCGGATCACGAGGTCAGGAGATCAAGACCATCCCAGCTAACACGGTGAAACCCGTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:44 NH:i:1
chr3_752986_753422_0:0:0_0:0:0_2e 163 chr3 752986 199 100M = 753323 437 GCCCGCCACCACGCCCGGCTAATTTTTTGTATTTTTGGTAAAGATGGGGTTACACTGTGTTAGCCAGGATGGTCTCAGTCTCCTGACCTCGTGATCCGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:44 NH:i:1
chr3_529850_530335_0:0:0_0:0:0_2f 83 chr3 530236 199 100M = 529850 -486 AGCATTTTGGGAGGCCAAGGCAGGCAAATCACCTGAGGTCAGGAGTTCGAGACCAGTCTGGCCAACATGGTGAAACCCCCATCTCTACTAAAAATACAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:45 NH:i:1
chr3_529850_530335_0:0:0_0:0:0_2f 163 chr3 529850 199 100M = 530236 486 CAGACTCCACCTCTGGGACTCTGCATCCGCCTCCCTTCCCGGCTGGGCACGTCTTAGGGCAGCTGTCACACCAGCCTCAGGCGAGGGCCAGGCCACGGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:45 NH:i:1
-chr3_716842_717306_0:0:0_0:0:0_30 99 chr3 716842 199 100M = 717207 465 GAGCAGGCACGCTGGGCAGCTTCTCCCCAAATCACAGCACCCAAGCTGCTTTTGTTGGGCTACAATCAGCAAAAAATGAATTGAGAAGTACAAAGTAAAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:46 NH:i:1
+chr3_716842_717306_0:0:0_0:0:0_30 1123 chr3 716842 199 100M = 717207 465 GAGCAGGCACGCTGGGCAGCTTCTCCCCAAATCACAGCACCCAAGCTGCTTTTGTTGGGCTACAATCAGCAAAAAATGAATTGAGAAGTACAAAGTAAAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:46 NH:i:1
chr3_716842_717306_0:0:0_0:0:0_30 147 chr3 717207 199 100M = 716842 -465 TTTGCCCATCCTTACGCCAACGTCACATCGCCCCGAAGGCCTGAGGGAGGGTGGTGGGGCCCGAGGCCTGTGCTCACCCCATGCAGGCAGGGCCACGCCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:46 NH:i:1
-chr3_577580_578072_0:0:0_0:0:0_31 99 chr3 577580 199 100M = 577973 493 CGTCGAGGTCCCAGGTCTGTCCAGGTCCCAGGTCCGTCTGTCCCAGGTCCGTCCAGGTCCCAGGTCCGTCTGTCCCAGGTCTGTGCCTCCATCCATCATC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:47 NH:i:1
+chr3_577580_578072_0:0:0_0:0:0_31 1123 chr3 577580 199 100M = 577973 493 CGTCGAGGTCCCAGGTCTGTCCAGGTCCCAGGTCCGTCTGTCCCAGGTCCGTCCAGGTCCCAGGTCCGTCTGTCCCAGGTCTGTGCCTCCATCCATCATC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:47 NH:i:1
chr3_577580_578072_0:0:0_0:0:0_31 147 chr3 577973 199 100M = 577580 -493 ACAATGTCTGGAATCTATGAATAACATCTGTTTCTAAGTTATGAGTTGATTTTTCACTACTGAGTTTAGGCCAGGCAGGCGCAGGCCTGGTTTCGGGCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:47 NH:i:1
-chr3_459379_459835_0:0:0_0:0:0_32 99 chr3 459379 199 100M = 459736 457 CCTGGCTGAGGGGCGAGGTCGGAGGGGCGTGTCGGGGCGGGGCGGGGCGGGGCGAGTGTGGTCGCGGGGGCGTGGCCGGATAGGGCATGGCGGACAGGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:11 SD:i:0 SN:i:48 NH:i:1
+chr3_459379_459835_0:0:0_0:0:0_32 1123 chr3 459379 199 100M = 459736 457 CCTGGCTGAGGGGCGAGGTCGGAGGGGCGTGTCGGGGCGGGGCGGGGCGGGGCGAGTGTGGTCGCGGGGGCGTGGCCGGATAGGGCATGGCGGACAGGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:11 SD:i:0 SN:i:48 NH:i:1
chr3_459379_459835_0:0:0_0:0:0_32 147 chr3 459736 199 100M = 459379 -457 AGCCGGGACCGAACGTGGTCCCCACCGCGGACAGGCGACCCTTGTGTGGGCGCCACAGAAGATGCAAACGCGACTGTGCACCTGTGTCCTGCGCGCAGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:48 NH:i:1
-chr3_376818_377305_0:0:0_0:0:0_33 99 chr3 376818 199 100M = 377206 488 CCCGCGGGGCAGGAGGTATGCATGGCATACGTAAGCAGAGAGCCGGAGGCAGCCATCGGCACCTAGAACGGTGCAGAGTTGGCCCAGGAGCGTGGCGGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:49 NH:i:1
+chr3_376818_377305_0:0:0_0:0:0_33 1123 chr3 376818 199 100M = 377206 488 CCCGCGGGGCAGGAGGTATGCATGGCATACGTAAGCAGAGAGCCGGAGGCAGCCATCGGCACCTAGAACGGTGCAGAGTTGGCCCAGGAGCGTGGCGGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:49 NH:i:1
chr3_376818_377305_0:0:0_0:0:0_33 147 chr3 377206 199 100M = 376818 -488 CAGGGCCAGCAGGATGGCCAGGGGGCCCAGCAGCCCCAGCACCAGGCCCAGGCCCAGGATGGCGGCAACCGCACGGCCTGCAGGAAGGGGTCTGCTGGGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:49 NH:i:1
-chr3_905568_905999_0:0:0_0:0:0_34 99 chr3 852293 199 100M = 852625 432 ATTTATGTTTTTGGAGAAGGAGTTTCGCTCCTGTCGCCCAGGCTGGAGTGCAGTGGTGAGATATCAGCTCACTGCAACCTCAACCTCCCAGGTTCAAGCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:50 NH:i:1
+chr3_905568_905999_0:0:0_0:0:0_34 1123 chr3 852293 199 100M = 852625 432 ATTTATGTTTTTGGAGAAGGAGTTTCGCTCCTGTCGCCCAGGCTGGAGTGCAGTGGTGAGATATCAGCTCACTGCAACCTCAACCTCCCAGGTTCAAGCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:50 NH:i:1
chr3_905568_905999_0:0:0_0:0:0_34 147 chr3 852625 199 100M = 852293 -432 GCTCAACTGGGGCTGAGGCCCACCTCTGAGCTCACTCAGGTGGACGCTGGCTGGGTTCAGTTCCTTGCTGGCTATAGGTGGAAAGGGCCCCCACCAGTTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:50 NH:i:1
-chr3_967612_968079_0:0:0_0:0:0_35 99 chr3 967612 199 100M = 967980 468 ATCTCTTCTAAAAACTTGTAACTGAATGAAAAGTGATACTTCTGCACTCTATAAATAAAACATGAAACTTCCCAGACTCTGCTAAAACTGGAATTGAGGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:51 NH:i:1
+chr3_967612_968079_0:0:0_0:0:0_35 1123 chr3 967612 199 100M = 967980 468 ATCTCTTCTAAAAACTTGTAACTGAATGAAAAGTGATACTTCTGCACTCTATAAATAAAACATGAAACTTCCCAGACTCTGCTAAAACTGGAATTGAGGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:51 NH:i:1
chr3_967612_968079_0:0:0_0:0:0_35 147 chr3 967980 199 100M = 967612 -468 CTACCTTGTTGGTGGTGTAGCTGTCCCAGATGATAAGTTTACCATCCTGCGAGGCACTGACGAGAAGCCTGGAGGGACAGACAAAAGCAAACCTATCAGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:51 NH:i:1
-chr3_798691_799155_0:0:0_0:0:0_36 99 chr3 798691 199 100M = 799056 465 TCGACGGCCCCACGGGGGAGCTGGCCCACGCCTTCTTCCCCCCGCACGGCGGCATCCACTTCGACGACAGCGAGTACTGGGTCCTGGGCCCCACGCGCTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:52 NH:i:1
+chr3_798691_799155_0:0:0_0:0:0_36 1123 chr3 798691 199 100M = 799056 465 TCGACGGCCCCACGGGGGAGCTGGCCCACGCCTTCTTCCCCCCGCACGGCGGCATCCACTTCGACGACAGCGAGTACTGGGTCCTGGGCCCCACGCGCTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:52 NH:i:1
chr3_798691_799155_0:0:0_0:0:0_36 147 chr3 799056 199 100M = 798691 -465 AACATTCTTATCTTTCCGTGGCTGCGGCCGAGGGCGGCTCCGCGGCTGCGCTCCAGCAGATACACCGGGCCTCGGGGAGCTGGCCCACGGGCGGCGGGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:52 NH:i:1
-chr3_434306_434872_0:0:0_0:0:0_37 99 chr3 434306 199 100M = 434773 567 TTGTAGAGCTGGGATCTCACTATGTTGCCCAAGGTGGTCTCAAACTCCTGGCCTCAACTGATTCTCAGGCCTCAGCCTCCGGAAGTGCTGGAATCACAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:53 NH:i:1
+chr3_434306_434872_0:0:0_0:0:0_37 1123 chr3 434306 199 100M = 434773 567 TTGTAGAGCTGGGATCTCACTATGTTGCCCAAGGTGGTCTCAAACTCCTGGCCTCAACTGATTCTCAGGCCTCAGCCTCCGGAAGTGCTGGAATCACAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:53 NH:i:1
chr3_434306_434872_0:0:0_0:0:0_37 147 chr3 434773 199 100M = 434306 -567 GGGGTGGTGATCATCATGCAAGAGAGGAGGCTGGATCAGGGACCCCAAGGCCCCTCAGCCACACCTGAGACTGGGGAGAGGGGTGGTGATGAAAACAGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:53 NH:i:1
chr3_2556_3200_0:0:0_0:0:0_38 97 chr3 2556 199 100M = 3101 645 CCTGCCTCAGCCTCCCAAACTGCTGGGATTACAGGCTTGAGCCACCAAGCCTGGCCTATGTTTGTGGTCTCTTTGACTAAAAATGATCTTTTCTTCTAAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:54 NH:i:1
chr3_2556_3200_0:0:0_0:0:0_38 145 chr3 3101 199 100M = 2556 -645 ATAATCAAATCTCAGTAAGAACGGTAAGGATCAGTGGTATCAGAGCCACAATCCACCGCTCCCCACCCTCAAAGCGCATTGTGATGAAAGCTCTGCTCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:54 NH:i:1
-chr3_182099_182555_0:0:0_0:0:0_39 99 chr3 182099 199 100M = 182456 457 TACAGGTGCGTGTCACCACACCTGGCTAATTTTGTATTTGTTTTGTTTTGTGTTTCTTTGAGATGGAGTTTCGCTCTGTCGCCCAGGCTGGAGTGCAGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:55 NH:i:1
+chr3_182099_182555_0:0:0_0:0:0_39 1123 chr3 182099 199 100M = 182456 457 TACAGGTGCGTGTCACCACACCTGGCTAATTTTGTATTTGTTTTGTTTTGTGTTTCTTTGAGATGGAGTTTCGCTCTGTCGCCCAGGCTGGAGTGCAGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:55 NH:i:1
chr3_182099_182555_0:0:0_0:0:0_39 147 chr3 182456 199 100M = 182099 -457 TGTTGGCCAGGCTGGTCTCGAACTACTGACCTCAAGTGATCCGCCCACCTCGGCCTCCCAAAGTGCTGGGATTACAGGCCTGAGCCACCACGCCCGGCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:55 NH:i:1
-chr3_60424_60938_0:0:0_0:0:0_3a 99 chr3 60424 199 100M = 60839 515 AAGCACTCATCCTGCCTCAGCCTCCCAAGTAGCTGGGATTACAGGCATGTGCCACCACGCCCATTTAATTTTGTATTTTTAGTAGAGACAGGGTTTCTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:8 SD:i:0 SN:i:56 NH:i:1
+chr3_60424_60938_0:0:0_0:0:0_3a 1123 chr3 60424 199 100M = 60839 515 AAGCACTCATCCTGCCTCAGCCTCCCAAGTAGCTGGGATTACAGGCATGTGCCACCACGCCCATTTAATTTTGTATTTTTAGTAGAGACAGGGTTTCTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:8 SD:i:0 SN:i:56 NH:i:1
chr3_60424_60938_0:0:0_0:0:0_3a 147 chr3 60839 199 100M = 60424 -515 TTTAGTAGACACGGTGTTTCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTTGTGATCCGCCCACCTCGGCCTCCCAGAGTGCTGGGATTACAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:56 NH:i:1
chr3_372157_372672_0:0:0_0:0:0_3b 83 chr3 372573 199 100M = 372157 -516 CCCCCCTCAGGCCAACCTGGGAAGTTCCCCGGGGCTTGGGAGGGGCACCAGGAGGAGCTGGTGGGGAGACGAGAGGGTGCTGGCGCCTCCTCGGGACAGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:57 NH:i:1
chr3_372157_372672_0:0:0_0:0:0_3b 163 chr3 372157 199 100M = 372573 516 GACACTGCGGACACAGCCCGGTCCCCCGCAGGTCGCCGGCAGGGGCGGGGAAACCAGGCTGGCGGGGATCAGAAAGGTTTTCTATGCAAAGTCTGCGGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:57 NH:i:1
@@ -127,11 +127,11 @@ chr3_534381_534910_0:0:0_0:0:0_3e 83 chr3 534811 199 100M = 534381 -530 TCCAGGAT
chr3_534381_534910_0:0:0_0:0:0_3e 163 chr3 534381 199 100M = 534811 530 GGAGGCTGAGGCAGGAGAATTGCTTGAACCTGGGAGGTGGAGGTTGCAGTGAGCTGAAATCGCGCCACCGCACTCCAGTCTCGGGACAGAGTGAGATACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:60 NH:i:1
chr3_988857_989421_0:0:0_0:0:0_3f 83 chr3 989322 199 100M = 988857 -565 TTAGTCTGTAAAGCTACTTTAATCCTTCCAAAATCAACATGAAGAAAACAAGTTAATGTTATGAAGTACCCCGAATTAAACTGTTTTAGTTAACACACAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:61 NH:i:1
chr3_988857_989421_0:0:0_0:0:0_3f 163 chr3 988857 199 100M = 989322 565 AGCAACACGGTGTCAGAAAGATTTCTGACCAAATAGTTGTTTCTAAAGGCATTATTTGAAACGGATGTTTTAATCATGAGGAAAGCTTCACTAAGGAAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:61 NH:i:1
-chr3_467247_467722_0:0:0_0:0:0_40 99 chr3 467247 199 100M = 467623 476 CCACACATGCAGGGTGTGGAACCCCCCGCACTCCACCCGGCTGTCTGCCTCTGAGCCTTCTGCCTTGTGGAGCCGCTGCACAGCCTGGAGGCCTGGGCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:62 NH:i:1
+chr3_467247_467722_0:0:0_0:0:0_40 1123 chr3 467247 199 100M = 467623 476 CCACACATGCAGGGTGTGGAACCCCCCGCACTCCACCCGGCTGTCTGCCTCTGAGCCTTCTGCCTTGTGGAGCCGCTGCACAGCCTGGAGGCCTGGGCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:62 NH:i:1
chr3_467247_467722_0:0:0_0:0:0_40 147 chr3 467623 199 100M = 467247 -476 GGGCTGGCCTCAAGGGGGTCAGGGGAGCAGCCCCCAGGCCTGAGTTGGCACTCAGAGGCGATGGCTGTCACCCCTGCCCCTCTGTCCCACCTGGCCCTTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:62 NH:i:1
chr3_847412_847880_0:0:0_0:0:0_41 83 chr3 847781 199 100M = 847412 -469 GATTTACAGGCATCAGCCACTGCGCCCAGCCTCCCTTTTCTTTTTTTTTCTTTTTTTTTTTTTTTTTTGCTGACTCATGGGTTAGAATTCCGGATTGGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:63 NH:i:1
chr3_847412_847880_0:0:0_0:0:0_41 163 chr3 847412 199 100M = 847781 469 CTCACTGCAACCTCTACCTCCCGGGTTCAAGTGATTCTCCTGCCTCAGCCTCCTGAGTAGCTGGGATTACAGGCACCCGCCACCACGCCCAGTGAATTTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:63 NH:i:1
-chr3_862323_862817_0:0:0_0:0:0_42 99 chr3 799104 199 100M = 799499 495 CGCTCCAGCAGATACACCGGGCCTCGGGGAGCTGGCCCACGGGCGGCGGGGCTGGGCCCGGGGCTCCCAGGCGCTGACCCCCGGGGCCCGCAGGCGTGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:64 NH:i:1
+chr3_862323_862817_0:0:0_0:0:0_42 1123 chr3 799104 199 100M = 799499 495 CGCTCCAGCAGATACACCGGGCCTCGGGGAGCTGGCCCACGGGCGGCGGGGCTGGGCCCGGGGCTCCCAGGCGCTGACCCCCGGGGCCCGCAGGCGTGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:64 NH:i:1
chr3_862323_862817_0:0:0_0:0:0_42 147 chr3 799499 199 100M = 799104 -495 CCGGCGGCTCATGAAGAGGCTCTGCCCCAGCAGCTGCGACTTCTGCTACGGTGATGCCCACGGGGCCGGGACAGGGCTGCGTGGGAGCTGGGCCTTGGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:64 NH:i:1
chr3_398443_398980_0:0:0_0:0:0_43 83 chr3 398881 199 100M = 398443 -538 GGGGCAGCGCGCCGTGTCCAGGTGGAGGTGCCCGTTCCTGGACCTCAGCGAGCCTGAGCCGGGCCCGGCCGCACGCTGACCCCCGTGCTGTCCCCGACCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:65 NH:i:1
chr3_398443_398980_0:0:0_0:0:0_43 163 chr3 398443 199 100M = 398881 538 CACAGCGAGGACGTGTCTCTGGGCGCCTGGCTGGCGCCGGTGGACGTCCAGCGGGAGCACGACCCGCGCTTCGACACCGAATACCGGTCCCGCGGCTGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:65 NH:i:1
@@ -139,15 +139,15 @@ chr3_806057_806506_0:0:0_0:0:0_44 83 chr3 806407 199 100M = 806057 -450 GCTGTCTT
chr3_806057_806506_0:0:0_0:0:0_44 163 chr3 806057 199 100M = 806407 450 CTCGGCTGAGACAGAGCCCGGATGCTGAGCTGGGAGGAGGCGTCGGGTGTCATGTGGGGGACAAGCCCACATCCACGTCCACCAGGCTGAGGACATAACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:66 NH:i:1
chr3_484915_485324_0:0:0_0:0:0_45 83 chr3 485225 199 100M = 484915 -410 AAGCTGCTGAGGGAACCGCGCATAGGGGAGCCGCTGGCTTTTCAGGCAGTCAGGACAGGCTGAGTGGAGCTGCAGGTGAAGGTGGCACAGACCTGAGGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:67 NH:i:1
chr3_484915_485324_0:0:0_0:0:0_45 163 chr3 484915 199 100M = 485225 410 TCGTAGCCCACCATCTCGCTGAAGTAGGGGAGTGCCCCGCAGTGGTCCAGGTGGAAGTGGCTAGGGGGACGCAGCACAGGTCAGCCTGGGCCCACCCTGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:67 NH:i:1
-chr3_279997_280451_0:0:0_0:0:0_46 99 chr3 279997 199 100M = 280352 455 GGGCTGAGGTGACTGCGTGTGGAAACACCTGCCTGACTGGCTTTTAGGAGGAGTACTGGGGGCGGGATGTGTGTCAGGAAAACAACGTTTCTGTATAATT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:68 NH:i:1
+chr3_279997_280451_0:0:0_0:0:0_46 1123 chr3 279997 199 100M = 280352 455 GGGCTGAGGTGACTGCGTGTGGAAACACCTGCCTGACTGGCTTTTAGGAGGAGTACTGGGGGCGGGATGTGTGTCAGGAAAACAACGTTTCTGTATAATT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:68 NH:i:1
chr3_279997_280451_0:0:0_0:0:0_46 147 chr3 280352 199 100M = 279997 -455 CAAATAGGGTGCAGTGCGTACTGCTCGGGTGATGGGAGCACCAAAATCTCACAAATCACCACTAAAGAGCTTACCCATGTAACCAAGTAACCAAACACCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:68 NH:i:1
chr3_203442_203968_0:0:0_0:0:0_47 83 chr3 203869 199 100M = 203442 -527 TGCTCTCCATGCCCCTGCAAGCAGTCCAGCCCCGCTCACCCTGGTGAGCTGCCATCATCACCTTCTTCCCCAAGGCTCTGTCTTGGGACGTAGCAGGACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:69 NH:i:1
chr3_203442_203968_0:0:0_0:0:0_47 163 chr3 203442 199 100M = 203869 527 AGCCCGCAGTGTGAGCGGAGAATGGGGGGGTGGGGCTTCCCGGAGGCCGTGATTCCTGAGCTCACTCTTGAGGGGCAGATGTGTTTCCAGCCTGGATGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:69 NH:i:1
chr3_566865_567305_0:0:0_0:0:0_48 83 chr3 567206 199 100M = 566865 -441 GTCAGGTAACTGGAAAGCGGGCACTGTGCACGCAGCCCTTGAGGGGAGCCAGGGTGGGTGGGGATCCCCCACTGTGTGCAGAAGGCAGCAGCCTTGGAAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:70 NH:i:1
chr3_566865_567305_0:0:0_0:0:0_48 163 chr3 566865 199 100M = 567206 441 AGCAAGACAAGCAGGTTGCGGCAAGGGTCTGCGCACTGCTGTGTAAATGTAGGGGCATCGGGGGGCAGAAAGACCAGGGGAGAAGTGTGCATCACCCTTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:70 NH:i:1
-chr3_326245_326784_0:0:0_0:0:0_49 99 chr3 326245 199 100M = 326685 540 TGGCCGGGGCTCAGAGCAGCCCACGGAGCCAGGCCCATCCTGGGAGGCTCAGGGCTGCAGCCCATGGACTCATGAGGGGGGCTTCTGGGGTCTAAGGCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:71 NH:i:1
+chr3_326245_326784_0:0:0_0:0:0_49 1123 chr3 326245 199 100M = 326685 540 TGGCCGGGGCTCAGAGCAGCCCACGGAGCCAGGCCCATCCTGGGAGGCTCAGGGCTGCAGCCCATGGACTCATGAGGGGGGCTTCTGGGGTCTAAGGCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:71 NH:i:1
chr3_326245_326784_0:0:0_0:0:0_49 147 chr3 326685 199 100M = 326245 -540 CAGAGCCTCTGCCCAGACGTCCCGTCCCATCCCGGGGGGCTGTCACGCTTCCGCCCCACCTCTCGCCCATTCCCGGACAGTCACAGCCGTGGAACAACGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:71 NH:i:1
-chr3_252268_252806_0:0:0_0:0:0_4a 99 chr3 252268 199 100M = 252707 539 TGCGGTGTGGCCGTGAGCCATGAGTGTGAACCTGAGAGCGTGTGGCTGTGAGCTGTGTGACCCTGAGAGAGTGCGGTGTGGCCATCAGGGCCTCCCCTAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:72 NH:i:1
+chr3_252268_252806_0:0:0_0:0:0_4a 1123 chr3 252268 199 100M = 252707 539 TGCGGTGTGGCCGTGAGCCATGAGTGTGAACCTGAGAGCGTGTGGCTGTGAGCTGTGTGACCCTGAGAGAGTGCGGTGTGGCCATCAGGGCCTCCCCTAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:72 NH:i:1
chr3_252268_252806_0:0:0_0:0:0_4a 147 chr3 252707 199 100M = 252268 -539 ACCCACGTGCACCCTGAAGGGATCTCAGGACGGAAATATGAAAGCGAGGGGCTGAGGGGGCGGAGCAGCCCTCACCACAGAGCTGTCCCGAATGGCCCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:72 NH:i:1
chr3_806288_806788_0:0:0_0:0:0_4b 83 chr3 806689 199 100M = 806288 -501 CAGCGCTTTGGGAGGCCGAGGCGGGTGGATCACCTGAGGTCAGGAGTTCGAGACCATCCTGGCCAACGTGGTGAAACCCCGTCTCGACTAAAAATAAAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:73 NH:i:1
chr3_806288_806788_0:0:0_0:0:0_4b 163 chr3 806288 199 100M = 806689 501 ACGCCCTTGGTCAGCACTGTGCCTCGCTGAGGAATGCGGGCCCCACCGGCACAGCCTGGAGCGGCCAACGAATCAGGCGGCCTCCCAGACCCTGGCGTGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:73 NH:i:1
@@ -157,67 +157,67 @@ chr3_94129_94631_0:0:0_0:0:0_4d 83 chr3 94532 199 100M = 94129 -503 GGAAAATAATCA
chr3_94129_94631_0:0:0_0:0:0_4d 163 chr3 94129 199 100M = 94532 503 CCAGCAGTGCTGCGTTTTCCCAGTGAGCTGTCGTGGAGAGAGCAGAGGGGACCCAGCGCAGGCCCAGTGGCCGGTGAGGGGAGACGTGGCTCTGGGACGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:75 NH:i:1
chr3_762106_762551_0:0:0_0:0:0_4e 83 chr3 762452 199 100M = 762106 -446 GGAACGAGCAGTCTGTGCCAGTGACCACGAGCTCAGATGCAGACGCCCCTCCCTGGCAGGGACGCCCACTCCCAGGCCAAGGCTTGTCCCCTGGGCCTTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:76 NH:i:1
chr3_762106_762551_0:0:0_0:0:0_4e 163 chr3 762106 199 100M = 762452 446 TACAGAGAGAGGCAGAGAGATGGAGACAGAGAGGCAGAGAGAGATGGAGGGAGATGGAGACAGGGACAGAGAGATAGGGAGAGAGAGAGATGAAGAGAGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:76 NH:i:1
-chr3_721055_721506_0:0:0_0:0:0_4f 99 chr3 721055 199 100M = 721407 452 TGCACAGCACGATGCTCTGGGCTCCCGGGATGGACTGGGATCCACTCACCACACTATGCCACAAGCTTGCCAGCTACCACTTCCATGTCAACGTATCAAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:77 NH:i:1
+chr3_721055_721506_0:0:0_0:0:0_4f 1123 chr3 721055 199 100M = 721407 452 TGCACAGCACGATGCTCTGGGCTCCCGGGATGGACTGGGATCCACTCACCACACTATGCCACAAGCTTGCCAGCTACCACTTCCATGTCAACGTATCAAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:77 NH:i:1
chr3_721055_721506_0:0:0_0:0:0_4f 147 chr3 721407 199 100M = 721055 -452 TGAACTCCAGCATCAAAGTCAGAGCGAGACCGTCTCGAAAAAATAAATTCACTTTGAGGCTGGGCATGGTGGCTCACTCCTGTAATCCCAGCACTATGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:77 NH:i:1
-chr3_101266_101747_0:0:0_0:0:0_50 99 chr3 101266 199 100M = 101648 482 GCCGCCACCTCGTTATGCCCGAGCATCAGAGCCGCTGTGAATTCCAGAGAGGCAGCCTGGAGATTGGCCTGCGACCCGCCGGTGAGGAGCACAGGGGGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:78 NH:i:1
+chr3_101266_101747_0:0:0_0:0:0_50 1123 chr3 101266 199 100M = 101648 482 GCCGCCACCTCGTTATGCCCGAGCATCAGAGCCGCTGTGAATTCCAGAGAGGCAGCCTGGAGATTGGCCTGCGACCCGCCGGTGAGGAGCACAGGGGGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:78 NH:i:1
chr3_101266_101747_0:0:0_0:0:0_50 147 chr3 101648 199 100M = 101266 -482 GGAGGAACGCACGCACTCCCGCAGCGCACGCATGACTGGTCCCGCCTCCTAGGGCTCCTGGACGGAAGGGGTCCCCGGTCCCGCCTCCTAGGGCTCCTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:78 NH:i:1
chr3_837381_837891_0:0:0_0:0:0_51 83 chr3 837792 199 100M = 837381 -511 TGACCTGGGCTCCCCCGGCCCTCCCGGGTGGTTGCTGGCTGCTCCCTGTGGGGTGGCAGGTGGCCGGCTTCCACCCTGCCCGAGCCGCCGCCTACCTAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:79 NH:i:1
chr3_837381_837891_0:0:0_0:0:0_51 163 chr3 837381 199 100M = 837792 511 TGACTGAACACACGTGGCAGTGAGAGCCACGAAGCCACATTCATTTGGTGTCCTGAAATCTGGACAGCCCTGGTGCTTTTATAAAGTCTGCACTGAAAAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:79 NH:i:1
-chr3_532631_533082_0:0:0_0:0:0_52 99 chr3 532631 199 100M = 532983 452 AGTTGAATTGGTGTGTGTAGTGTGGATGAGTGTGGATGTGTGGATGAGTGTGAATTGGTGACTGTTGAGTGTGGATGTGTGAATTGGTGAGAGTGAATGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:80 NH:i:1
+chr3_532631_533082_0:0:0_0:0:0_52 1123 chr3 532631 199 100M = 532983 452 AGTTGAATTGGTGTGTGTAGTGTGGATGAGTGTGGATGTGTGGATGAGTGTGAATTGGTGACTGTTGAGTGTGGATGTGTGAATTGGTGAGAGTGAATGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:80 NH:i:1
chr3_532631_533082_0:0:0_0:0:0_52 147 chr3 532983 199 100M = 532631 -452 ATTGGTGAGTTGAACTGGTGTGTGTAGTGTGGATGAGTGTGAATTGGTGAATGTCGATAGGTGTGAATTGTTGAGTGAGTGTATGAATGAGTGTGGATGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:80 NH:i:1
chr3_647238_647676_0:0:0_0:0:0_53 83 chr3 647577 199 100M = 647238 -439 CTCTGGGGCAGCCACCGTGGAGCGGGAGATGGAGCTGCGGCACAAGAATGAGATGCTGCGAGTGGAGACCGAGGCCCGGGCGCGCGCCAAGGCCGAGCGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:81 NH:i:1
chr3_647238_647676_0:0:0_0:0:0_53 163 chr3 647238 199 100M = 647577 439 CAGGGCCGGCCTGTGGCGCTGTCCCTACCAAGGTCTGTGTGTGTCTGTGGCACGGGCCTGTCCATGGACTGGGCTTGTCCGTGGAGTGGGTCGGTCCATG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:81 NH:i:1
chr3_299390_299874_0:0:0_0:0:0_54 83 chr3 299775 199 100M = 299390 -485 CCCTAAACAGTTCTGTGCTGTCCTGGGAGCACCAGTAACATCCAGTTCCCCGAGGGACCAGGGGGCTGCCTCAGGGGGGAGGGGTCTCCTGCAGGCCTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:82 NH:i:1
chr3_299390_299874_0:0:0_0:0:0_54 163 chr3 299390 199 100M = 299775 485 TGGGCGTGGTGGCGGGCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAACTCGGGAGGCGGAGCTTGCAGTGAGCCGAGATGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:82 NH:i:1
-chr3_460523_461019_0:0:0_0:0:0_55 99 chr3 460523 199 100M = 460920 497 ACCCGGCACCCCCCGCAGTGGGCCCCCCAGCCCCGTCTGTGGCTGGCCCAGGCTGAAATGTCTCCACCCAACCAACGGGCAGCATGGCTCCAGAAGGGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:83 NH:i:1
+chr3_460523_461019_0:0:0_0:0:0_55 1123 chr3 460523 199 100M = 460920 497 ACCCGGCACCCCCCGCAGTGGGCCCCCCAGCCCCGTCTGTGGCTGGCCCAGGCTGAAATGTCTCCACCCAACCAACGGGCAGCATGGCTCCAGAAGGGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:83 NH:i:1
chr3_460523_461019_0:0:0_0:0:0_55 147 chr3 460920 199 100M = 460523 -497 CCTGCGTCGAAGTAGGAGAAGAGCGAGTCCAGCTCGTCGGGACAGAAGAGGGAGTCTCGGCGGAACTTACGATCCAGGGTGCCCACTGCAGGGGCAGTGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:83 NH:i:1
chr3_116526_117003_0:0:0_0:0:0_56 83 chr3 116904 199 100M = 116526 -478 TCATCTGTTCCTGCCTGCTCTCCTCCCTGTTCCCGCCTTTCCTACTTCTTCCCTTGGACTTCCCACCTCCCTTCCCAGCCTTGAGTTTCTAAGGCTCAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:84 NH:i:1
chr3_116526_117003_0:0:0_0:0:0_56 163 chr3 116526 199 100M = 116904 478 TCCCTACAGGAGCAGGCAGAGGTGCCACACGCCCACCACAGCCTCACTCACCTCCAGGATGAAAGGCAGCACCGGGATGAAGGCCCCCGAGCTCCCCGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:84 NH:i:1
-chr3_713984_714541_0:0:0_0:0:0_57 99 chr3 713984 199 100M = 714442 558 ACAGGAGCAAGACTCTGTCTCAAAAAAAAAAAAAAAAAAAAAACTCCCAAGGATGGGCCAAGAAAATTGTCCAGAGGTGAGATGTGGCAGCCACGGGGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:85 NH:i:1
+chr3_713984_714541_0:0:0_0:0:0_57 1123 chr3 713984 199 100M = 714442 558 ACAGGAGCAAGACTCTGTCTCAAAAAAAAAAAAAAAAAAAAAACTCCCAAGGATGGGCCAAGAAAATTGTCCAGAGGTGAGATGTGGCAGCCACGGGGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:85 NH:i:1
chr3_713984_714541_0:0:0_0:0:0_57 147 chr3 714442 199 100M = 713984 -558 GCCTTGGGCTGTGACACATACCTGTGGTCCCAGCTACTCGGGAGGCCGAGGCAGGAGGATGCCTTGAATCCAGGAGGGACCCAGAGAGATCTGGAATATT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:85 NH:i:1
-chr3_816442_816980_0:0:0_0:0:0_58 99 chr3 816442 199 100M = 816881 539 AAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTTTCCTTTTAAGACGGGGTTTCACCATGTTGGCTAGACTGGTCTCGAACTCCTGAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:86 NH:i:1
+chr3_816442_816980_0:0:0_0:0:0_58 1123 chr3 816442 199 100M = 816881 539 AAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTTTCCTTTTAAGACGGGGTTTCACCATGTTGGCTAGACTGGTCTCGAACTCCTGAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:86 NH:i:1
chr3_816442_816980_0:0:0_0:0:0_58 147 chr3 816881 199 100M = 816442 -539 ACATCCGCTCACCTGTCTTCCATAGAGTCTTCTCTTCTATACGGGGAGTTCCTTATTGTGATCTCCATGCGGTGATCTCTCAGCTCCCCCTCCTCAAGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:86 NH:i:1
chr3_527813_528314_0:0:0_0:0:0_59 83 chr3 528215 199 100M = 527813 -502 AGTGAGACTCCATCTCAAAAAAAAAAAAAAAAAAAAGGGTTGGGCGCGGTGGCTCACATCGGTAATCCCAGCACCTTGGGAGGCCGAGGTGGGTGGATCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:87 NH:i:1
chr3_527813_528314_0:0:0_0:0:0_59 163 chr3 527813 199 100M = 528215 502 AGTAGTATCTGGCTTGTTCTAAACATGCAAATGCAGTGAGACGGGCTGCCTGGTACACACACATCGCCCCGGAATCATGACCTTAGGCCCCAGGGCCACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:87 NH:i:1
-chr3_870946_871420_0:0:0_0:0:0_5a 99 chr3 807736 199 100M = 808111 475 ATCCTACACACTTATGCCAAACATCCCCCAGAAAAGACTCACACTGTGCTGGCTTCTGCGCAGGCGCCGGCCTGGCCTCACCTGGCTTCCGGCCGTCTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:88 NH:i:1
+chr3_870946_871420_0:0:0_0:0:0_5a 1123 chr3 807736 199 100M = 808111 475 ATCCTACACACTTATGCCAAACATCCCCCAGAAAAGACTCACACTGTGCTGGCTTCTGCGCAGGCGCCGGCCTGGCCTCACCTGGCTTCCGGCCGTCTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:88 NH:i:1
chr3_870946_871420_0:0:0_0:0:0_5a 147 chr3 808111 199 100M = 807736 -475 AGGGAGAAAAAGCTCCAACCTGGAGGCCGTGAGCCCGAGGACCCAGGTGGACCTTCTAGACTGATCTTCACTGGTCAAAAAGGAAGGGGCGGGAGAAGAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:88 NH:i:1
chr3_208099_208502_0:0:0_0:0:0_5b 83 chr3 208403 199 100M = 208099 -404 GGGAAGCCTGGCCCTCCCATAGCCTGCTGTGGACAATCAGGAAGCCCCAAGCTTGGGGGCAGCCTCGCCCGCAGCCACCGGGGACTCCTGGGTGTGTGTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:89 NH:i:1
chr3_208099_208502_0:0:0_0:0:0_5b 163 chr3 208099 199 100M = 208403 404 GGTCTCTTCTTCCCTGTCGGCTCCGCCTCTTGTGTCTTCCAGCCTGACCTGCTGGCAGGATGAGGTGCCGTGCCTGCCCTCCAGGAGCCCGCAGCCCGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:89 NH:i:1
-chr3_745700_746199_0:0:0_0:0:0_5c 99 chr3 745700 199 100M = 746100 500 CAGGTGATCCACCCGCCTTGGACTCCCAAAGCTGTGAGCCACCACACCCGGCCTGTTAAATTTTTTTTGAGACAGGGTTTTGCTCTGTTGCCCTGTGGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:90 NH:i:1
+chr3_745700_746199_0:0:0_0:0:0_5c 1123 chr3 745700 199 100M = 746100 500 CAGGTGATCCACCCGCCTTGGACTCCCAAAGCTGTGAGCCACCACACCCGGCCTGTTAAATTTTTTTTGAGACAGGGTTTTGCTCTGTTGCCCTGTGGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:90 NH:i:1
chr3_745700_746199_0:0:0_0:0:0_5c 147 chr3 746100 199 100M = 745700 -500 GACCTCTGCCTAGGAAAGCCAGGTATTGTCCAAGGTTTCTCCCCATGTGATAGTCTGAAATATGGCCTCATGGGAAGGGAAAGACCTGACCGTCCCCCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:90 NH:i:1
-chr3_836292_836840_0:0:0_0:0:0_5d 99 chr3 836292 199 100M = 836741 549 CAGACACTTCCCTCGGCCTTAAGATCTTGTAGTTTCTTAAATAAGCGAACACGTGTGCACCCCCCGACTCCGTTCAAGACGCCGCGCTCTGTGGGCGCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:91 NH:i:1
+chr3_836292_836840_0:0:0_0:0:0_5d 1123 chr3 836292 199 100M = 836741 549 CAGACACTTCCCTCGGCCTTAAGATCTTGTAGTTTCTTAAATAAGCGAACACGTGTGCACCCCCCGACTCCGTTCAAGACGCCGCGCTCTGTGGGCGCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:91 NH:i:1
chr3_836292_836840_0:0:0_0:0:0_5d 147 chr3 836741 199 100M = 836292 -549 CACAACCCTGCCCTGACCTCCGCATGCCAGTGTGAGAATACCTGGCAGACGCCAGGCTCCAAGACACCCCCGCACATGTGACCGTGAGAGAAGTGAAGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:91 NH:i:1
chr3_536565_537034_0:0:0_0:0:0_5e 83 chr3 536935 199 100M = 536565 -470 GGTGAGCTCTGCAGGCTGGGCAGGATGTGGGTCTGCAGGAGGGGCCTGGGTTGCGGGAGCTGGAGTGGACAGCTGAGGCACCGCCTCGCCGGGATGGTGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:92 NH:i:1
chr3_536565_537034_0:0:0_0:0:0_5e 163 chr3 536565 199 100M = 536935 470 AGGTTGGGGCACCATCCCCACTGACCAGGACCATCCCCAGCAGAGCCAAGTCCTGGTTGGAACTGGAGCTCTGAACACCACCAGGACACGCTGTGGTGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:92 NH:i:1
-chr3_970312_970810_0:0:0_0:0:0_5f 99 chr3 970312 199 100M = 970711 499 CTGGCTAATTTTTGTATTTTTAGTAGAGATGGGGTTTCCCCATGTTACCCAGGCTGGTCTCCAACTCCTGGCCTCAAGTGATCCGCCCACCTTGGCCTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:93 NH:i:1
+chr3_970312_970810_0:0:0_0:0:0_5f 1123 chr3 970312 199 100M = 970711 499 CTGGCTAATTTTTGTATTTTTAGTAGAGATGGGGTTTCCCCATGTTACCCAGGCTGGTCTCCAACTCCTGGCCTCAAGTGATCCGCCCACCTTGGCCTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:93 NH:i:1
chr3_970312_970810_0:0:0_0:0:0_5f 147 chr3 970711 199 100M = 970312 -499 TTTTTTTTTTGAGAGAGAGTCTCACTCTGTCACCCAGGCTGGAGCTCGCTGCAACCTCTGCCTCCCAGATGCAAGCAATTCTCGTGCGTCAGCCTTCCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:93 NH:i:1
-chr3_397635_398142_0:0:0_0:0:0_60 99 chr3 397635 199 100M = 398043 508 TCCCAGGAGCGCCGTGCGCACGCGCACCGCCCCGAGCCGGCGGCGCCTGCGCACTCGCGAGTCCGGCCTGGGCCGCCGGCCCGGCGCGGGCGCCATGAAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:94 NH:i:1
+chr3_397635_398142_0:0:0_0:0:0_60 1123 chr3 397635 199 100M = 398043 508 TCCCAGGAGCGCCGTGCGCACGCGCACCGCCCCGAGCCGGCGGCGCCTGCGCACTCGCGAGTCCGGCCTGGGCCGCCGGCCCGGCGCGGGCGCCATGAAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:94 NH:i:1
chr3_397635_398142_0:0:0_0:0:0_60 147 chr3 398043 199 100M = 397635 -508 GCGGCGCGCCCTGGAGCGGGAGCAGGCGCGGCACGGGGACCTGCTGCTGCTGCCCGCGCTGCGCGACGCCTACGAAAACCTCACGGCCAAGGTGCTGGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:94 NH:i:1
chr3_549449_549992_0:0:0_0:0:0_61 83 chr3 549893 199 100M = 549449 -544 GGGTTTCACCGCATTGGCCAGGATGGTCTCGATCTCCTGACCTCATGATCTACCCACCTTGGCCTCCCACAGTGTTGGGATTACAGGCGTCAGTGTGCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:95 NH:i:1
chr3_549449_549992_0:0:0_0:0:0_61 163 chr3 549449 199 100M = 549893 544 CTGCCGTGAGGGAACAGGCCCTTGGACTTGGGACCTGGCCGGAGCACAGATCTCACAGGAGGTGGCGGTGCTGTGGCCGTCGCCCACTGCTCGGCTCCAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:95 NH:i:1
-chr3_282401_282895_0:0:0_0:0:0_62 99 chr3 282401 199 100M = 282796 495 CCGAAGGAACTGGGGACGTCCGTCCGCTCATCAAAGGCTTCAATCCTGTCGGTGTCCCCGGGCCCTCTCCTAGCTGGGCCAGCGCGCAGGGTGGGGGGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:96 NH:i:1
+chr3_282401_282895_0:0:0_0:0:0_62 1123 chr3 282401 199 100M = 282796 495 CCGAAGGAACTGGGGACGTCCGTCCGCTCATCAAAGGCTTCAATCCTGTCGGTGTCCCCGGGCCCTCTCCTAGCTGGGCCAGCGCGCAGGGTGGGGGGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:96 NH:i:1
chr3_282401_282895_0:0:0_0:0:0_62 147 chr3 282796 199 100M = 282401 -495 GCGTGAGGATGTTTTGTAAACAGTATTGGGGACAGCTGGGGAAGTCACATTTGTGTCTTCTCTTTAGAGGGACAGTGTTCGCCTGAGGGTCCATCCCAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:96 NH:i:1
chr3_935322_935892_0:0:0_0:0:0_63 83 chr3 935793 199 100M = 935322 -571 GCAGTGGCGCGATCTCGGCTCACCGCAAACTCTGCCTCCCAGGTTCAAGCAATTCTCCTGCCTCAGCTTCCCAAGTAGCTGGGACTACAGGTGTGCGCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:97 NH:i:1
chr3_935322_935892_0:0:0_0:0:0_63 163 chr3 935322 199 100M = 935793 571 TTACAGCTTACTGCAGCCTCAATCTCCCAGGCTTAAGGGATCCTCCCATGTAGCTGAGACTACAGGCATGAGCCACTATGTCCAGCTAATTTTTAAATTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:97 NH:i:1
-chr3_397709_398206_0:0:0_0:0:0_64 99 chr3 397709 199 100M = 398107 498 GCCGGCCCGGCGCGGGCGCCATGAAGCTGCTGCGGCGGGCGTGGCGGCGGCGGGCGGCGCTAGGCCTGGGCACGCTGGCGCTGTGCGGGGCGGCGCTGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:98 NH:i:1
+chr3_397709_398206_0:0:0_0:0:0_64 1123 chr3 397709 199 100M = 398107 498 GCCGGCCCGGCGCGGGCGCCATGAAGCTGCTGCGGCGGGCGTGGCGGCGGCGGGCGGCGCTAGGCCTGGGCACGCTGGCGCTGTGCGGGGCGGCGCTGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:98 NH:i:1
chr3_397709_398206_0:0:0_0:0:0_64 147 chr3 398107 199 100M = 397709 -498 GACGCCTACGAAAACCTCACGGCCAAGGTGCTGGCCATGCTGGCCTGGCTGGACGAGCACGTGGCCTTCGAGTTCGTGCTCAAGGCGGACGACGACTCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:98 NH:i:1
chr3_410570_411113_0:0:0_0:0:0_65 83 chr3 411014 199 100M = 410570 -544 CCTCCCTTCCAGGGGGACTGGCCCGGACTGCTGAAGACCCCCCCACTGCCCCGCCCCTGCCAGCACTGCCAGCCTGGCCCTAGCGCCGGGAACCAGCCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:99 NH:i:1
chr3_410570_411113_0:0:0_0:0:0_65 163 chr3 410570 199 100M = 411014 544 CCCACTCCTTCCCCCACTCATCTCCCTCCTCCCCCACTCCCCCTCCTCCCCCACTTTTCACCCTCCTCCCCCCACTCTTCCCCCTCCTCCCCGCACTCTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:99 NH:i:1
-chr3_208172_208636_0:0:0_0:0:0_66 99 chr3 208172 199 100M = 208537 465 CTGCCCTCCAGGAGCCCGCAGCCCGAGGTCTGCCCCTGGTGCGGCAGGGGGGGTGGCTTGCTGCTGCCTGGCCTTGTGGTCCTCAGAGCAAGTGGACAGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:100 NH:i:1
+chr3_208172_208636_0:0:0_0:0:0_66 1123 chr3 208172 199 100M = 208537 465 CTGCCCTCCAGGAGCCCGCAGCCCGAGGTCTGCCCCTGGTGCGGCAGGGGGGGTGGCTTGCTGCTGCCTGGCCTTGTGGTCCTCAGAGCAAGTGGACAGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:100 NH:i:1
chr3_208172_208636_0:0:0_0:0:0_66 147 chr3 208537 199 100M = 208172 -465 TGCCGTGTCTGCTGTGCATCTGGCCCTTCTCCTGTGTTCTCTCTTCCTCCACCATCCCCTCCCTGGAAGAGGGACTGCTGCGTGGGGCTGGGGGCTTTGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:100 NH:i:1
chr3_541732_542211_0:0:0_0:0:0_67 83 chr3 542112 199 100M = 541732 -480 GAACCAGAAATAACCTGGGCTGGCAACCTTGGCCTTCAGGCTTCTGAGGCATTGACGTGCAGATATAAGTACCTCCCCTGAGTTAGGGACTGGTAGAAGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:101 NH:i:1
chr3_541732_542211_0:0:0_0:0:0_67 163 chr3 541732 199 100M = 542112 480 GCAGGAAGTGGAAAGGCCAGGTCATCTGCCGGGCCCAGCCTGGCTAAGCAGTGAGTGACAGGTAGGATAGGCCAGTGTGAGGACATAAAGTTCAGGCTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:101 NH:i:1
-chr3_982782_983260_0:0:0_0:0:0_68 99 chr3 982782 199 100M = 983161 479 CGCCTGTGCATCCTGTGGAGCTTTCCTTACTCCATCAGCTACTCTGTGTATTTTAAGGCACACCATGTTTTAATCAGAATCCCTCTGAGAGCCTGTGTTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:102 NH:i:1
+chr3_982782_983260_0:0:0_0:0:0_68 1123 chr3 982782 199 100M = 983161 479 CGCCTGTGCATCCTGTGGAGCTTTCCTTACTCCATCAGCTACTCTGTGTATTTTAAGGCACACCATGTTTTAATCAGAATCCCTCTGAGAGCCTGTGTTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:102 NH:i:1
chr3_982782_983260_0:0:0_0:0:0_68 147 chr3 983161 199 100M = 982782 -479 GTACTTCGGAGACAAAGCAGCTAGGCGCCTCTCCTAGTGAACGTCCCACCGCGGGGGCCATGACCTGCCTGTGGCTGCTGCGCACAGCAAACGGAACCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:102 NH:i:1
-chr3_374707_375245_0:0:0_0:0:0_69 99 chr3 374707 199 100M = 375146 539 CCCACTGTGCACCCCCGGGGCCGGTTCCCCACTTCCACGACACCGGCAGGGGGGAGCCATCCGCCAGGCACAGGAGGCCTGGACAGCCGTGGGCAGCGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:103 NH:i:1
+chr3_374707_375245_0:0:0_0:0:0_69 1123 chr3 374707 199 100M = 375146 539 CCCACTGTGCACCCCCGGGGCCGGTTCCCCACTTCCACGACACCGGCAGGGGGGAGCCATCCGCCAGGCACAGGAGGCCTGGACAGCCGTGGGCAGCGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:103 NH:i:1
chr3_374707_375245_0:0:0_0:0:0_69 147 chr3 375146 199 100M = 374707 -539 CCCAGGCGGGTCTCAAACTCCTGAGCTCAGACGATCGTCTGGCCTCAGCCTCCCAAAGTGCTAGGATTCTAGGCGTGAGCCACCGTGACCGGCCTCGGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:103 NH:i:1
-chr3_762226_762808_0:0:0_0:0:0_6a 99 chr3 762226 199 100M = 762709 583 AACAGAGAGAGAGAGAGAGAGCCAGAGACAGAGACAGAGAGATACATGGGAAGGAGAGGGAGAGAATGGAGAGAGACAGAGAGAGGGAGGGGAGGCTCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:104 NH:i:1
+chr3_762226_762808_0:0:0_0:0:0_6a 1123 chr3 762226 199 100M = 762709 583 AACAGAGAGAGAGAGAGAGAGCCAGAGACAGAGACAGAGAGATACATGGGAAGGAGAGGGAGAGAATGGAGAGAGACAGAGAGAGGGAGGGGAGGCTCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:104 NH:i:1
chr3_762226_762808_0:0:0_0:0:0_6a 147 chr3 762709 199 100M = 762226 -583 CCCGGCTGCTCCCCAAGGAGCCGTGCTGCCCCAGCTCCCTGGACGCTGCCTTTGCTGGTAGAAGACATGGAAACACCCTTGCAGGTGACCAGGGCAGAAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:104 NH:i:1
-chr3_388964_389451_0:0:0_0:0:0_6b 99 chr3 388964 199 100M = 389352 488 CACCTGTGAGGCCAGCCGGCAACGTCTGTGCCTCGGGGCCACCAAGAGGCATAGACACCACCAGATGAAGGCCCCGAAGACATAAGAGGCGCCCTCCATG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:105 NH:i:1
+chr3_388964_389451_0:0:0_0:0:0_6b 1123 chr3 388964 199 100M = 389352 488 CACCTGTGAGGCCAGCCGGCAACGTCTGTGCCTCGGGGCCACCAAGAGGCATAGACACCACCAGATGAAGGCCCCGAAGACATAAGAGGCGCCCTCCATG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:105 NH:i:1
chr3_388964_389451_0:0:0_0:0:0_6b 147 chr3 389352 199 100M = 388964 -488 CGTCTTCTCCATGATCCAGCGCTGCATCTCCTTGGCACTGATCTTCCGGTCAGTGTTCACATCCACCCTGCAAGACAGCAAATGGGCAGGTGGCCGTCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:105 NH:i:1
chr3_882627_883063_0:0:0_0:0:0_6c 83 chr3 882964 199 100M = 882627 -437 ACACGCCCTGTCACGGTAACTCTAGGAAAGAGTAAACCTTAATAGTTACAATAGCACACAGTTGTCACAGTGACCCTAGGAAGGACTGGCCAGGCCAGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:106 NH:i:1
chr3_882627_883063_0:0:0_0:0:0_6c 163 chr3 882627 199 100M = 882964 437 GGGAGGCGGAGCTTGCAGTGAGCCGAGATGGCGCCACTGCACTCCAGCCTGGGTGAGCAAGACTCTTGAGACACCGTCTCAAAAAAAAAAAGAGTACACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:106 NH:i:1
-chr3_96343_96789_0:0:0_0:0:0_6d 99 chr3 96343 199 100M = 96690 447 GGGCATGGCAGCCGCCCTCGTTCACTGCCCAGGGCTGTGGCCCAGCGGGGCACTGACCCGAGACAGGTCTGCGCACGCCCTGCTATCCTGAGGCTGGGGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:107 NH:i:1
+chr3_96343_96789_0:0:0_0:0:0_6d 1123 chr3 96343 199 100M = 96690 447 GGGCATGGCAGCCGCCCTCGTTCACTGCCCAGGGCTGTGGCCCAGCGGGGCACTGACCCGAGACAGGTCTGCGCACGCCCTGCTATCCTGAGGCTGGGGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:107 NH:i:1
chr3_96343_96789_0:0:0_0:0:0_6d 147 chr3 96690 199 100M = 96343 -447 TGCAAGGTTGTCCCCCATCCCGGGAGGCAGACAGTGTTGCACCCAGTTGGGACTGAGGGACCCCAGACCCAGTCAGATGCAGCTCTCGGCAGCAGCTCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:107 NH:i:1
chr3_458917_459400_0:0:0_0:0:0_6e 83 chr3 459301 199 100M = 458917 -484 GCACTGCGATGGCCAACGGGTCCCGCTGCTCTTGGTCCAGGGCGTGCTGGTCCGCGCCCCGCTTCAGGAACAGGCAAACCTGGCTGAGGGGCGAGGTCGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:108 NH:i:1
chr3_458917_459400_0:0:0_0:0:0_6e 163 chr3 458917 199 100M = 459301 484 GGTCACACGCAGGGCCGCGGCCGGGTGGGCGCCAGGGACTTCGGGGCATGCGGGGCGTCGGGCCGGGCGGGGTGGCAGCTGCCCGGCCTGCCCGGCCCTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:108 NH:i:1
@@ -225,67 +225,67 @@ chr3_745773_746314_0:0:0_0:0:0_6f 83 chr3 746215 199 100M = 745773 -542 GGGTCTGT
chr3_745773_746314_0:0:0_0:0:0_6f 163 chr3 745773 199 100M = 746215 542 AGGGTTTTGCTCTGTTGCCCTGTGGGGAAAAGCAAGAGAGATCAGATTGTTACTGTGTCTGTGTAGAAAGAAGTAGACATAGGAGACTCCATTCTGTTCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:109 NH:i:1
chr3_134744_135350_0:0:0_0:0:0_70 81 chr3 135251 199 100M = 134744 -607 GTATGTGTGCGCATATATGGATGCATGCATGTTGCCTGCATGTGCACGTATCGGGTGTGTGTGCGTGTACGTGTACACGAATATGCATGTATGCATATAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:110 NH:i:1
chr3_134744_135350_0:0:0_0:0:0_70 161 chr3 134744 199 100M = 135251 607 CAGGCAGCTCTGTGTGGGGAAGTGTGTCTGTGTGTGCGTGTGTGTGCACAAGTGTGTATGTGTGCATGTGTTTGCACGTGTATATGTGTGCACACATCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:110 NH:i:1
-chr3_472864_473330_0:0:0_0:0:0_71 99 chr3 472864 199 100M = 473231 467 GCCGACGCCCCCACGGACCCAGACGACCCTGGCCAGAGCTGGACTGCAGGGCCCGGGAGGGGCTCAGCCCAGTGGAGGTGCAGACACAGAGCGGGGGCGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:111 NH:i:1
+chr3_472864_473330_0:0:0_0:0:0_71 1123 chr3 472864 199 100M = 473231 467 GCCGACGCCCCCACGGACCCAGACGACCCTGGCCAGAGCTGGACTGCAGGGCCCGGGAGGGGCTCAGCCCAGTGGAGGTGCAGACACAGAGCGGGGGCGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:111 NH:i:1
chr3_472864_473330_0:0:0_0:0:0_71 147 chr3 473231 199 100M = 472864 -467 GAGTCCTTGACGCACTCCTCGAACTCCACGGTCATGGCTGCGGCGGCCGCGGCGCTCACTGGCACGAGGACCGCGGCGCCGAGCGGCAGCCGCGCCGGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:111 NH:i:1
chr3_320681_321196_0:0:0_0:0:0_72 83 chr3 321097 199 100M = 320681 -516 ATGGAGTCACTTCACTTCGTCCTCACTTTGGGAGTGGAGCCCTTTGGGACTCCCAGGTTTTGTGGGGAGGGTCTCCTGGTGGATGATTTCCACATTGGAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:112 NH:i:1
chr3_320681_321196_0:0:0_0:0:0_72 163 chr3 320681 199 100M = 321097 516 TTTTAAATTGTGGTTATATTTTCATTTCTAAAAGCTCATTGTCTTTTTAAATCTACTACATAATTTTTATAAGCTAAAGTTCTCTGCAAATATAAAACGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:112 NH:i:1
chr3_248742_249234_0:0:0_0:0:0_73 83 chr3 249135 199 100M = 248742 -493 CCGTGGGGTCTGCTGGGCACTTGCAGACAGGGCTGCTTTAGGGACGGGGCACGTCCAGCCTTGATGGAGAACGGTCTGTCCCCTAAAGGCACGGTCACCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:113 NH:i:1
chr3_248742_249234_0:0:0_0:0:0_73 163 chr3 248742 199 100M = 249135 493 ACCTCAGGGGGCGACAAGCAGGCCGTAGTCCCAAGACCTCGAATGTGACCTTATTTAGAAGAAGGGGCTTTGCAGGGGGTTACGTCAGGGTCTCAGGTGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:113 NH:i:1
-chr3_510117_510562_0:0:0_0:0:0_74 99 chr3 510117 199 100M = 510463 446 GCCTTCCCAGGTAAGACCATGACCCCATGCCAGGCTGAGGCCGCCACAGTGAGAGGCTGTCACACCTGAAAGGCTGGAGCAACCCAGGGTGAGGCGGGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:114 NH:i:1
+chr3_510117_510562_0:0:0_0:0:0_74 1123 chr3 510117 199 100M = 510463 446 GCCTTCCCAGGTAAGACCATGACCCCATGCCAGGCTGAGGCCGCCACAGTGAGAGGCTGTCACACCTGAAAGGCTGGAGCAACCCAGGGTGAGGCGGGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:114 NH:i:1
chr3_510117_510562_0:0:0_0:0:0_74 147 chr3 510463 199 100M = 510117 -446 CCACCCAGCACCCTCTGACTCACTGGAGGGAGACTCGGCCACCCAGCACCCCAGTACAGCACCCGCCTGGCTCCACCGGAAGAGGGCCCTGCCCCCTCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:114 NH:i:1
-chr3_881153_881677_0:0:0_0:0:0_75 99 chr3 881153 199 100M = 881578 525 CTCGCTCTGTCACCCAGGCTGGAGTGTAGACACGCGATCTCGGATCACTGCAACCTCCACCTTTCCGGGTTCACACCATTCTCCTGCCTCAGCCTCCGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:115 NH:i:1
+chr3_881153_881677_0:0:0_0:0:0_75 1123 chr3 881153 199 100M = 881578 525 CTCGCTCTGTCACCCAGGCTGGAGTGTAGACACGCGATCTCGGATCACTGCAACCTCCACCTTTCCGGGTTCACACCATTCTCCTGCCTCAGCCTCCGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:115 NH:i:1
chr3_881153_881677_0:0:0_0:0:0_75 147 chr3 881578 199 100M = 881153 -525 TTATTTATTTAATTTAATTTTATTTATTTATTTATTTTATTTGAGATTGGAGTCTTGCTCTGTCGCCCAGGCTGGAGGGCAGTGGCGAGACTCTCGGCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:115 NH:i:1
chr3_102298_102764_0:0:0_0:0:0_76 83 chr3 102665 199 100M = 102298 -467 CATCCATCCTGTTCCCAGCCCGGGCCCTCCCGCTAAGCCGCACCTCCCTGGGCCCTGGGCTGTGAGGGACTCAGAGCAGGTCCTGCATCTGCCTCTGTGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:116 NH:i:1
chr3_102298_102764_0:0:0_0:0:0_76 163 chr3 102298 199 100M = 102665 467 AGCAGAGCGGAGGGAGCCCCACAATGATTGAGATATTCTGAGCCAGCAGGCCCTCCCCTGTGCCTTCACACAGGGAGACCTCCTCAGGTACACGCGTGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:116 NH:i:1
-chr3_21932_22451_0:0:0_0:0:0_77 99 chr3 21932 199 100M = 22352 520 TTCTCTTCGAGGACGCCTCTGCTCTTCTGCCTCACACTCGGATTGGACCTTCTTCAGCACAGCCAGTGGCCGAGGCTTCCTCCCTGGGGCTCAGTGGAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:117 NH:i:1
+chr3_21932_22451_0:0:0_0:0:0_77 1123 chr3 21932 199 100M = 22352 520 TTCTCTTCGAGGACGCCTCTGCTCTTCTGCCTCACACTCGGATTGGACCTTCTTCAGCACAGCCAGTGGCCGAGGCTTCCTCCCTGGGGCTCAGTGGAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:117 NH:i:1
chr3_21932_22451_0:0:0_0:0:0_77 147 chr3 22352 199 100M = 21932 -520 TGAACTCTGGAGTCTGCTCACCTTCCTGCCTCAAGCCCCTCTCCCACGTGGTCCAGTCACCATTCTCCACAGAGACTACCCTAAAACCCAGCGACTCTCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:117 NH:i:1
-chr3_473635_474126_0:0:0_0:0:0_78 99 chr3 473635 199 100M = 474027 492 GGGGAGGGCGCGGGGAGGGCGCGGGCCCGCCCGCTCCAACCATCCCGGCCGTTGCGCAACTCAGGGCGTTTGCAAAAACCACGTCTGGGAGGACTTGGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:118 NH:i:1
+chr3_473635_474126_0:0:0_0:0:0_78 1123 chr3 473635 199 100M = 474027 492 GGGGAGGGCGCGGGGAGGGCGCGGGCCCGCCCGCTCCAACCATCCCGGCCGTTGCGCAACTCAGGGCGTTTGCAAAAACCACGTCTGGGAGGACTTGGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:118 NH:i:1
chr3_473635_474126_0:0:0_0:0:0_78 147 chr3 474027 199 100M = 473635 -492 CGCGTCCGCGCGCGCGCAGGATTCCTGCGCTGGAGGCCGCCTCTGACGCCACCGGCTGGGCTCCGCCATGAGTTCGGCGCCGGCCTCAGGCTCCGTGCGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:118 NH:i:1
-chr3_699269_699750_0:0:0_0:0:0_79 99 chr3 699269 199 100M = 699651 482 CATCCTGGAGCCCCTGGTTTGGTCCCTCCCCACCTCGGGGCCCTGGCGTGCATTTGGGGTGGGGGGTTCCCATGGCGGCCTCCCTCAGCTGCCTCTCTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:119 NH:i:1
+chr3_699269_699750_0:0:0_0:0:0_79 1123 chr3 699269 199 100M = 699651 482 CATCCTGGAGCCCCTGGTTTGGTCCCTCCCCACCTCGGGGCCCTGGCGTGCATTTGGGGTGGGGGGTTCCCATGGCGGCCTCCCTCAGCTGCCTCTCTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:119 NH:i:1
chr3_699269_699750_0:0:0_0:0:0_79 147 chr3 699651 199 100M = 699269 -482 CAGGATGTCTTGTGGTGCGGGTCGGCCGTTCTGCCCCCCAGGGCACCCCCTGTTGTAGGCACTGGCTAGGGAGGGGCAGGCCTCCTTCCTGCCCCTCGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:119 NH:i:1
-chr3_819362_819858_0:0:0_0:0:0_7a 99 chr3 819362 199 100M = 819759 497 CTGCAATGAGCCAAGACTGCGCCACTGCACTCCAGCCTACATAACAAAGTAAGACTTGGTTAAAAAAAAAAAAAAAAAAAAAAAGGTAAGCTCTAGGCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:120 NH:i:1
+chr3_819362_819858_0:0:0_0:0:0_7a 1123 chr3 819362 199 100M = 819759 497 CTGCAATGAGCCAAGACTGCGCCACTGCACTCCAGCCTACATAACAAAGTAAGACTTGGTTAAAAAAAAAAAAAAAAAAAAAAAGGTAAGCTCTAGGCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:120 NH:i:1
chr3_819362_819858_0:0:0_0:0:0_7a 147 chr3 819759 199 100M = 819362 -497 CCATATACTCTGCAAGCTGAGATGGGAGACCGATTGAGCCCAGGAGTTCCAGGCTGAAGTGCGCTTGTGAACAGCCACTGCGCTGCAGCATGGGCGACAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:120 NH:i:1
chr3_323274_323711_0:0:0_0:0:0_7b 83 chr3 323612 199 100M = 323274 -438 CCTCCCCTTCTTCCCAGCACCGTCCCAGGGGGAAGGTAGCCCCCGGCGCAGGCGGGACCCTCGGCAGTCTGGCCCAAGAAACGGGGACCCTGTGGCTTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:121 NH:i:1
chr3_323274_323711_0:0:0_0:0:0_7b 163 chr3 323274 199 100M = 323612 438 GGGGCCACCGGGCCTCCCCTCAACCCGTAGGGGGTCACGGGCCCCGCCCCGCTCCCGGTCAAGACCCTGACATTGCGGAGGGTCAGGAGGGGGGAGCGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:121 NH:i:1
chr3_525283_525780_0:0:0_0:0:0_7c 83 chr3 525681 199 100M = 525283 -498 ATGCAGGCCAGAGGCTGTCGTCTGAGAGGGCCCTGACCTCTGAGAGGGCCACCTGGGGGCAGCTGACCCCCATGGGGCCCTGGGCAAGGAACAAAAGGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:122 NH:i:1
chr3_525283_525780_0:0:0_0:0:0_7c 163 chr3 525283 199 100M = 525681 498 GGCCTTTCATTCTGGAAAAACAAGCCCCACCTGGCTGGCCTCGAGGCCTCTGCTGGCCTCTCCTCCTGTTTTTCCAACTGGTTTGTATTAGTGCTGGCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:122 NH:i:1
-chr3_324456_324963_0:0:0_0:0:0_7d 99 chr3 324456 199 100M = 324864 508 CAGTGGGAACCACGTGATTGGTTTAAAAGCAGGTGAGGGGCCGGGCATGGTGGCTCATGCCTGTAATCCTGGCACTTTTGGAGGCCAAGGCAGGCGGATC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:123 NH:i:1
+chr3_324456_324963_0:0:0_0:0:0_7d 1123 chr3 324456 199 100M = 324864 508 CAGTGGGAACCACGTGATTGGTTTAAAAGCAGGTGAGGGGCCGGGCATGGTGGCTCATGCCTGTAATCCTGGCACTTTTGGAGGCCAAGGCAGGCGGATC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:123 NH:i:1
chr3_324456_324963_0:0:0_0:0:0_7d 147 chr3 324864 199 100M = 324456 -508 AGAGGCAGCTCCAGTTCCACAGCCAGACACAGTCATTTTCCCTCTACTCAGGAATTAGGCAGGATGGTGTGGGGCCTGAGTGTCACCATGAAGCCGGGAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:123 NH:i:1
chr3_786062_786650_0:0:0_0:0:0_7e 83 chr3 786551 199 100M = 786062 -589 GCAGCCATTCAGTCAAAGCTGAAGCTGGTGGGGTCCTGGCCAAACAGGACGCAGCAGTGCACCACCTGGGGGCTGCCCTCAGAGGCCCCGCGAGGGGAAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:124 NH:i:1
chr3_786062_786650_0:0:0_0:0:0_7e 163 chr3 786062 199 100M = 786551 589 AGCAGACTGCTGCGTCGGCCTCCCCGGCCTGTGCCATTCTGTGGCGAAGGTCCCTGTGTAGAGGAGGCTGGAGTGGGGGAGCTCAGCCCAGGCCATGCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:124 NH:i:1
chr3_699762_700285_0:0:0_0:0:0_7f 83 chr3 700186 199 100M = 699762 -524 TAGCGTCCCCCTGGGGTCAAAGGTGACAGAAGAGGCGGAGGCTGGCGCTTTCTGGAGAATTTACTGACCACAGAGGGGTGGGCTTCACATGAGTGCCCGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:125 NH:i:1
chr3_699762_700285_0:0:0_0:0:0_7f 163 chr3 699762 199 100M = 700186 524 AGATGCATTTTCCGTCTGGCTCACAGGGGGAGGGTGAGGCTTTGCACCCCAGCCCCTGCCCAGGCCACTGTGAGGGTGGGTGCTGGCTGAGCCCCCGGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:125 NH:i:1
-chr3_178025_178595_0:0:0_0:0:0_80 99 chr3 178025 199 100M = 178496 571 GTTGTAAGAGTTCTCTATATGTCCTGGATGCTATGCCCTCATCAGATAGATAATTTGCAAATATTTCTTCCCATTCTATGGATTGTCTTTTCACTTTCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:126 NH:i:1
+chr3_178025_178595_0:0:0_0:0:0_80 1123 chr3 178025 199 100M = 178496 571 GTTGTAAGAGTTCTCTATATGTCCTGGATGCTATGCCCTCATCAGATAGATAATTTGCAAATATTTCTTCCCATTCTATGGATTGTCTTTTCACTTTCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:126 NH:i:1
chr3_178025_178595_0:0:0_0:0:0_80 147 chr3 178496 199 100M = 178025 -571 ACAACAAAAAAACACCAATCTGAGCAAATACTGCCCTAAACCGAGTGTTGTTATCTCTGGGTAGTTTGGAGTTCTTGTTTCTCAATTAACCATGGGGATG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:126 NH:i:1
chr3_493151_493655_0:0:0_0:0:0_81 83 chr3 493556 199 100M = 493151 -505 AGCCTGTGGGGTGCATCTGCGAACCAGGGTGAAGTCACAGGTCCCGGGGTGTGGAGGCTCCATCCTTTCTCCTTTCTGCCAGCCGATGTGTCCTCATCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:127 NH:i:1
chr3_493151_493655_0:0:0_0:0:0_81 163 chr3 493151 199 100M = 493556 505 CAGCGTGTCTACAACGTCTCCCAGAAGCTCTACGCCGAGCACTCCCTGCTGGACCTGCCCTAGGGGCGGGAAGCCAGGGCCGCACCGGCTTTCCTGCTGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:127 NH:i:1
chr3_926086_926623_0:0:0_0:0:0_82 83 chr3 926524 199 100M = 926086 -538 TGCCTGTGCTGGCACCTGGGAACGTGCGCCAGGCAGGTGTCCATGGGCCAGGATCCCCCTTCAAGGGCACAGCTTCACCTGGGCAAGGACCCAGCCTCAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:128 NH:i:1
chr3_926086_926623_0:0:0_0:0:0_82 163 chr3 926086 199 100M = 926524 538 GTCCCCTCACACCCCAGTGACTTCTGTAGAGCAAATGTTTCCAGGCCAGGCACAGTGGCTTGCACCGGTAATCCCAGCACTTTGGGAGGCTGAGACGGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:128 NH:i:1
-chr3_393113_393643_0:0:0_0:0:0_83 99 chr3 393113 199 100M = 393544 531 AAGTAGGTTCCAGGGCCACACCGCAGGGCACCCCGGCCGTGCCGTAAGGAGTAAGGCGCCCGTGCAGGACCTGGCTCCTTGTGGGTGAGTGGTCAGGTTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:129 NH:i:1
+chr3_393113_393643_0:0:0_0:0:0_83 1123 chr3 393113 199 100M = 393544 531 AAGTAGGTTCCAGGGCCACACCGCAGGGCACCCCGGCCGTGCCGTAAGGAGTAAGGCGCCCGTGCAGGACCTGGCTCCTTGTGGGTGAGTGGTCAGGTTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:129 NH:i:1
chr3_393113_393643_0:0:0_0:0:0_83 147 chr3 393544 199 100M = 393113 -531 AGGCCTCGTCCAACTGGGGCTGCAGCCTGCGGAGGCTGCCCGTGGCCGGCGTCTGCCCCTCAGCATGGAGGACGCTGACCACTGCAGTGCCGCGTGGGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:129 NH:i:1
-chr3_547075_547588_0:0:0_0:0:0_84 99 chr3 547075 199 100M = 547489 514 CTTGGCACAGCTGACGCCTGGGAACTGTTCTGGGCACTCAGCGTGCTAAGCAGTGTCCTGAGCTCTCAGACCCCCGCCACGTTCCCCCTAGCCTGAGCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:130 NH:i:1
+chr3_547075_547588_0:0:0_0:0:0_84 1123 chr3 547075 199 100M = 547489 514 CTTGGCACAGCTGACGCCTGGGAACTGTTCTGGGCACTCAGCGTGCTAAGCAGTGTCCTGAGCTCTCAGACCCCCGCCACGTTCCCCCTAGCCTGAGCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:130 NH:i:1
chr3_547075_547588_0:0:0_0:0:0_84 147 chr3 547489 199 100M = 547075 -514 TCCCATCGGCACACTGCAGCAGGGAGATGAGAGACCTGCAGGACAGTCTTCCTGGTCCAGACAGAATGAAGCAAAGAAACCAGAAGGAAGGGGCAGATGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:130 NH:i:1
chr3_736310_736820_0:0:0_0:0:0_85 83 chr3 736721 199 100M = 736310 -511 AAAAAAAAAAAAGTTAGCTGGGAGTGGTGGCAGGCGCCTGTAGTCCCAGCTACTCGGAGGGTTTGAAGGAGAATCGCTTCAACCTGGGATGCAGAGGTTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:131 NH:i:1
chr3_736310_736820_0:0:0_0:0:0_85 163 chr3 736310 199 100M = 736721 511 CCAGCACTTTGGGAGGCCAAGGCGGGCAGATCACGAGGTCAGGAGATCGAGACCACCCTGGCCAACACAGTCAAACCCCATCTCTACTAAAAATACAAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:8 SD:i:0 SN:i:131 NH:i:1
chr3_61597_61994_0:0:0_0:0:0_86 83 chr3 61895 199 100M = 61597 -398 TTGCTTAGCAGCCATTATTCTATCCATATTGGCTTATCTGTTAACCATTTGTAAAGGTATAGGTTCTGGAAGCTTAACAACGGCCGCCGTCAAAAATGAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:132 NH:i:1
chr3_61597_61994_0:0:0_0:0:0_86 163 chr3 61597 199 100M = 61895 398 AAATGCAAATCGTTCACAGTCTTGCTCAGCTAAAAGGGTAGTAAAGAAACAGTCTTTTAAATCTGTGACTATTAAAGGCCAATTTTTTGGAATTATAGTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:132 NH:i:1
-chr3_765202_765617_0:0:0_0:0:0_87 99 chr3 765202 199 100M = 765518 416 GCTGCGGTTGCGCAGGACGCTGGCGCGCAGGGAGCGGCCGGCCGGGGCGTGCAGCACGCAGCCCGGAGCCTGGCAGCGGAAGCCGCGCGCGGGGCTGCGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:133 NH:i:1
+chr3_765202_765617_0:0:0_0:0:0_87 1123 chr3 765202 199 100M = 765518 416 GCTGCGGTTGCGCAGGACGCTGGCGCGCAGGGAGCGGCCGGCCGGGGCGTGCAGCACGCAGCCCGGAGCCTGGCAGCGGAAGCCGCGCGCGGGGCTGCGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:133 NH:i:1
chr3_765202_765617_0:0:0_0:0:0_87 147 chr3 765518 199 100M = 765202 -416 CCGTCCCCGCTGCCCGCTCCCCGCGATCCCCGGCGCGCCGCGCCCTCCGCCGCCGCCCGCTCCGACCCGCCCCCGCGCCCCGTGCGCGCTCGGACCCGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:133 NH:i:1
-chr3_85057_85521_0:0:0_0:0:0_88 99 chr3 85057 199 100M = 85422 465 ATCCCTCCACCCTCACTCATGTAGGGATGTGGGGCACACGGACTGGAGCCCAAGGCCTTCGTGCTGGGCCCTGGCTCTGCACTCGGCTCCCAGAGTCTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:134 NH:i:1
+chr3_85057_85521_0:0:0_0:0:0_88 1123 chr3 85057 199 100M = 85422 465 ATCCCTCCACCCTCACTCATGTAGGGATGTGGGGCACACGGACTGGAGCCCAAGGCCTTCGTGCTGGGCCCTGGCTCTGCACTCGGCTCCCAGAGTCTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:134 NH:i:1
chr3_85057_85521_0:0:0_0:0:0_88 147 chr3 85422 199 100M = 85057 -465 GCTGGGGCCTCTGGCTGCTGCAGCATTTACAGAGGAATTGTCTGGGATGTGCCGGGCCGAGCACTGTAGCTCTCGCCGGTGTCTGCGTCAGCACGCTAGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:134 NH:i:1
chr3_813299_813766_0:0:0_0:0:0_89 83 chr3 813667 199 100M = 813299 -468 ATATGACAGCCCAAATTTCATCTACCAACCTCCACTTCTATCCCAAGCTGTACTCTCGTGGGACAAGACACACCAATGTCTGGCACAGGGTGTGGCATAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:135 NH:i:1
chr3_813299_813766_0:0:0_0:0:0_89 163 chr3 813299 199 100M = 813667 468 AAACTGCAGGTCCTGCAGCTGCCCTGTCCCAGTGGAACCTCCGTGCTTCCCAAGTGCAGAGTGGAAGCAGTAAAGGGAGCAGCTCTCAGGCCAGAAGGTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:135 NH:i:1
chr3_559334_559822_0:0:0_0:0:0_8a 83 chr3 559723 199 100M = 559334 -489 TGCTTTGTTTCCATGTGACACAGAAGGCACCCACAACGCTGGGAGTGAGGGACCGTGTCACGGCCATGGTCCTCAGTCAGTACATGCCGCAAAACTCGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:136 NH:i:1
chr3_559334_559822_0:0:0_0:0:0_8a 163 chr3 559334 199 100M = 559723 489 TGGGAGGCTGAGGCGGGCAGATCACCTGAGGTCAGGAGTTCGAGAACAGCCTGACCAATATGGAGAAACCCAGTCTCTACTAAAAATACAAAATTAGTCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:136 NH:i:1
-chr3_179671_180098_0:0:0_0:0:0_8b 99 chr3 179671 199 100M = 179999 428 CCTCTGAGCATCCTGGTGAGGAATAACAAGGGCCGCAGCAGCACCTACGAGGTACGGCTGACGCAGACCGTGGCCCACCTGAAGCAGCAAGTGAGCGGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:137 NH:i:1
+chr3_179671_180098_0:0:0_0:0:0_8b 1123 chr3 179671 199 100M = 179999 428 CCTCTGAGCATCCTGGTGAGGAATAACAAGGGCCGCAGCAGCACCTACGAGGTACGGCTGACGCAGACCGTGGCCCACCTGAAGCAGCAAGTGAGCGGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:137 NH:i:1
chr3_179671_180098_0:0:0_0:0:0_8b 147 chr3 179999 199 100M = 179671 -428 TCTGTGTCTGTGCTTGCCCCTCCTCCAGCGTCCGCCTCAGACCCCTGGTGATGGGGCGGGAGAAGTCAGGAGGGAGAACCCAGGGTCCATCCTGGTGACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:137 NH:i:1
-chr3_501335_501804_0:0:0_0:0:0_8c 99 chr3 501335 199 100M = 501705 470 CCCACCAGACACAAGGGGTTGGGAGGTCCGAGGCTCTCGCTGAGGGGCAGAGAGGGAGCGCCCCCAACACGGCTGCTCAGACACAGGTGCTGTCAGGAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:138 NH:i:1
+chr3_501335_501804_0:0:0_0:0:0_8c 1123 chr3 501335 199 100M = 501705 470 CCCACCAGACACAAGGGGTTGGGAGGTCCGAGGCTCTCGCTGAGGGGCAGAGAGGGAGCGCCCCCAACACGGCTGCTCAGACACAGGTGCTGTCAGGAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:138 NH:i:1
chr3_501335_501804_0:0:0_0:0:0_8c 147 chr3 501705 199 100M = 501335 -470 CGGGTGGCCCCCCCACCACTGTATAGGCCTTGGTCGTGGGGTGGGGCGGGGGGAGCCCCGGGGCGGTAGCCGAGGCCTGACTGCGTGGGCTGCTGCCACG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:138 NH:i:1
chr3_498319_498817_0:0:0_0:0:0_8d 83 chr3 498718 199 100M = 498319 -499 TCCCGGTGCTCGCGGCAGTGCCAGGAGGGCCAGGTGCGCCGGGTCAAGGGGTTCCACTCCTGCTGCTACGACTGTGTGGACTGCGAGGCGGGCAGCTACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:139 NH:i:1
chr3_498319_498817_0:0:0_0:0:0_8d 163 chr3 498319 199 100M = 498718 499 CCACGCCTGAGCTGGAGGTGGCTGGCGGCTCAGCCCCGTCCCCCGCCCGCAGCTCCTGGAGAACATGTACAACCTGACCTTCCACGTGGGCGGGCTGCCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:139 NH:i:1
-chr3_552085_552643_0:0:0_0:0:0_8e 99 chr3 552085 199 100M = 552544 559 TTTAAAAGTCAGGATTCCCTACACAAGTTTTAAGCTGACGGGATTCAAGTTCTGAGTTTTCATACATAGCTTTAACTTGTATTAAACACATGTTTATTTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:140 NH:i:1
+chr3_552085_552643_0:0:0_0:0:0_8e 1123 chr3 552085 199 100M = 552544 559 TTTAAAAGTCAGGATTCCCTACACAAGTTTTAAGCTGACGGGATTCAAGTTCTGAGTTTTCATACATAGCTTTAACTTGTATTAAACACATGTTTATTTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:140 NH:i:1
chr3_552085_552643_0:0:0_0:0:0_8e 147 chr3 552544 199 100M = 552085 -559 CAAAAAGAATCCTGTTCTAGGACCACTTGCGCTGAGAGCACACCCGGGGGTCAAAGGGCAGCCACCGGGGGTCAAAGGGCAGCCATCAGGTACTCCCCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:140 NH:i:1
-chr3_620886_621456_0:0:0_0:0:0_8f 99 chr3 620886 199 100M = 621357 571 ATGGTGCTTCCCCTTCCCCTCCAGCAGGTCAGCCGGCGGCTCCTCAGTCGACCCCAGGACGTGCTGGAGGGTGTTGTGCTTAGTGTAAGTCGGTGTGCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:141 NH:i:1
+chr3_620886_621456_0:0:0_0:0:0_8f 1123 chr3 620886 199 100M = 621357 571 ATGGTGCTTCCCCTTCCCCTCCAGCAGGTCAGCCGGCGGCTCCTCAGTCGACCCCAGGACGTGCTGGAGGGTGTTGTGCTTAGTGTAAGTCGGTGTGCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:141 NH:i:1
chr3_620886_621456_0:0:0_0:0:0_8f 147 chr3 621357 199 100M = 620886 -571 GTTTGCCAAGGTGAGAGTGCCTAGCTGAACAGGTGGGCCAGGGGCCGCTGGGGTCTCACCTGCCTGCAGGTGTCTGGGGGGCTCAGCTGCCTGGGGAATG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:141 NH:i:1
chr3_745774_746274_0:0:0_0:0:0_90 83 chr3 746175 199 100M = 745774 -501 AGGGAAAGACCTGACCGTCCCCCAGCCCGACACCCGTAAAGGGTCTGTGCTGAGGAGGATTAGTAAAAGAGGAAGGCATGCCTCTTGCAGTTGAGACAAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:142 NH:i:1
chr3_745774_746274_0:0:0_0:0:0_90 163 chr3 745774 199 100M = 746175 501 GGGTTTTGCTCTGTTGCCCTGTGGGGAAAAGCAAGAGAGATCAGATTGTTACTGTGTCTGTGTAGAAAGAAGTAGACATAGGAGACTCCATTCTGTTCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:142 NH:i:1
@@ -295,21 +295,21 @@ chr3_113387_113848_0:0:0_0:0:0_92 83 chr3 113749 199 100M = 113387 -462 CCCTCAGG
chr3_113387_113848_0:0:0_0:0:0_92 163 chr3 113387 199 100M = 113749 462 CCCAAACCTCCTGAATGGCTTAGAACCCCTCATCAGCCCCTCCAAGGGGGCCTCACGGGGCGCGTTGCCAGCAGTCAGGTTCCACCCCAGTCCCAGGTAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:144 NH:i:1
chr3_592901_593434_0:0:0_0:0:0_93 83 chr3 593335 199 100M = 592901 -534 CTTCCTGGCTGGGCGCGCAGCGCTCCCGGCTTACGACCCCGGCCTCCCCCGCCCGCAGCCCCGCCCCCGCCACGGAGGGCAGGGGAGGGGAAGAGAGGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:145 NH:i:1
chr3_592901_593434_0:0:0_0:0:0_93 163 chr3 592901 199 100M = 593335 534 CCTCCATCACCCTGCCCAGCCCCCTCCCCTCCATCACCCTGCCCTGCCCCCAGCCCTCCATTACCCCGCCCCGCCCCCCACTGACGGCCCGCCCGGCCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:145 NH:i:1
-chr3_903981_904478_0:0:0_0:0:0_94 99 chr3 903981 199 100M = 904379 498 AATGGAGGCACCACGAATTGCTGAAGAAGGAAACTTTATTCAGTCTATGATACCAGGACAGTTGTCCATGCTGCCAGGCAAAAAGAAAAACTGGATTCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:146 NH:i:1
+chr3_903981_904478_0:0:0_0:0:0_94 1123 chr3 903981 199 100M = 904379 498 AATGGAGGCACCACGAATTGCTGAAGAAGGAAACTTTATTCAGTCTATGATACCAGGACAGTTGTCCATGCTGCCAGGCAAAAAGAAAAACTGGATTCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:146 NH:i:1
chr3_903981_904478_0:0:0_0:0:0_94 147 chr3 904379 199 100M = 903981 -498 TCTCTAACCAAACCCCAAAAGCATGACCCATTAAAAAAAGGGTCAGGTGGACTAACTTGGCTAAATGAAGAATTCTGTTTCACCAAAGTACACTACAAAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:146 NH:i:1
chr3_512017_512522_0:0:0_0:0:0_95 83 chr3 512423 199 100M = 512017 -506 GACAGTGCTGAGAGGTCTGCGGGTAGAACTCAGGGGAAGGGATGAAGGAAGTAATTGATGAGGGTCCTCCGTAGGCTCCTGAGGCCACCAGCCAGAGGAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:147 NH:i:1
chr3_512017_512522_0:0:0_0:0:0_95 163 chr3 512017 199 100M = 512423 506 GAAAAGTGGAGGACAGGCGGGGGACCTTCACCCACTGGGCCAAAGCCGTGCGTGGGGAGTGGGGTCCAGACCAGGGCACGTGGTTGTGGCCATGACACAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:147 NH:i:1
-chr3_321145_321646_0:0:0_0:0:0_96 99 chr3 321145 199 100M = 321547 502 ACTCCCAGGTTTTGTGGGGAGGGTCTCCTGGTGGATGATTTCCACATTGGAACAAAAGTTCACATCTGCTGACCCCCGAGGAACGTGGCAGAGTGCTCTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:148 NH:i:1
+chr3_321145_321646_0:0:0_0:0:0_96 1123 chr3 321145 199 100M = 321547 502 ACTCCCAGGTTTTGTGGGGAGGGTCTCCTGGTGGATGATTTCCACATTGGAACAAAAGTTCACATCTGCTGACCCCCGAGGAACGTGGCAGAGTGCTCTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:148 NH:i:1
chr3_321145_321646_0:0:0_0:0:0_96 147 chr3 321547 199 100M = 321145 -502 GCACTTCCACCCAGGCTCTGCCGGGCGCCGTAACCAGCATGGAGTCCCCACTCGGGAAACTCCCACTCAAACTACACCTGCTCAGCTGGAATCTGCAGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:148 NH:i:1
-chr3_65329_65867_0:0:0_0:0:0_97 99 chr3 65329 199 100M = 65768 539 GGGTACCAGCCCAGGAAGTGGGACACTGGCTGAAACAGGATGGGATGCAGGGATGGGGTGGGATGGGAGACCAAACCAGTGTGAGGAATGAGGATAAAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:149 NH:i:1
+chr3_65329_65867_0:0:0_0:0:0_97 1123 chr3 65329 199 100M = 65768 539 GGGTACCAGCCCAGGAAGTGGGACACTGGCTGAAACAGGATGGGATGCAGGGATGGGGTGGGATGGGAGACCAAACCAGTGTGAGGAATGAGGATAAAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:149 NH:i:1
chr3_65329_65867_0:0:0_0:0:0_97 147 chr3 65768 199 100M = 65329 -539 CTTGAGGAGATCCAGCCTAACTCCCCACCTCTCCAACGCGGCTCATCGCAGCGACCTCCTTCTTCCAACGTGGCTCATCGCAGTGACCTCCTTCTCAGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:149 NH:i:1
-chr3_99293_99877_0:0:0_0:0:0_98 99 chr3 99293 199 100M = 99778 585 TGAAACGGATCTGCTTAGGGGCAGCCTTGGATTAGCCCAGCTCCAGCCAGCCCAGGTCAGGGGAGCCGGGAGCTATTTAACGAGGTTTAGGGTAGGCTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:150 NH:i:1
+chr3_99293_99877_0:0:0_0:0:0_98 1123 chr3 99293 199 100M = 99778 585 TGAAACGGATCTGCTTAGGGGCAGCCTTGGATTAGCCCAGCTCCAGCCAGCCCAGGTCAGGGGAGCCGGGAGCTATTTAACGAGGTTTAGGGTAGGCTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:150 NH:i:1
chr3_99293_99877_0:0:0_0:0:0_98 147 chr3 99778 199 100M = 99293 -585 TGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:150 NH:i:1
-chr3_371026_371505_0:0:0_0:0:0_99 99 chr3 371026 199 100M = 371406 480 GAGGTTGTCCGTTCTCCACCCACAGTTGGCTCCAGGGAGCAGAGGGCTCCTGGCTGGGAGTCTGGACCCTGGGTTTATCCAGCTGTCTCTTTCTCCTGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:151 NH:i:1
+chr3_371026_371505_0:0:0_0:0:0_99 1123 chr3 371026 199 100M = 371406 480 GAGGTTGTCCGTTCTCCACCCACAGTTGGCTCCAGGGAGCAGAGGGCTCCTGGCTGGGAGTCTGGACCCTGGGTTTATCCAGCTGTCTCTTTCTCCTGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:151 NH:i:1
chr3_371026_371505_0:0:0_0:0:0_99 147 chr3 371406 199 100M = 371026 -480 TCCAGACCAGCCGTGTCAGCTGAACCCTCATCCCCAGGTGCAGGTCAGAGGCAGGCAGCAGGCCAGACGGTGGTGCAGGGGGGCCCTCAACTGGGCAGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:151 NH:i:1
chr3_393214_393742_0:0:0_0:0:0_9a 83 chr3 393643 199 100M = 393214 -529 TCCTCCAGGCTGTCCTAGGGACGGCCACACAGGCCCACTCTGGCCCTCTGAGCCCCCGGCGGACCCAGGGCATTCAAGGAGCGGCTCTGGGCTGCCAGCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:152 NH:i:1
chr3_393214_393742_0:0:0_0:0:0_9a 163 chr3 393214 199 100M = 393643 529 CCCATCTTGGACCCCTCACAACAGGCTCTCGAGACTCAGGGATGGGGTCGGGGGAGAAAGCTCGGCACTAGGCACAGAGGCAGCAAGAGGGTCTCGGGAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:152 NH:i:1
-chr3_433166_433663_0:0:0_0:0:0_9b 99 chr3 433166 199 100M = 433564 498 CTGGGCACCCCACTCACCCCGGGGGAGGGTGCAGGAGCACCTTGCCAATGCCATGACTGGGGCACCAGGCTCGCCCAGCAGGGGCCGGCAGAGGCCTGCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:153 NH:i:1
+chr3_433166_433663_0:0:0_0:0:0_9b 1123 chr3 433166 199 100M = 433564 498 CTGGGCACCCCACTCACCCCGGGGGAGGGTGCAGGAGCACCTTGCCAATGCCATGACTGGGGCACCAGGCTCGCCCAGCAGGGGCCGGCAGAGGCCTGCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:153 NH:i:1
chr3_433166_433663_0:0:0_0:0:0_9b 147 chr3 433564 199 100M = 433166 -498 TCACAGCTCACAGGTCACCCTCGCTTGCCACCCGCCCAGACACCCAGAGGCCCTGCGGCTTCTCTCTTCCCGTCTGTCCCGCCACCACCTTCATTGACAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:153 NH:i:1
chr3_748786_749221_0:0:0_0:0:0_9c 83 chr3 749122 199 100M = 748786 -436 GACATCCGTGGGGCACACTGTCTGGGTCTTCCTAGGTCATGTCCACGAGGTTCTGCGTCTCGGCTGGGCCCAGCACAGATGGTGGCAGCTACGTGAGCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:154 NH:i:1
chr3_748786_749221_0:0:0_0:0:0_9c 163 chr3 748786 199 100M = 749122 436 CAACCATGACAATTTTATTCCCTACTGTTGGGGAAGTGTCTCGAGTCAGACCCTCCCCACCAAGTGCAAAGAGAACGCATCTCAGCTATCCCTTCTGCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:154 NH:i:1
@@ -319,9 +319,9 @@ chr3_654501_654994_0:0:0_0:0:0_9e 83 chr3 654895 199 100M = 654501 -494 TGAGGAAC
chr3_654501_654994_0:0:0_0:0:0_9e 163 chr3 654501 199 100M = 654895 494 CGCCCCTGGCCCTGGTCAGGATTTTGAGTTTAGATCCATGAAAGTGTCGCCACGTCCCTGCTCCCTGCAGGAGGGAGGCCTGTGGGACTTTCTGCTCTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:156 NH:i:1
chr3_768796_769298_0:0:0_0:0:0_9f 83 chr3 769199 199 100M = 768796 -503 GGGTAGGAGTGCCCCATTCTCACCAGATCCTGGGCTCGGAGCCCGGGCCAAATACTGACCTCAGTGGGATCAGAACCTGAAAGCCCCTCTCTGGGGTGAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:157 NH:i:1
chr3_768796_769298_0:0:0_0:0:0_9f 163 chr3 768796 199 100M = 769199 503 AGGGAACCTTGCAGCTGGCACGGGGTCTGCTGGGACTTGAGCATGTGGGTCCCTGGGGACCTGGGGAATGGCTCCGATGCTTAGGAGCTGTAAGAATGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:157 NH:i:1
-chr3_983737_984255_0:0:0_0:0:0_a0 99 chr3 983737 199 100M = 984156 519 CCTCCTGCCTTGGTATCCCAAAGTACTGGGATTAACGGCATGAGCCACGGTGCCTGGCCGTCTCTCTTTTTACTTTTTTTTTTTTTTTTTTTGAGACGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:158 NH:i:1
+chr3_983737_984255_0:0:0_0:0:0_a0 1123 chr3 983737 199 100M = 984156 519 CCTCCTGCCTTGGTATCCCAAAGTACTGGGATTAACGGCATGAGCCACGGTGCCTGGCCGTCTCTCTTTTTACTTTTTTTTTTTTTTTTTTTGAGACGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:158 NH:i:1
chr3_983737_984255_0:0:0_0:0:0_a0 147 chr3 984156 199 100M = 983737 -519 ATGTGGGGAATGTCACGTGACATTTGTGCCAGCCCAAGAGGGAGCCCCCTGTGCAAGAAGGCCCTGGGGAATTCACAACGTGACAGGGTCCAAACCTCGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:158 NH:i:1
-chr3_827120_827632_0:0:0_0:0:0_a1 99 chr3 827120 199 100M = 827533 513 CCCAGTTTGAGTTTCTGCTGGTCTTCACCGACAAACCGAGAAAGCCGCAGGCAATGGCCAACTTAGCTCCCCATGTCCTGCACCCCAGCAGGGCCATGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:159 NH:i:1
+chr3_827120_827632_0:0:0_0:0:0_a1 1123 chr3 827120 199 100M = 827533 513 CCCAGTTTGAGTTTCTGCTGGTCTTCACCGACAAACCGAGAAAGCCGCAGGCAATGGCCAACTTAGCTCCCCATGTCCTGCACCCCAGCAGGGCCATGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:159 NH:i:1
chr3_827120_827632_0:0:0_0:0:0_a1 147 chr3 827533 199 100M = 827120 -513 GGAGGCAAGGGGAGGCAGCAGGGGCGCTCAGGGCTATGGTCTCCTCAGCCCGTGGCCAGCAGCTCGGCTGAGCCTGGGTCTGGAGGCTGGGTGGGGTTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:159 NH:i:1
chr3_456110_456714_0:0:0_0:0:0_a2 97 chr3 456110 199 100M = 456615 605 CTGGGACCTCCAGGTGGCCTTCCGCCAAGTCAGCTGTGAGTCCCCAAAGTGGTGGGGTGGGGGTGTGGACAGCCAGGCAGACCCCACAGGTCCCACAGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:160 NH:i:1
chr3_456110_456714_0:0:0_0:0:0_a2 145 chr3 456615 199 100M = 456110 -605 CTAGAGCGGGGGCAGCGACAGTGGCTGGCCCTGCACAGAAGGGGCACGGGGGCCTGGGCCCGCCCCTCACACCCGCACCCCACCCGCAGGTGCCGCAGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:160 NH:i:1
@@ -329,19 +329,19 @@ chr3_391635_392135_0:0:0_0:0:0_a3 83 chr3 392036 199 100M = 391635 -501 CCTACGCA
chr3_391635_392135_0:0:0_0:0:0_a3 163 chr3 391635 199 100M = 392036 501 GGAGGCGAGCCCAGAGCATCTCACCGTGCCAGGAAGCGGGAAGGAAAGCCTGGAGCATCTCGTGGTGCAGGGAAGCGGGAAGGAAGTGAAGCTCAAAAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:161 NH:i:1
chr3_999333_999745_0:0:0_0:0:0_a4 83 chr3 999646 199 100M = 999333 -413 AGGTTGCAGAGAGCTGAGATTGCACCACTGCACTTCGGCCTGGGCAACAGAGCAAGACTCTGTCTCAAAAGAAAAAAAAGAATATCCAATTGTTCCAGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:162 NH:i:1
chr3_999333_999745_0:0:0_0:0:0_a4 163 chr3 999333 199 100M = 999646 413 AAATGCTGGGATTACAGGTGAGCCACCTTGCCTGGCCTGAGTTAATTTCTGTACATGATGTCAAGTTCTCCTTTTGCGTAAGAATATCCAATTGACTAAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:162 NH:i:1
-chr3_540564_541085_0:0:0_0:0:0_a5 99 chr3 540564 199 100M = 540986 522 GGCGCCCTCAGGCCTCCCGCTGACCCTTCCCAAGCCCGACCTCGACGCGGCTCAAATTGACCGTTCTGCGGCCGCCCTCGGGCACTTCCGGTCCGTCCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:163 NH:i:1
+chr3_540564_541085_0:0:0_0:0:0_a5 1123 chr3 540564 199 100M = 540986 522 GGCGCCCTCAGGCCTCCCGCTGACCCTTCCCAAGCCCGACCTCGACGCGGCTCAAATTGACCGTTCTGCGGCCGCCCTCGGGCACTTCCGGTCCGTCCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:163 NH:i:1
chr3_540564_541085_0:0:0_0:0:0_a5 147 chr3 540986 199 100M = 540564 -522 GACGTATCTGGCAACGGAAGTGGAGTTACGTGGAAGGCGGAAGCGTCTAGAGCTATTTCCCCCGTTTTTGCTAGAGACCAAGGACCCGGAATAAGCACTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:163 NH:i:1
chr3_557911_558409_0:0:0_0:0:0_a6 83 chr3 558310 199 100M = 557911 -499 TCATAGCACCAGACAATTCAGTCAGCAAAAATATGACCCTTACAGACTTAACTCCATGGGAAAAAATTCACCAGAATCCCCAGAATCCCCCAAATACTAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:164 NH:i:1
chr3_557911_558409_0:0:0_0:0:0_a6 163 chr3 557911 199 100M = 558310 499 CTCTTCAATTCTCACAGCTTTCCATAGCATTTTTTTTTAAGAGAAAGGTACTGCATCTTTATCATACAGGTACAGCAGTTTTAAAGGGACAAAATTCAAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:164 NH:i:1
-chr3_912743_913191_0:0:0_0:0:0_a7 99 chr3 860133 199 100M = 860482 449 ATTTTCTACAAGTCTTGTTTATTGAAAGGATCTGAAAAGCGTAATAAGGCTTTCAATGACATTTAATACATTTTCAAGAAATTAATATGAAACATTAAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:165 NH:i:1
+chr3_912743_913191_0:0:0_0:0:0_a7 1123 chr3 860133 199 100M = 860482 449 ATTTTCTACAAGTCTTGTTTATTGAAAGGATCTGAAAAGCGTAATAAGGCTTTCAATGACATTTAATACATTTTCAAGAAATTAATATGAAACATTAAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:165 NH:i:1
chr3_912743_913191_0:0:0_0:0:0_a7 147 chr3 860482 199 100M = 860133 -449 ACAGATGCTGAGGTGCCCAGACGAGGGTGAGCAGGGAGACACATGCCTCGGAGAACGTGCCCAGGCTGGGCCAGGCGGCTGCGGGAAGCTCCTCACGGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:165 NH:i:1
-chr3_969586_970028_0:0:0_0:0:0_a8 99 chr3 969586 199 100M = 969929 443 TCACCGCAACCTCCACTTCCCAGATTTGAGCAATTTTCCTGCCTCAGCCTCCAAAGTAGCTGGGATTACAGACACGCGCCACCACAGCCAGCTAATTTTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:166 NH:i:1
+chr3_969586_970028_0:0:0_0:0:0_a8 1123 chr3 969586 199 100M = 969929 443 TCACCGCAACCTCCACTTCCCAGATTTGAGCAATTTTCCTGCCTCAGCCTCCAAAGTAGCTGGGATTACAGACACGCGCCACCACAGCCAGCTAATTTTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:166 NH:i:1
chr3_969586_970028_0:0:0_0:0:0_a8 147 chr3 969929 199 100M = 969586 -443 TTCACCTTAGCAGGCAAGTAAGGTGGGTGAAAGGTAACTCCTGGGAACCACAGTTGCTTATGAGAGGTCCTAAGGAACCCTGTAGCACAGAATGCTGTTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:166 NH:i:1
-chr3_50456_50888_0:0:0_0:0:0_a9 99 chr3 50456 199 100M = 50789 433 TATTGAATAGGAGTGGTAAGAGAGGCCATCCTTGTCTTGTGCCAGTTTTCAAGGGGAATGCTTCCAGCTTTTGCTCATTCAGTATGATATTGGCTGTGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:167 NH:i:1
+chr3_50456_50888_0:0:0_0:0:0_a9 1123 chr3 50456 199 100M = 50789 433 TATTGAATAGGAGTGGTAAGAGAGGCCATCCTTGTCTTGTGCCAGTTTTCAAGGGGAATGCTTCCAGCTTTTGCTCATTCAGTATGATATTGGCTGTGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:167 NH:i:1
chr3_50456_50888_0:0:0_0:0:0_a9 147 chr3 50789 199 100M = 50456 -433 CAAGCTATCCTTAACAGTCTTAAGTTCAGTCTTTTTACATAATCCCACATTTCTTGAAGGTTTTGTTGTTCATTCTTTTTTGTTCTTTTTTCTCTATTCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:167 NH:i:1
chr3_952139_952595_0:0:0_0:0:0_aa 83 chr3 952496 199 100M = 952139 -457 ACTCTATCTGTGGCTGCTCCCTCTGTGCCCTCCCTTGGCCTCCCCTCCACAGGGTCTCAGAAAAGGCAGGCAGGAGAAGGCCAATGCCAGGTAAACAAAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:168 NH:i:1
chr3_952139_952595_0:0:0_0:0:0_aa 163 chr3 952139 199 100M = 952496 457 CCTTAACTTCTTGGGTGGCTAGTCATGTGACAGACAATCTGTCCTTCAAACCACCCAGGGCCACAGTGAGCCTCTGCACTGTTACTTTAAAAACGTAAAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:168 NH:i:1
-chr3_282348_282911_0:0:0_0:0:0_ab 99 chr3 282348 199 100M = 282812 564 CTGTGCGTCCCGCGGCGCCCGCAGCTTTCGGCAAACATGTTTAAAGGAGCTCCCCGAAGGAACTGGGGACGTCCGTCCGCTCATCAAAGGCTTCAATCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:169 NH:i:1
+chr3_282348_282911_0:0:0_0:0:0_ab 1123 chr3 282348 199 100M = 282812 564 CTGTGCGTCCCGCGGCGCCCGCAGCTTTCGGCAAACATGTTTAAAGGAGCTCCCCGAAGGAACTGGGGACGTCCGTCCGCTCATCAAAGGCTTCAATCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:169 NH:i:1
chr3_282348_282911_0:0:0_0:0:0_ab 147 chr3 282812 199 100M = 282348 -564 TAAACAGTATTGGGGACAGCTGGGGAAGTCACATTTGTGTCTTCTCTTTAGAGGGACAGTGTTCGCCTGAGGGTCCATCCCAGGGTTCCCTCAGGGCCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:169 NH:i:1
chr3_88833_89372_0:0:0_0:0:0_ac 83 chr3 89273 199 100M = 88833 -540 GCCGGATGCCCCGAGTCGGCCGTCACGCACCCCCCGCGGGAGCCCGCGCCGCCCGCCGCGCCGGGGCCGTTTAAATGGGCCAAGTTGTGGCGGCGGCGTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:170 NH:i:1
chr3_88833_89372_0:0:0_0:0:0_ac 163 chr3 88833 199 100M = 89273 540 GAGAGTTACTTGACCTTTCTGTCTCAGCGTCTTCTGCAAAAGACTGAACAGGAACTCCGTGGCTCGGGCGAACTCACTCCTGTCAAGTGCATAGGAAGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:170 NH:i:1
@@ -361,7 +361,7 @@ chr3_394308_394834_0:0:0_0:0:0_b3 83 chr3 394735 199 100M = 394308 -527 CTCAGGTG
chr3_394308_394834_0:0:0_0:0:0_b3 163 chr3 394308 199 100M = 394735 527 GAGGTCCTGGCTCCAATCCAATCCCCGGGCACCACGGAGGGCTCTGTGTCCCCAGGACGGCCGCAGGATGGGGACAAGCAGCTCACAGTCTGCAGAGAGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:177 NH:i:1
chr3_680867_681380_0:0:0_0:0:0_b4 83 chr3 681281 199 100M = 680867 -514 TGCTGGGGATGGTGCATCGTCACGCCAGGTCTGACTAGGAAGGAGGATGGGGAGGCAGGGCTGGGGGCTTTGAGGTCGAGGCGTGGCCGTGGATTCCAGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:178 NH:i:1
chr3_680867_681380_0:0:0_0:0:0_b4 163 chr3 680867 199 100M = 681281 514 CATCCCAGAAAATTTTGAGACCTGATGCTGAGGCCTCAGCAGCTCAGAGAATACGTACTTCTGTCCCACGACACCCAAACATCGACCTGTGTCAGTTCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:178 NH:i:1
-chr3_797594_798131_0:0:0_0:0:0_b5 99 chr3 797594 199 100M = 798032 538 CCCCGGAGGCTCTGCGGCCCGGGCCCCCGCGCCTTGCTGCCCCATGCAGCCCTGAGCCCCACAGCAAGTCTGCCATGGGCCGCGGGGCCCGTGTCCCCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:179 NH:i:1
+chr3_797594_798131_0:0:0_0:0:0_b5 1123 chr3 797594 199 100M = 798032 538 CCCCGGAGGCTCTGCGGCCCGGGCCCCCGCGCCTTGCTGCCCCATGCAGCCCTGAGCCCCACAGCAAGTCTGCCATGGGCCGCGGGGCCCGTGTCCCCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:179 NH:i:1
chr3_797594_798131_0:0:0_0:0:0_b5 147 chr3 798032 199 100M = 797594 -538 TCCACTGGGCCGCGTTCCCAGGGACGTGCAACAGGGCGCTCAGGTTAGGAGACCCGAAACCACAGGCAGACAGGACCCGCCACGCCCGCTCCCAGCCCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:179 NH:i:1
chr3_158109_158565_0:0:0_0:0:0_b6 83 chr3 158466 199 100M = 158109 -457 CTGGCAGGGTCCCCGTGTCCGTCATCTGGGGAGGGTCCCCGTGTCCGTCATCTGGGGAGGGTCCCCGTGTCCGTCATCTGGGGAGGGTCCCCGTGTCTGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:180 NH:i:1
chr3_158109_158565_0:0:0_0:0:0_b6 163 chr3 158109 199 100M = 158466 457 CATTCGATATTTCCTCTAGCTTCCACTTTTCTAGTTTATTTCTGTTTTTGTTTAGAGATGAGATCTCGCTTTGTTGCCCAGGCTGGCCTTGGCATCTTGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:180 NH:i:1
@@ -371,11 +371,11 @@ chr3_194556_195129_0:0:0_0:0:0_b8 83 chr3 195030 199 100M = 194556 -574 ATTGTGTG
chr3_194556_195129_0:0:0_0:0:0_b8 163 chr3 194556 199 100M = 195030 574 GCCACCCTTTCCGAAGGAACCGAGCCCCAGCCCCTCTGGGGCCTGCCAATTGCCAGAGAGCCCCAGTGCTCCACCCACTCCAGGCCCCAACCCCCACCTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:182 NH:i:1
chr3_222630_223209_0:0:0_0:0:0_b9 83 chr3 223110 199 100M = 222630 -580 GGGAGAATTGCTTGAACCCAGGAGACGGTGGTTGCAGTGAGCCGAGATCACGCCATTGCACTCCAGCCTCAGTGGCTGAAGGAGACTCCGTCTCAAAAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:183 NH:i:1
chr3_222630_223209_0:0:0_0:0:0_b9 163 chr3 222630 199 100M = 223110 580 ACCTGTAGCTCCAGCTACTCCGGAGGCTGAGGCAGGAGGATCACTTGAATCCAGGAGGTTGAGGCAGCAGCAAGCTGTGATTGCACCACTGCACTCCAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:183 NH:i:1
-chr3_490901_491397_0:0:0_0:0:0_ba 99 chr3 490901 199 100M = 491298 497 GACCAAGAAGGAAACCCCAGAGCCTCAGAGACCATCTTCTCAGTGGACAAAATTAAGGCCCGAGGAGGGGAGGGGCGTGCTGGAAGTCTATGGGACTGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:184 NH:i:1
+chr3_490901_491397_0:0:0_0:0:0_ba 1123 chr3 490901 199 100M = 491298 497 GACCAAGAAGGAAACCCCAGAGCCTCAGAGACCATCTTCTCAGTGGACAAAATTAAGGCCCGAGGAGGGGAGGGGCGTGCTGGAAGTCTATGGGACTGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:184 NH:i:1
chr3_490901_491397_0:0:0_0:0:0_ba 147 chr3 491298 199 100M = 490901 -497 GTCCCTGCAGCCAGGGAGGCCAGGACAGGGCTCCCAGCACCAACCGGCCTAGGAACCCCCAGGCCCTCTTCCTGGTCGAGGTGGAATGCAGCTGACTCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:184 NH:i:1
chr3_415288_415783_0:0:0_0:0:0_bb 83 chr3 415684 199 100M = 415288 -496 GGAAAGGGAGACTCCCCTTCCCAGTCTGCTAAGTAGCGGGTGTTTTCCCTTGACACTGACGCTACCGCTAGACCACGGTCTGCTTAGGTCACGGGTGCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:185 NH:i:1
chr3_415288_415783_0:0:0_0:0:0_bb 163 chr3 415288 199 100M = 415684 496 CCGAATGCCAGGCTGCGCTGTTATTTATTGGATACAAGACAAAGGGGCAGGATAAGGAGCGTGAGCCATCTCCAGTGATAGGTAAAGTCACGTGGGTCAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:185 NH:i:1
-chr3_145424_145889_0:0:0_0:0:0_bc 99 chr3 145424 199 100M = 145790 466 CGGCTCAGAGGCAGGTGTAGACACAGCCATGTCCGTGTCAGGTTGCGGCTCAGAGGCAGGTGTAGACACAGCCATGTCCCTGTCAGATTGCGGCTCGGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:186 NH:i:1
+chr3_145424_145889_0:0:0_0:0:0_bc 1123 chr3 145424 199 100M = 145790 466 CGGCTCAGAGGCAGGTGTAGACACAGCCATGTCCGTGTCAGGTTGCGGCTCAGAGGCAGGTGTAGACACAGCCATGTCCCTGTCAGATTGCGGCTCGGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:186 NH:i:1
chr3_145424_145889_0:0:0_0:0:0_bc 147 chr3 145790 199 100M = 145424 -466 TGGCCCCAACTCCTCCTGCCCGGCTTTGGCCATCAGCTTTGCCGTCTCAGGACTGGCCGCAGGGAGCAGCGGGGAGCCCGTCTGGGCAGAAGCAGAGGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:186 NH:i:1
chr3_473576_474018_0:0:0_0:0:0_bd 83 chr3 473919 199 100M = 473576 -443 GCCCGGCCCCGCCTCCGCTCCCCGACTCTGGCCGCCGTGGCGCGGTGCCACCGCGCAGGCGCAAGAGCGGCCGGGGCCGGGGGGGAGGGGGGGGGGGCGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:187 NH:i:1
chr3_473576_474018_0:0:0_0:0:0_bd 163 chr3 473576 199 100M = 473919 443 AAGTGAGGCGGGGCCTGCCACGGCCGTCTCCGGGGACAGTCGGTGGAAATGTGGGGCGCGGGGAGGGCGCGGGGAGGGCGCGGGCCCGCCCGCTCCAACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:187 NH:i:1
@@ -385,11 +385,11 @@ chr3_301964_302453_0:0:0_0:0:0_bf 83 chr3 302354 199 100M = 301964 -490 CCCACACC
chr3_301964_302453_0:0:0_0:0:0_bf 163 chr3 301964 199 100M = 302354 490 AGATGCTGTGTTTGTGTGGGGTGCTGAGTGGCCAGCCTAAAACCTTCTGAGAAAGTAGGATCCAGCACAGGGAGCTTCAGCTTCTCTGATGGGAGGCAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:189 NH:i:1
chr3_429781_430253_0:0:0_0:0:0_c0 83 chr3 430154 199 100M = 429781 -473 TCAACACCAGCCTGGGCAACACGGTGAAACCCTGTCTCTATTAAAAATACAAAAATTAGCCAGCCATGGTGGCACACACCTGTAATCCCAGCTACTCAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:190 NH:i:1
chr3_429781_430253_0:0:0_0:0:0_c0 163 chr3 429781 199 100M = 430154 473 TCCCAAACGAAACAGAAGAGCACAGAGAAGAAAGTAAAACCAGCTAGAAATGCCAGAAACCAGACATGGCTTCAACTGCCAACACCGGCCAACACCGGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:190 NH:i:1
-chr3_199006_199474_0:0:0_0:0:0_c1 99 chr3 199006 199 100M = 199375 469 TGCTCCCCCGGCCCAGCCCCACCGCTGCCCTCGACTCAATCCCAGTTGAACGCCCGCAGCCTCAGTCCCACCCCCGGCCCAGCCCCAGCGCCCCCAGTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:191 NH:i:1
+chr3_199006_199474_0:0:0_0:0:0_c1 1123 chr3 199006 199 100M = 199375 469 TGCTCCCCCGGCCCAGCCCCACCGCTGCCCTCGACTCAATCCCAGTTGAACGCCCGCAGCCTCAGTCCCACCCCCGGCCCAGCCCCAGCGCCCCCAGTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:191 NH:i:1
chr3_199006_199474_0:0:0_0:0:0_c1 147 chr3 199375 199 100M = 199006 -469 CCGGGGTCCCTGGAGGCGGCTTCCTTTGTCTCTGCTCCCGCCCTCCCTCCGCGGCGTCTTCGCCCCTCACTCACCTCCCCAGCCCCGCGGGGACGAGGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:191 NH:i:1
-chr3_955012_955581_0:0:0_0:0:0_c2 99 chr3 955012 199 100M = 955482 570 TGGCCAGGCAGGACAGCACGTCCCACAGAGACCCACAGCTCCGTGATGGGGGCAGAAGCAGAAACCACACCCATGAAACATACACATCTTTCATCTGCTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:192 NH:i:1
+chr3_955012_955581_0:0:0_0:0:0_c2 1123 chr3 955012 199 100M = 955482 570 TGGCCAGGCAGGACAGCACGTCCCACAGAGACCCACAGCTCCGTGATGGGGGCAGAAGCAGAAACCACACCCATGAAACATACACATCTTTCATCTGCTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:192 NH:i:1
chr3_955012_955581_0:0:0_0:0:0_c2 147 chr3 955482 199 100M = 955012 -570 CATGGCAGCAACACTGCAGACACGGGGGTGCTCGTGAATATGTGTGGGTGGGTTTGTTTTCTTGGGGACAGTGGCTGTGACACCCACATCAGCGTGGGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:192 NH:i:1
-chr3_581992_582495_0:0:0_0:0:0_c3 99 chr3 581992 199 100M = 582396 504 AACTGGACGATACCTTTTTAACTTTAGTTAACATGTTTACACACAGAATTTTCTTTACAATTTACATTTTAAAACTTGCTTAAACTCTTAAAAACAATAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:193 NH:i:1
+chr3_581992_582495_0:0:0_0:0:0_c3 1123 chr3 581992 199 100M = 582396 504 AACTGGACGATACCTTTTTAACTTTAGTTAACATGTTTACACACAGAATTTTCTTTACAATTTACATTTTAAAACTTGCTTAAACTCTTAAAAACAATAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:193 NH:i:1
chr3_581992_582495_0:0:0_0:0:0_c3 147 chr3 582396 199 100M = 581992 -504 GGAGGCTGAGGCAGGAGAATGGCGTGAACCCGGGAGGCGGAGCTTGCAGTGAGCCGAGATCACGCCACTGCACTCCAGCCTGGGCAACAGAGTGAGACTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:8 SD:i:0 SN:i:193 NH:i:1
chr3_863695_864255_0:0:0_0:0:0_c4 83 chr3 800936 199 100M = 800475 -561 GTGGGAGAGGGTGTGTGGAGGTTTGTGCTGCCCCACGTGGGCACCCGAAGATGCCCTGGCAAGTCACGGAGAAAACACAGCTCTTTCCTCCACAACAAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:194 NH:i:1
chr3_863695_864255_0:0:0_0:0:0_c4 163 chr3 800475 199 100M = 800936 561 CACAGGGTGGGGGACCACCTTGCCAGAGTGGAGGCCCCCCAGGAGGTAGGAGCTCCCCTCCTGCCTGGGGAAGACACTGGCCCACATGGGGTCAGAGGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:194 NH:i:1
@@ -397,43 +397,43 @@ chr3_272390_272862_0:0:0_0:0:0_c5 83 chr3 272763 199 100M = 272390 -473 TGGATGCA
chr3_272390_272862_0:0:0_0:0:0_c5 163 chr3 272390 199 100M = 272763 473 CAGCACTGCCAGCCCCCCTGCACCAGGTTCGAATTCTTGCCGGGCCTCAGCTGCCTCCACGCGGGGCAGGGCTCGGCAGCCTGCCATGCCCGAGGCCCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:195 NH:i:1
chr3_435144_435638_0:0:0_0:0:0_c6 83 chr3 435539 199 100M = 435144 -495 CCTGCTGCCATGTGAGACATGCCTCTTCCCCTTCCGCCATGATTGTAAGTTTCCTGAGGCCTCCCTGGCCACACGGAACTGTGAGTCCATTAAACCCCTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:196 NH:i:1
chr3_435144_435638_0:0:0_0:0:0_c6 163 chr3 435144 199 100M = 435539 495 CCTCAGGTAATCTGCCCACCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGTCCCCGCGCCCAGCCATGGCCACACGATTTAATAACGTCTGTGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:196 NH:i:1
-chr3_419539_419983_0:0:0_0:0:0_c7 99 chr3 419539 199 100M = 419884 445 CGGCCGTGCCCCACCTCGACGCGATGCACCTGCGGTACATCCCACCCAGGCGAAGTCACGGAACAGACGCAGAAGAGGGGAGACGGCCGAGACCCGGGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:197 NH:i:1
+chr3_419539_419983_0:0:0_0:0:0_c7 1123 chr3 419539 199 100M = 419884 445 CGGCCGTGCCCCACCTCGACGCGATGCACCTGCGGTACATCCCACCCAGGCGAAGTCACGGAACAGACGCAGAAGAGGGGAGACGGCCGAGACCCGGGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:197 NH:i:1
chr3_419539_419983_0:0:0_0:0:0_c7 147 chr3 419884 199 100M = 419539 -445 GAGAAAAGGCTCTGCAGGTACCCAAGGCCCCCCAGCCTGCAGCACCGAGGGGCGCCCAGGAGGCCCGGCCTGCGGAGGGCACCGGCAGCCCCCGCCCCGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:197 NH:i:1
-chr3_962831_963298_0:0:0_0:0:0_c8 99 chr3 962831 199 100M = 963199 468 CCCAGCCTATTTCATGCTTTTCATATTTATTCACATTATAAGACCCCTAATTTCTTAAGAATGTTTAAAGTAAAGAATACAACAATGCATCATGGAGCTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:198 NH:i:1
+chr3_962831_963298_0:0:0_0:0:0_c8 1123 chr3 962831 199 100M = 963199 468 CCCAGCCTATTTCATGCTTTTCATATTTATTCACATTATAAGACCCCTAATTTCTTAAGAATGTTTAAAGTAAAGAATACAACAATGCATCATGGAGCTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:198 NH:i:1
chr3_962831_963298_0:0:0_0:0:0_c8 147 chr3 963199 199 100M = 962831 -468 TGTAATCAAGTAACTCAAAGGAAGGTAGGAAGGGAACAACCAAAAAACAAAAATGAAGGGACAAAAAGGTGGCAAAGTAGCAGAGTTACAATAAGAAATA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:198 NH:i:1
chr3_727319_727786_0:0:0_0:0:0_c9 83 chr3 727687 199 100M = 727319 -468 GTGAGGATGTGCGTCACTCTCTGCTTTTCTTTCTCTTACTGAATATACAAGAACTTAAAAATAAACAGTGCTAACTGAAGAGTAATTGTCAAAAACAGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:199 NH:i:1
chr3_727319_727786_0:0:0_0:0:0_c9 163 chr3 727319 199 100M = 727687 468 CCAAAGCAGAGCCCGTGGCTGACAGTGTCCCAGCGCCACCCCTGCTGCCACTCCTTCCCCCCACCACACACCAAGAGCACACACACGCACGCGGGGTTCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:199 NH:i:1
4_266361_266868_0:0:0_0:0:0_12e 83 4 266769 199 100M = 266361 -508 CTAAAAATACAAAAATTAGCTCAGCATGGTAGCACACGCTTGTAGTCCCAGCTACTTGGGAGGCTGAGACACAAGAATCGCTTGAACCCAGGAGGCAGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:300 NH:i:1
4_266361_266868_0:0:0_0:0:0_12e 163 4 266361 199 100M = 266769 508 TGGCTGTCATATCTGCATACATTATAAACCCAAGAGATGTTTTAATTTTTCCCCAGATATTTACTAATTTTGATGTTCATCATTCATTCTTAAAGATCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:300 NH:i:1
-4_213597_214086_0:0:0_0:0:0_12f 99 4 213597 199 100M = 213987 490 AGCCCGTTGCCAAGACCACGGCAGCCCCCACCACACGTCGGCCCCCCACCACTGCCCCCAGCCGTGTGCCCGGACGTCGGCCCCCGGCCCCCCAGCAGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:301 NH:i:1
+4_213597_214086_0:0:0_0:0:0_12f 1123 4 213597 199 100M = 213987 490 AGCCCGTTGCCAAGACCACGGCAGCCCCCACCACACGTCGGCCCCCCACCACTGCCCCCAGCCGTGTGCCCGGACGTCGGCCCCCGGCCCCCCAGCAGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:301 NH:i:1
4_213597_214086_0:0:0_0:0:0_12f 147 4 213987 199 100M = 213597 -490 GAGGCAGGCGGATCACCTGAGGTCGGGAGTTCGAGACCAGCCTGACCAACATGGAGACACTCTGTCTCTACTAAAAATACAAAATTAGCCGGGCGTGGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:301 NH:i:1
-4_57766_58317_0:0:0_0:0:0_130 99 4 57766 199 100M = 58218 552 GGTGCATGCCTGTAATTACAGCTACTTGGGAGGCTGAGGTGGGAGGATCCCTTGAGGGCAGGAGGTCAAGGTTGCAGTGAGCTGTGATCATACCACTGTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:302 NH:i:1
+4_57766_58317_0:0:0_0:0:0_130 1123 4 57766 199 100M = 58218 552 GGTGCATGCCTGTAATTACAGCTACTTGGGAGGCTGAGGTGGGAGGATCCCTTGAGGGCAGGAGGTCAAGGTTGCAGTGAGCTGTGATCATACCACTGTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:302 NH:i:1
4_57766_58317_0:0:0_0:0:0_130 147 4 58218 199 100M = 57766 -552 CTGTGTTTATAGCTTTATAACCTTCATGCCAAACCCTAGCACCTTAAAATATCTAGCAAATGTAAATATAAAACACAGTCAAAAATGTATGCTGACAATT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:302 NH:i:1
-4_386583_387005_0:0:0_0:0:0_131 99 4 386583 199 100M = 386906 423 GGACCAAACTCCAGATAAGAGTACGTGAACTGGGAAAAGAGCAGGCAGAGCCAACAGGCTCAAAAGTCTCCGCGTCATTCTAGAAGGTCAAGGCCAGGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:303 NH:i:1
+4_386583_387005_0:0:0_0:0:0_131 1123 4 386583 199 100M = 386906 423 GGACCAAACTCCAGATAAGAGTACGTGAACTGGGAAAAGAGCAGGCAGAGCCAACAGGCTCAAAAGTCTCCGCGTCATTCTAGAAGGTCAAGGCCAGGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:303 NH:i:1
4_386583_387005_0:0:0_0:0:0_131 147 4 386906 199 100M = 386583 -423 AGGAGGCCGCCCCCATGGGGGCTGGTCACATGATATGGGCCGTTCCTGGCAGGAACGTATCTTACACCTTTATTTTTCTGTCCTCCAAGGAGGATAGATT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:303 NH:i:1
-4_881083_881581_0:0:0_0:0:0_132 99 4 881083 199 100M = 881482 499 CATCATTTAAAAAATTACATTAATTCTCCAACTTCAGGCATCTTTTTTTTTTTTTTTTTTTAGACGGAGTCTCGCTCTGTCACCCAGGCTGGAGTGTAGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:304 NH:i:1
+4_881083_881581_0:0:0_0:0:0_132 1123 4 881083 199 100M = 881482 499 CATCATTTAAAAAATTACATTAATTCTCCAACTTCAGGCATCTTTTTTTTTTTTTTTTTTTAGACGGAGTCTCGCTCTGTCACCCAGGCTGGAGTGTAGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:304 NH:i:1
4_881083_881581_0:0:0_0:0:0_132 147 4 881482 199 100M = 881083 -499 CGAAATGCCTCTCAGAGGACCTGCAGGGGGTGAGGGGCAGGTTCATTAGCTTGACTGTGGTGACAGTTTCAGGGGCATGTAAAAATACATCACATCTTAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:304 NH:i:1
-4_710464_710975_0:0:0_0:0:0_133 99 4 710464 199 100M = 710876 512 AGTTAAGGAACGTCAGAGAAAAGGCATCTGTGTATGAAGCCTGGAAGCGCCTGTGTCCTGGACACCCAGCCCCTTCCCTGCCCCACAGCAGAGGCAGCCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:305 NH:i:1
+4_710464_710975_0:0:0_0:0:0_133 1123 4 710464 199 100M = 710876 512 AGTTAAGGAACGTCAGAGAAAAGGCATCTGTGTATGAAGCCTGGAAGCGCCTGTGTCCTGGACACCCAGCCCCTTCCCTGCCCCACAGCAGAGGCAGCCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:305 NH:i:1
4_710464_710975_0:0:0_0:0:0_133 147 4 710876 199 100M = 710464 -512 GCCCTTCAGCAGAGACAAAGCAAACCCGGAAACCCAGTCCCCAGTCTAGCTCAGCTACAACAGAGTCAAGAACATCGGAAAACCCAAAGAAAGGAAGCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:305 NH:i:1
-4_879832_880383_0:0:0_0:0:0_134 99 4 879832 199 100M = 880284 552 GCCCCTCTGAACGGTCTGTGACACACGCATGCTTTCAGCTGGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCAGGCTTTTAGCTAGAGTTTGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:306 NH:i:1
+4_879832_880383_0:0:0_0:0:0_134 1123 4 879832 199 100M = 880284 552 GCCCCTCTGAACGGTCTGTGACACACGCATGCTTTCAGCTGGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCAGGCTTTTAGCTAGAGTTTGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:306 NH:i:1
4_879832_880383_0:0:0_0:0:0_134 147 4 880284 199 100M = 879832 -552 TACAGGTGCCCGCCACCACGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACTGGGTTTCACCGTGTTAGCCAGGATGGTCTTGATCTCCTGACCTTGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:306 NH:i:1
4_36219_36745_0:0:0_0:0:0_135 83 4 36646 199 100M = 36219 -527 ACTGATTTTTAGATTTGTATTGGTAGGATAATTCCACTTGGTTATATTGTCTAACTTTTTTCTAATTTTCTTTAATTTTTATTAGAGATGAGGCCTCACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:307 NH:i:1
4_36219_36745_0:0:0_0:0:0_135 163 4 36219 199 100M = 36646 527 GTACTCCTGTCTTCTTGGCATTATTGATAATTCGATTCTAATTGCTAATAAGTCAGAAAAATTAGGAACACCAAATTTCAGTCGTCTCAAAAGCACTCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:307 NH:i:1
-4_642873_643354_0:0:0_0:0:0_136 99 4 642873 199 100M = 643255 482 TACTGGTGGGTAGGGCCAGGGGCGTGTACATGGGCAGCAGTGGGGCCAGGGCCGAGCTTGGGCGCCTCATTTCACAGAGGGAAACAAGGGGAGGTGAGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:308 NH:i:1
+4_642873_643354_0:0:0_0:0:0_136 1123 4 642873 199 100M = 643255 482 TACTGGTGGGTAGGGCCAGGGGCGTGTACATGGGCAGCAGTGGGGCCAGGGCCGAGCTTGGGCGCCTCATTTCACAGAGGGAAACAAGGGGAGGTGAGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:308 NH:i:1
4_642873_643354_0:0:0_0:0:0_136 147 4 643255 199 100M = 642873 -482 AAGCGATTCATCTGCCTCAGCCTCTGGAGTAGCTGGGATTACAGGCACGCGCCACCACGCCTGGCTAATGTTGTATTTTAGTAGAGACGGGGTTTCTCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:308 NH:i:1
-4_689732_690243_0:0:0_0:0:0_137 99 4 621684 199 100M = 622096 512 CTGCACTCAGGCATGGACTACGCCATCATGACAGGCGGGGACGTGGCCCCCATGGGGCGGGAAGGCGTGACCGCCATGCACAAGCTCTTTGACTGGGCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:309 NH:i:1
+4_689732_690243_0:0:0_0:0:0_137 1123 4 621684 199 100M = 622096 512 CTGCACTCAGGCATGGACTACGCCATCATGACAGGCGGGGACGTGGCCCCCATGGGGCGGGAAGGCGTGACCGCCATGCACAAGCTCTTTGACTGGGCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:309 NH:i:1
4_689732_690243_0:0:0_0:0:0_137 147 4 622096 199 100M = 621684 -512 ACAGCCGCCCTCCCCCCAACACGCCTGCAGGTGGGCGTGGGCACTGGTTGCCTTTTCTAGAACCATTTGAAAGTTAGCTGAAGACAGCATGGCACACTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:309 NH:i:1
4_883909_884406_0:0:0_0:0:0_138 83 4 884307 199 100M = 883909 -498 AGAGTCCTTTTCATCACCCATTTGAGTTAAAACACTGCAAAAAGAAAAATAATTCAGCCTACATCAGGACACAGCAAGCTATGGTGCTGAACACTTGAAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:310 NH:i:1
4_883909_884406_0:0:0_0:0:0_138 163 4 883909 199 100M = 884307 498 TATTAAAAATACTTAGAGATAGTATTATGAATATACTAATAATGAACCGAGAAAAATTAGTCCAGTTTTGCTAATGACTTAACATTCAACGTATTTTATT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:310 NH:i:1
-4_655611_656093_0:0:0_0:0:0_139 99 4 655611 199 100M = 655994 483 CAAAAAGTCTCCCCGAGGGGGCTGAGGAGCCCCCGTTGCCCTCGGGGCATCTCAGCTGGCAGCCCCAGCGTTTCCTTCCCCATCCCTGTCCTACAGATTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:311 NH:i:1
+4_655611_656093_0:0:0_0:0:0_139 1123 4 655611 199 100M = 655994 483 CAAAAAGTCTCCCCGAGGGGGCTGAGGAGCCCCCGTTGCCCTCGGGGCATCTCAGCTGGCAGCCCCAGCGTTTCCTTCCCCATCCCTGTCCTACAGATTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:311 NH:i:1
4_655611_656093_0:0:0_0:0:0_139 147 4 655994 199 100M = 655611 -483 TTGGGAACTCCTTCCCCAGGCGCCTGAAGCTGGCCCAGTTTGACTACGGGAGGAAGTGCTCGGAGGTCGCTCGGCTGACGGAGGGCATGTCGGGCCGGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:311 NH:i:1
4_709060_709513_0:0:0_0:0:0_13a 83 4 709414 199 100M = 709060 -454 TCTCCTGTTCTCTGGAATTCAGATCTGATTGGGACAAGGTGACACAGACGTCAGAGGCTCCAAAACCAGGGAACAGGCAGTCAATGTGGCTGAGTCCCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:312 NH:i:1
4_709060_709513_0:0:0_0:0:0_13a 163 4 709060 199 100M = 709414 454 GGCCTCGGCCACTCCCCACTGTCACGGCCTCGGCCACTCCCCTCTGTCACGGCCTCGGCCACTCCCCACTGTCACGGCCTCGGCCACTCCCCACTGTCAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:312 NH:i:1
-4_132268_132693_0:0:0_0:0:0_13b 99 4 132268 199 100M = 132594 426 CACGGTGGCTGTGGTCTGGGAGCGTGGCTCTGCCCGCGCGTGTGTGCCGTGTGTCCGTGCAGCTCAGGGTCTTCCCCTCGCCCCCGGGGCGTTCAGACCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:313 NH:i:1
+4_132268_132693_0:0:0_0:0:0_13b 1123 4 132268 199 100M = 132594 426 CACGGTGGCTGTGGTCTGGGAGCGTGGCTCTGCCCGCGCGTGTGTGCCGTGTGTCCGTGCAGCTCAGGGTCTTCCCCTCGCCCCCGGGGCGTTCAGACCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:313 NH:i:1
4_132268_132693_0:0:0_0:0:0_13b 147 4 132594 199 100M = 132268 -426 AGAGTAAGGGGCCTCAGATGAGCAGCGAGCCAGCCGGTCACCTACACCGGCCTTTCTCCCTGGGTGTCCCGCAGGAAGTTGCCGCTGGGTATGGCCGCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:313 NH:i:1
4_655153_655632_0:0:0_0:0:0_13c 83 4 655533 199 100M = 655153 -480 AGGCAGGCGGGTGACCAGGGCTGTGGCTGCGTTCTCCCCATGTTTCCTGTGCTCACAAGCTGCCGCTTTAGATTCTCCCAAAAAGTCTCCCCGAGGGGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:314 NH:i:1
4_655153_655632_0:0:0_0:0:0_13c 163 4 655153 199 100M = 655533 480 TGTCACACGGAGGATCAAGTCCTGCTGGTCGGCCGTGGCTGACTCTTCAGGCACGTTGGGCTCCTGGGTCAGCTGCTGCCGTTCGACGCTCCCTGGAGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:314 NH:i:1
-4_454901_455452_0:0:0_0:0:0_13d 99 4 454901 199 100M = 455353 552 TCCCGTGTCTCTGCCCCGTCCCCCGTGTCTCTGCTCCGTCCCGTGTCTCTGCTCCGTCCCCCGTGTCTGCTCCGTCCCGTGTCCCTGCTCCGTCCCGTGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:315 NH:i:1
+4_454901_455452_0:0:0_0:0:0_13d 1123 4 454901 199 100M = 455353 552 TCCCGTGTCTCTGCCCCGTCCCCCGTGTCTCTGCTCCGTCCCGTGTCTCTGCTCCGTCCCCCGTGTCTGCTCCGTCCCGTGTCCCTGCTCCGTCCCGTGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:315 NH:i:1
4_454901_455452_0:0:0_0:0:0_13d 147 4 455353 199 100M = 454901 -552 CTCCGTCCCGTGTCTCTGCCCCGTCCCCCGTGTCTCTGCTCCGTCCCGTGTCTCTGCTCCGTCCCCCGTGTCTGCTCCGTCCCGTGTCCCTGCTCCGTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:315 NH:i:1
4_489331_489877_0:0:0_0:0:0_13e 83 4 489778 199 100M = 489331 -547 GCGGCCCCATCTGCGACGTCCCCTCACGGCCGCCTGAACCTCGGGGCTCGCGTTCTCGCGTCACTGCCGGAGGCCCGGGGTGCAGGGACTAGGCCGCCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:316 NH:i:1
4_489331_489877_0:0:0_0:0:0_13e 163 4 489331 199 100M = 489778 547 GAGGGGCTTGCGGGGTTAAGGGGCTTGGGGGCTGGGGGACTGGGGGGCTGAGGGGCTGGGGGGCTGAGGGTCTGGGGGTCTGCTGGGCTGAGGGGCTGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:316 NH:i:1
@@ -441,11 +441,11 @@ chr3_727319_727786_0:0:0_0:0:0_c9 163 chr3 727319 199 100M = 727687 468 CCAAAGCA
4_257880_258366_0:0:0_0:0:0_13f 163 4 257880 199 100M = 258267 487 CTCTCTCCCCCCTGCACTTCCAGCCACAGGACCTGTTACCATGGCCCTGCCTTCCTAAGGGAGGAAACGCCTGCCAGGCGCACAAATCCTCAAGTGGTGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:317 NH:i:1
4_5243_5649_0:0:0_0:0:0_140 83 4 5550 199 100M = 5243 -407 ATGGAATTGTATTATTGGACGCATAACATACAGAAATGTAATATACTTGACAATAACAGTACAAAGGAGGCAAATGGGAAAAACTGCACTGGAGTAAGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:318 NH:i:1
4_5243_5649_0:0:0_0:0:0_140 163 4 5243 199 100M = 5550 407 CATATTCAAAGTGCTCAGAGGAAGAAACTATTAATCCACAATCCTATATCCAGCAAGGCCATCTGTCAGACATGAAGGTGAAATAAATTTCAAGATAAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:318 NH:i:1
-4_838114_838559_0:0:0_0:0:0_141 99 4 838114 199 100M = 838460 446 AGAGAGCCCCAGCTGAGAGGCGACCTGCCTTTCGGCTTCTCTTCGGAGCCAGGAACCAGCTCTTCCAGTGCTGGGGTTTTCACCGAGGACGACATGCTGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:319 NH:i:1
+4_838114_838559_0:0:0_0:0:0_141 1123 4 838114 199 100M = 838460 446 AGAGAGCCCCAGCTGAGAGGCGACCTGCCTTTCGGCTTCTCTTCGGAGCCAGGAACCAGCTCTTCCAGTGCTGGGGTTTTCACCGAGGACGACATGCTGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:319 NH:i:1
4_838114_838559_0:0:0_0:0:0_141 147 4 838460 199 100M = 838114 -446 GGCACGGTGGCTCACACCTGTAATCCCAGCACTTTGGGAGGCCGAGGCAGGTGGATCACCTTAGGTCAGGAGTTCGAGACCACCCTGGCCAACATGACGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:319 NH:i:1
4_161003_161509_0:0:0_0:0:0_142 83 4 161410 199 100M = 161003 -507 GCAGACAAGCCCAGACACACACGGCCCAGACACCCCAGAGGCAAAGGAATTCAGCAAACATTTATTGACCCTTGGTCCTCATCAAGGAGGCAGTGAGAGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:320 NH:i:1
4_161003_161509_0:0:0_0:0:0_142 163 4 161003 199 100M = 161410 507 CTGGGCAGAACCCTGCTGATCCTGCAGATCACTGGACCCTCAGGACACTGAGGTGTGAATGCCAGAAGGGGACAGTGGGACATCAGGGAGGCTGCCACCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:320 NH:i:1
-4_463442_463916_0:0:0_0:0:0_143 99 4 463442 199 100M = 463817 475 CCCTGGGCTGGGCCTCCTCACTTGGTGGGTGACAGCACCTCGAAGCAGAACCTCCGCTCGATGTCCTCACACGGCTTCACAGAGCACAGGCGGAGGTCAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:321 NH:i:1
+4_463442_463916_0:0:0_0:0:0_143 1123 4 463442 199 100M = 463817 475 CCCTGGGCTGGGCCTCCTCACTTGGTGGGTGACAGCACCTCGAAGCAGAACCTCCGCTCGATGTCCTCACACGGCTTCACAGAGCACAGGCGGAGGTCAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:321 NH:i:1
4_463442_463916_0:0:0_0:0:0_143 147 4 463817 199 100M = 463442 -475 CACCTTGAGCTTCTTCTGGTAGACCAGCTGGCTGTTCTGAATGGAGAACCAGCGCCTAGGTGGGTGGGGGGATGTGGGGAGTCAGGCGGGGCCCATCCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:321 NH:i:1
4_453878_454291_0:0:0_0:0:0_144 83 4 454192 199 100M = 453878 -414 TGTCTCTGCTCCGTCCCGTGTCCCTGCTCCGTCCCGTGTCTCTGCTCCGTCCCCCGTGTCCCTGTTCCGTCCCCCGAGTCTCTGCTCCGTCCCGTGTCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:322 NH:i:1
4_453878_454291_0:0:0_0:0:0_144 163 4 453878 199 100M = 454192 414 CCTGCTCCGTCCCGTGTCTCTGCTCCGTCCCCCGTGTCTCTGCTCCGTCCCGTGTCTCTGCTCCGTCCCGTGTCTCTGCCCCGTCCCGTGTCCCTGCTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:322 NH:i:1
@@ -453,13 +453,13 @@ chr3_727319_727786_0:0:0_0:0:0_c9 163 chr3 727319 199 100M = 727687 468 CCAAAGCA
4_107909_108331_0:0:0_0:0:0_145 163 4 107909 199 100M = 108232 423 GGAGCTCCTGCGGAAGCAGAACCTGGCCCGGTAGGTGCGGGGAGGCGGGCGGGGCCGCGCGGCCCGGGAGGCGGCTGACCCGCGTCTGCCCCCGGCCCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:323 NH:i:1
4_225281_225806_0:0:0_0:0:0_146 83 4 225707 199 100M = 225281 -526 GCTAATTTTTGTATTTTTAGTAGAGACGGAGTTTCACCATGTTGGCCAGGATGGTCTTCAACTTCTAACTTCGTGATCCACGCTGCTGGGATTACAGGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:324 NH:i:1
4_225281_225806_0:0:0_0:0:0_146 163 4 225281 199 100M = 225707 526 TCTACGGCCCCGACAGGTGGGCGTGGCGAAATGCCACCCGGGCCCTCCTCGCGCTCCCAGCTGGGTCACGCCTCAGACCCGGGGTCCTGGACGCCTGCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:324 NH:i:1
-4_356244_356750_0:0:0_0:0:0_147 99 4 356244 199 100M = 356651 507 CATTTTCATTTGTCTCAAAGTATTTTTATTTTTATTTTTTATTTTATTTGAGATGGAGTTTCGCTCTTGTCACCTAGGCTGGAGTGTAATGGCGCAATCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:325 NH:i:1
+4_356244_356750_0:0:0_0:0:0_147 1123 4 356244 199 100M = 356651 507 CATTTTCATTTGTCTCAAAGTATTTTTATTTTTATTTTTTATTTTATTTGAGATGGAGTTTCGCTCTTGTCACCTAGGCTGGAGTGTAATGGCGCAATCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:325 NH:i:1
4_356244_356750_0:0:0_0:0:0_147 147 4 356651 199 100M = 356244 -507 TTGTGATTTTTCCAATTTTCCTTCTGTTATTGACTTCTAATTTCATTCCATTGTGTCAGAGAACATACTTTATAATTTCAACCCTTTAAAAATTATTGAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:325 NH:i:1
-4_74479_74932_0:0:0_0:0:0_148 99 4 74479 199 100M = 74833 454 GCACCGTTGCTGGTATATGCGGGGGTCGGGGTCAGGCCCCCGGGCGCACCGTTGCTGGTATATGCGGTGGTCGGGGTCAGGCCCCCCGGGCGCACTGTTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:326 NH:i:1
+4_74479_74932_0:0:0_0:0:0_148 1123 4 74479 199 100M = 74833 454 GCACCGTTGCTGGTATATGCGGGGGTCGGGGTCAGGCCCCCGGGCGCACCGTTGCTGGTATATGCGGTGGTCGGGGTCAGGCCCCCCGGGCGCACTGTTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:326 NH:i:1
4_74479_74932_0:0:0_0:0:0_148 147 4 74833 199 100M = 74479 -454 GCGCACCGTTGCTGGTATATGCGGTGGTCGGGGTCAGGCCCCCGGGCGCATCTTTGCTGGTATATGCGGTGGTCGGGGTCAGGCCCCCCGGGCGCACTGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:326 NH:i:1
-4_20913_21302_0:0:0_0:0:0_149 99 4 20913 199 100M = 21203 390 TGTGTAGTGTGTGTACACATATACACCTGTGTGCATGTGTACATGGTATTTAAGCATGTTTGCACGTGTGTTTGTGCTTGCATGTGCTGCGTGTTTGTGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:327 NH:i:1
+4_20913_21302_0:0:0_0:0:0_149 1123 4 20913 199 100M = 21203 390 TGTGTAGTGTGTGTACACATATACACCTGTGTGCATGTGTACATGGTATTTAAGCATGTTTGCACGTGTGTTTGTGCTTGCATGTGCTGCGTGTTTGTGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:327 NH:i:1
4_20913_21302_0:0:0_0:0:0_149 147 4 21203 199 100M = 20913 -390 GGCTGTGGGGAGGCTGAGAATTGAGATGCATGTATGAGATGAGTCACGAGGGCCAAGTGGTGTGTGTGAGAAAGCAGGCACAGCACCACTAAACACTAAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:327 NH:i:1
-4_621258_621743_0:0:0_0:0:0_14a 99 4 621258 199 100M = 621644 486 GGTGCGCGACATCGCCATAATGACAAGGAACATCAAGAAGAACCGGGGCCTGTACAGGCACATCCTGCTGTACGGGCCACCAGGCACCGGGAAGACGCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:328 NH:i:1
+4_621258_621743_0:0:0_0:0:0_14a 1123 4 621258 199 100M = 621644 486 GGTGCGCGACATCGCCATAATGACAAGGAACATCAAGAAGAACCGGGGCCTGTACAGGCACATCCTGCTGTACGGGCCACCAGGCACCGGGAAGACGCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:328 NH:i:1
4_621258_621743_0:0:0_0:0:0_14a 147 4 621644 199 100M = 621258 -486 TGTTCTGTCTCCCCTCACTCTTCTTGTCCAGAAACTCGCCCTGCACTCAGGCATGGACTACGCCATCATGACAGGCGGGGACGTGGCCCCCATGGGGCGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:328 NH:i:1
4_630775_631296_0:0:0_0:0:0_14b 83 4 631197 199 100M = 630775 -522 TCTGAGAACAGGTGATTTTACCTCTCCCTTTTCAGTTTGGATGACTTTTCTTTTTCTTGTCTAATGGCCCTGGCCACAACTTCCAGTGGTATGTGGAATA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:329 NH:i:1
4_630775_631296_0:0:0_0:0:0_14b 163 4 630775 199 100M = 631197 522 CTCATGCCTTTAATCCCAGCACTTTGGGAGGCTGAGGTGGGTGGATCACAAGGTCAGGAGATTGAGACCATCCTGGCTAACATGGTGAAACCCCATCTCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:329 NH:i:1
@@ -467,21 +467,21 @@ chr3_727319_727786_0:0:0_0:0:0_c9 163 chr3 727319 199 100M = 727687 468 CCAAAGCA
4_444547_444970_0:0:0_0:0:0_14c 163 4 444547 199 100M = 444871 424 AGCAGGTGGGGCGCGGAGGAACTGAGCTGTGGCCTGGGGTTCCGTGGAGCAGGTTGAGTATGGAGGAACTGGGGTTCCGTGGAGCAGGTGGGGCGCAGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:330 NH:i:1
4_895442_895957_0:0:0_0:0:0_14d 83 4 895858 199 100M = 895442 -516 GGGATTACAGGTGCCCAGCATCACACCCGGCTAATTTTTGTATTTTTAGTAGAGATGGGGTTTCACCATGTTGGCCAAGCTGGTCTTGAACTACTGACCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:331 NH:i:1
4_895442_895957_0:0:0_0:0:0_14d 163 4 895442 199 100M = 895858 516 CAAACTGACAGCAATCAGCTTGAATTCCGAATCTGGTTCTTGTTATATTCTCCACATTTCAAGGGCTCAGAAGCCGTATGTGGCCAGTGGCTCCTGCACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:331 NH:i:1
-4_224288_224870_0:0:0_0:0:0_14e 99 4 224288 199 100M = 224771 583 ATGGCTGGGCTGGGGAAGCTGGGGTTCCCTGAGTGGGGTCCAGGGGGCCGAGGGGTGCCAGGCAGGGAGAGGCTGGGCTGAAGCCTGAGGCAGGTGCTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:332 NH:i:1
+4_224288_224870_0:0:0_0:0:0_14e 1123 4 224288 199 100M = 224771 583 ATGGCTGGGCTGGGGAAGCTGGGGTTCCCTGAGTGGGGTCCAGGGGGCCGAGGGGTGCCAGGCAGGGAGAGGCTGGGCTGAAGCCTGAGGCAGGTGCTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:332 NH:i:1
4_224288_224870_0:0:0_0:0:0_14e 147 4 224771 199 100M = 224288 -583 CCTGGGGCTGGGACCTACAGGGAGGCGGACGCGGCACAGCCAGGGAGGTCGGTCCCGCGGGCCCCGCCCCGCCCCCCAGCCTGGAGCGCCCCCCTCCGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:332 NH:i:1
-4_799545_800046_0:0:0_0:0:0_14f 99 4 799545 199 100M = 799947 502 TACGGTGATGCCCACGGGGCCGGGACAGGGCTGCGTGGGAGCTGGGCCTTGGCCATGGTCGGGGCTGAGGGGGCACTGACGGGGCTCTTTCCCCCACCCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:333 NH:i:1
+4_799545_800046_0:0:0_0:0:0_14f 1123 4 799545 199 100M = 799947 502 TACGGTGATGCCCACGGGGCCGGGACAGGGCTGCGTGGGAGCTGGGCCTTGGCCATGGTCGGGGCTGAGGGGGCACTGACGGGGCTCTTTCCCCCACCCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:333 NH:i:1
4_799545_800046_0:0:0_0:0:0_14f 147 4 799947 199 100M = 799545 -502 CCCTGGGCGAGGCGCACCTGAGCATCATCGCCAACGCCGTCAATGAGGGCACCTACACCTGCGTGGTGCGCCGCCAGCAGCGCGTGCTGACCACCTACTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:333 NH:i:1
4_188920_189355_0:0:0_0:0:0_150 83 4 189256 199 100M = 188920 -436 CTGGAGCGAGTGCCCAGGCGGAGCAGAGCGCTGGGAGGAGGGTCCGTGGGGAGCCCCTGCCAGGTGGGTGTGGCTTAGCCCAGCTGTGGGCAGTCGCACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:334 NH:i:1
4_188920_189355_0:0:0_0:0:0_150 163 4 188920 199 100M = 189256 436 CCCCTGGGTCCCCCACTGGGAGGACATGATAACCTTGGTCCTCTTCTGGGCACTCATGGTCTGGGGAAAAGATGGGGGCATCGTGGACTCAGGATAGGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:334 NH:i:1
-4_383134_383602_0:0:0_0:0:0_151 99 4 383134 199 100M = 383503 469 TAGCTCTGCGGGCGAGCGGGGCACAGGTCAGCGTCGCCTTTCCCCCTCCGAGCTCCGCGGCCAGCCGCACGAAGTGGCTATTTAAGGTGCCTATTGGCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:335 NH:i:1
+4_383134_383602_0:0:0_0:0:0_151 1123 4 383134 199 100M = 383503 469 TAGCTCTGCGGGCGAGCGGGGCACAGGTCAGCGTCGCCTTTCCCCCTCCGAGCTCCGCGGCCAGCCGCACGAAGTGGCTATTTAAGGTGCCTATTGGCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:335 NH:i:1
4_383134_383602_0:0:0_0:0:0_151 147 4 383503 199 100M = 383134 -469 CAGTGAAAACGCTTCAGAGGAAGGTGGTAAATTCCAGCCATGTGGCACAGGCCGCCCCGCCCACCTGCACCTGCTGGGCCTTGGACCTGCCTCCATGCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:335 NH:i:1
-4_798806_799325_0:0:0_0:0:0_152 99 4 798806 199 100M = 799226 520 TGACCGTCCAGGCTGGCCTCCTGGAGGCCTCTCCTCTGCAGCACAGTGGGCTGCCGCGGTCGGGCTTTGGGGCAGACGGCAGGAGGGACCTTCCGGGGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:336 NH:i:1
+4_798806_799325_0:0:0_0:0:0_152 1123 4 798806 199 100M = 799226 520 TGACCGTCCAGGCTGGCCTCCTGGAGGCCTCTCCTCTGCAGCACAGTGGGCTGCCGCGGTCGGGCTTTGGGGCAGACGGCAGGAGGGACCTTCCGGGGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:336 NH:i:1
4_798806_799325_0:0:0_0:0:0_152 147 4 799226 199 100M = 798806 -520 GCGGCCCACGAGATCGGCCACGCGCTGGGCCTGATGCACTCACAACACGGCCGGGCGCTCATGCACCTGAACGCCACGCTGCGCGGCTGGAAGGCGTTGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:336 NH:i:1
4_303918_304403_0:0:0_0:0:0_153 83 4 304304 199 100M = 303918 -486 GACAGGTTGGGGGGCCTGGGGCTTGCAGTGTTGCAGGTGAACTCTAGGGCCCTATCTCTGCAATGGGACGGCCTCCAGGTCATGGGAGGGCCCAGCCTGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:337 NH:i:1
4_303918_304403_0:0:0_0:0:0_153 163 4 303918 199 100M = 304304 486 GCTGAGGCAGGAGAATTGCTTGAATCTGGGAGGCAGAGGTTGCAGTGAGCTGAGATCACACCACTGCACTCCAGCCTGGGCGACAGAGTGAGACTTCATC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:337 NH:i:1
4_971418_971906_0:0:0_0:0:0_154 83 4 971807 199 100M = 971418 -489 CTCGTGATCCGCCCACCTCAGCCTCCCAAAGTGCTGGGATTACAGGCCTGAGCCACCGCGCCCGGCCCACAAAAAATATTTTTTAAAAATTAGCTGGGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:8 SD:i:0 SN:i:338 NH:i:1
4_971418_971906_0:0:0_0:0:0_154 163 4 971418 199 100M = 971807 489 GTCAAAAATCAAAACACCTAATGCCCTTCATTTTACTCAAAGGAGGGAGGGTGGTGGCTCATGCCTGTAACCCCAGCACTTTGGGAGGCTGAGATAAGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:338 NH:i:1
-4_133508_134044_0:0:0_0:0:0_155 99 4 133508 199 100M = 133945 537 TCCTCCCCGGGGCCCCGAGGCTGAGCTGCGCTGCAGGGCCAGCTGTGTGGCCCTTCCTGGTCTGTGGCCTATTTTTCATGGGTGCCAACCCGGCATCAGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:339 NH:i:1
+4_133508_134044_0:0:0_0:0:0_155 1123 4 133508 199 100M = 133945 537 TCCTCCCCGGGGCCCCGAGGCTGAGCTGCGCTGCAGGGCCAGCTGTGTGGCCCTTCCTGGTCTGTGGCCTATTTTTCATGGGTGCCAACCCGGCATCAGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:339 NH:i:1
4_133508_134044_0:0:0_0:0:0_155 147 4 133945 199 100M = 133508 -537 CATGCAGTGATACCCACAGCACATCCCAGGCCCTGTCTTAGGCTCTGGGGACACAGCAGTGAACGGAATAGACAACCCCTGTTCTGCTAGTGCTCCCATT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:339 NH:i:1
4_193476_194030_0:0:0_0:0:0_156 83 4 193931 199 100M = 193476 -555 GGCGCCCACACCCACGCCACCCTTTCCGAAGGAACCGAGCCCCAGCCCCTCATGGGCCAAGGGCACCCACAGCCACGCCACCCTTTCCGAAGGAACCGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:340 NH:i:1
4_193476_194030_0:0:0_0:0:0_156 163 4 193476 199 100M = 193931 555 GTGTGAGCAGGGCTCACCATGACCTTCACTGATTTCATCATCAGTGCCTTCCCCATTCACACACTCACGGTTGGCACCTCCACGGGGCTTCCCAGGGAAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:340 NH:i:1
@@ -491,33 +491,33 @@ chr3_727319_727786_0:0:0_0:0:0_c9 163 chr3 727319 199 100M = 727687 468 CCAAAGCA
4_145382_145800_0:0:0_0:0:0_158 163 4 145382 199 100M = 145701 419 CTTGGAGGCAGGTATAGACACAGCCATGTCCCTGTCAGGTTGCGGCTCAGAGGCAGGTGTAGACACAGCCATGTCCGTGTCAGGTTGCGGCTCAGAGGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:342 NH:i:1
4_969753_970276_0:0:0_0:0:0_159 83 4 970177 199 100M = 969753 -524 GAGTCTCGCTCTGTCACCCAGGCTGGAGTGCAGTGGTGTGATCTCAGCTCACTGCAACCTCTATCTCCTGGGTTCAAGTGATTCTCATGCCTCAGCCTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:343 NH:i:1
4_969753_970276_0:0:0_0:0:0_159 163 4 969753 199 100M = 970177 524 ATCCGCCCACCTCGGCCTCCCAACGTTTTGGTATTACAAGCGTTGAGCCACTGCGCCCGACCATGAATTTTTTCATAACTATTTCCTTGCTCTGGCAACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:343 NH:i:1
-4_360357_360952_0:0:0_0:0:0_15a 99 4 360357 199 100M = 360853 596 GGACTCCCATTATGCATATGTTGGTCCATTTGACAGTGTCCCACCCACAAGTCTCTGAGGCTTTATTTTTCTTCCCTCTCTCTCTCTCTTTCATTTTTTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:344 NH:i:1
+4_360357_360952_0:0:0_0:0:0_15a 1123 4 360357 199 100M = 360853 596 GGACTCCCATTATGCATATGTTGGTCCATTTGACAGTGTCCCACCCACAAGTCTCTGAGGCTTTATTTTTCTTCCCTCTCTCTCTCTCTTTCATTTTTTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:344 NH:i:1
4_360357_360952_0:0:0_0:0:0_15a 147 4 360853 199 100M = 360357 -596 TCTACTTTTCAACTTCAGAATTTCTATTTGATTCTCTTTTATAATTTTTCTCTCTTCACCAGTATTCTCTATTATTTGATAAGACATCATCGTCATACTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:344 NH:i:1
-4_648398_648915_0:0:0_0:0:0_15b 99 4 648398 199 100M = 648816 518 CCAACATTGCACCACTGCACTCCATTCTTGGCGAGAGAATAAGACCTTGTCTCAAGAAAAAAATGGCCAGGCGGTAGTGGCTCAGGCCTGTAATCCCAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:345 NH:i:1
+4_648398_648915_0:0:0_0:0:0_15b 1123 4 648398 199 100M = 648816 518 CCAACATTGCACCACTGCACTCCATTCTTGGCGAGAGAATAAGACCTTGTCTCAAGAAAAAAATGGCCAGGCGGTAGTGGCTCAGGCCTGTAATCCCAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:345 NH:i:1
4_648398_648915_0:0:0_0:0:0_15b 147 4 648816 199 100M = 648398 -518 CTGAGGTAGGAAAATTGGTTGAACCCAGGAGGCGGAAGTTGCAGTGAGCTGAAACCGCACAATTGCACTCCAACCTGTGGAAGAAGAGCGAAACTCTGTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:345 NH:i:1
4_666555_667090_0:0:0_0:0:0_15c 83 4 666991 199 100M = 666555 -536 CACCGTGCCCGGCCTAATTTTTGTATTTTTAGTAGAGGCAGGGTTTCACCATGTTGACCAGGCTGCTCCTGAACTCCTGACCTCAGATGATGCACCTGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:346 NH:i:1
4_666555_667090_0:0:0_0:0:0_15c 163 4 666555 199 100M = 666991 536 TTGAGATGGAGTCTTACTCTGTCGCCCAGGCTGGAGTGCAATGGCGTGATCTCGGCTCAGTGCAACCTCCACCTCCTGAGTTTAAGCGATTCTCCTGCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:346 NH:i:1
-4_941157_941637_0:0:0_0:0:0_15d 99 4 941157 199 100M = 941538 481 CATTCATATATATATATATTTGTGTGTGTGTGTGTGTGTGTGTGTAGAGATGAGGTTTTACCATTTTGCCCAGGCTGGGTCTCAAACTTCACCCGCCTCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:12 SD:i:0 SN:i:347 NH:i:1
+4_941157_941637_0:0:0_0:0:0_15d 1123 4 941157 199 100M = 941538 481 CATTCATATATATATATATTTGTGTGTGTGTGTGTGTGTGTGTGTAGAGATGAGGTTTTACCATTTTGCCCAGGCTGGGTCTCAAACTTCACCCGCCTCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:12 SD:i:0 SN:i:347 NH:i:1
4_941157_941637_0:0:0_0:0:0_15d 147 4 941538 199 100M = 941157 -481 ACTGGAAAGGGAAGCAAGGATCATGAGAACTTGGGGGATTTCTTAAGCTCTGTAACTTCTCCCGGGGTTATTTTGCAAACCCAACTGTTTCAAAGTGACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:347 NH:i:1
-4_128218_128719_0:0:0_0:0:0_15e 99 4 128218 199 100M = 128620 502 AGAGCCTGGTGAGGCACCACCCTGACTGCAAGGACCTCCTCATCGAGGCCCTGAAGTTCCACCTGCTGCCTGAGCAGAGGGGCGTCCTAGGCACCAGCCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:348 NH:i:1
+4_128218_128719_0:0:0_0:0:0_15e 1123 4 128218 199 100M = 128620 502 AGAGCCTGGTGAGGCACCACCCTGACTGCAAGGACCTCCTCATCGAGGCCCTGAAGTTCCACCTGCTGCCTGAGCAGAGGGGCGTCCTAGGCACCAGCCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:348 NH:i:1
4_128218_128719_0:0:0_0:0:0_15e 147 4 128620 199 100M = 128218 -502 CTGGCACGTGGTGGCCTCCATGTCCACGCGCCGGGCCCGGGTGGGAGTGGCTGCGGTGGGGAACCGGCTCTATGCTGTGGGCGGGTAAGCCTGGAGGCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:348 NH:i:1
4_479878_480382_0:0:0_0:0:0_15f 83 4 480283 199 100M = 479878 -505 ACGCTCCACGGTCTCGTGGACTTTCTTCAGGAAGTCTCGCTCCCGGCAGCGCTTGGAGTCACGGATGGTCGTGGCGTACGTGGACTCTGTGATGAGCAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:349 NH:i:1
4_479878_480382_0:0:0_0:0:0_15f 163 4 479878 199 100M = 480283 505 AGTGAGGGCACGGCCAGGTGCCCAAGAGCTGCGGCCTCATAGGGACCTTAGCCTCTCATCTGCTCCCAGTCCCGTCCCAGCCGCTCTCCAGAGACAGAAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:349 NH:i:1
-4_412416_412959_0:0:0_0:0:0_160 99 4 412416 199 100M = 412860 544 CTCGCAGGCTCCAGGGTGGGGGCTCACACTCGGACGATCAGCCAGCCGCCCGGGCGCCCCGACGGCCAGGGACGGAGACGGGGCGGAGACCCCCACTGAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:350 NH:i:1
+4_412416_412959_0:0:0_0:0:0_160 1123 4 412416 199 100M = 412860 544 CTCGCAGGCTCCAGGGTGGGGGCTCACACTCGGACGATCAGCCAGCCGCCCGGGCGCCCCGACGGCCAGGGACGGAGACGGGGCGGAGACCCCCACTGAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:350 NH:i:1
4_412416_412959_0:0:0_0:0:0_160 147 4 412860 199 100M = 412416 -544 CCCTTGGGCTCAGTGGCTGGAGGAATGGGGTGGCCAGGTGGGGCTGGAAGACCCTCCAGCCGCTGGTCCTGGCCGAGGCTAACAGTGGCCCAAGGTCAAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:350 NH:i:1
-4_724932_725458_0:0:0_0:0:0_161 99 4 724932 199 100M = 725359 527 CGGTCAGTCCACCCAAGGAAAGAACCTACACAGTAGAGTTCCCATTTGTCTACAAAGGTCAGGGGAGCTTAGATCCCCGTCCCTGGTCCAGGTATATACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:351 NH:i:1
+4_724932_725458_0:0:0_0:0:0_161 1123 4 724932 199 100M = 725359 527 CGGTCAGTCCACCCAAGGAAAGAACCTACACAGTAGAGTTCCCATTTGTCTACAAAGGTCAGGGGAGCTTAGATCCCCGTCCCTGGTCCAGGTATATACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:351 NH:i:1
4_724932_725458_0:0:0_0:0:0_161 147 4 725359 199 100M = 724932 -527 AGACGGGGGTTTCTCCATTTTGGTCAGGCTGCTCCCAAACTCTCGACCTTGGGTGATCCGCCCACCTCGGCCTCCCAAAGTGCTGGGATTACAGGCATGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:351 NH:i:1
-4_924508_924995_0:0:0_0:0:0_162 99 4 924508 199 100M = 924896 488 GAAAACTTGCACACAGCTGGCCAGGCAACGGCCCAACAAAATCCTCAAGTCCCAATGCAGAAGAACGGCCTTCCGCTGCCTCGCAGGGCCAGACAACCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:352 NH:i:1
+4_924508_924995_0:0:0_0:0:0_162 1123 4 924508 199 100M = 924896 488 GAAAACTTGCACACAGCTGGCCAGGCAACGGCCCAACAAAATCCTCAAGTCCCAATGCAGAAGAACGGCCTTCCGCTGCCTCGCAGGGCCAGACAACCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:352 NH:i:1
4_924508_924995_0:0:0_0:0:0_162 147 4 924896 199 100M = 924508 -488 GGTGGGCGGATCACCTCAAGTCAGGTGTTCGAGACCAGCCTGGCCAACATGGTGAAATCCCATCTCTACTAAAAATACAAAAATTAGACAGGCGTGGTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:352 NH:i:1
4_243538_244063_0:0:0_0:0:0_163 81 4 241358 193 100M = 243538 2280 CTATGGGGACTCCGTGGGGGGAGGCTGAGGCTATGGGGACTCCGTGGGGGGAGGCTGAGGCTATGGGGACTCCGTGGGGGGAGGCTGAGGCTATGGGGAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:6 SD:i:0 SN:i:353 NH:i:1
4_243538_244063_0:0:0_0:0:0_163 161 4 243538 199 100M = 241358 -2280 CTGAGGCTATGGGGACTCCGTGCGGGGAGGCTGAGGCTATGGGGACTCCGTGGGGGGACGCTGAGGCTATGGGGACTCCGTGGGGGGAGGCTGAGGCTAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:4 SD:i:0 SN:i:353 NH:i:1
4_672048_672547_0:0:0_0:0:0_164 83 4 672448 199 100M = 672048 -500 AGGATGGTCTCGATCCCCTGACTTCGTGATCTGCCCGCTTCAGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCCCGCGCCCAGCAGGATTCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:8 SD:i:0 SN:i:354 NH:i:1
4_672048_672547_0:0:0_0:0:0_164 163 4 672048 199 100M = 672448 500 GTGAATTTCCAAGTATTTTATTCTTTCAGATGCTATTGTAAATGGAAATGGTTTCATAATTTCCCTTTCAGATTATTCATTGTTAGTATGTAGAATTGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:354 NH:i:1
-4_417287_417720_0:0:0_0:0:0_165 99 4 417287 199 100M = 417621 434 CCAATATGGTGAAACCCCGTCTCTACTTAAAATTAGCTGGGTGTGGTGGCAGGCACCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCACTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:7 SD:i:0 SN:i:355 NH:i:1
+4_417287_417720_0:0:0_0:0:0_165 1123 4 417287 199 100M = 417621 434 CCAATATGGTGAAACCCCGTCTCTACTTAAAATTAGCTGGGTGTGGTGGCAGGCACCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCACTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:7 SD:i:0 SN:i:355 NH:i:1
4_417287_417720_0:0:0_0:0:0_165 147 4 417621 199 100M = 417287 -434 CAAAAATTAGCCAGGCATGGTGGCGCACACTTGCGGTCCCAGATATGTAGGGGGCTGAGTTTGGAGGATCCCTTGAGCCCAGGAGGTCACCGTTACATGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:355 NH:i:1
-4_6845_7386_0:0:0_0:0:0_166 99 4 6845 199 100M = 7287 542 GCCAGGACCAGTTCCCTCTGCTCTTGGGTGCCAAGGCCACCGTCAGCCAGGAGAGGCCTGCAGAATCAGCCTGTGCACCTCTCTGACGATAAGCAGAGAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:356 NH:i:1
+4_6845_7386_0:0:0_0:0:0_166 1123 4 6845 199 100M = 7287 542 GCCAGGACCAGTTCCCTCTGCTCTTGGGTGCCAAGGCCACCGTCAGCCAGGAGAGGCCTGCAGAATCAGCCTGTGCACCTCTCTGACGATAAGCAGAGAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:356 NH:i:1
4_6845_7386_0:0:0_0:0:0_166 147 4 7287 199 100M = 6845 -542 CCACATGCACCAGACCTGGGCGGCCGTCGGCAGCCCACCCTGAGCAACCATGGGTGGCCCTCTTGTCTCTCGGGACAAGAAATGCTTCTTTAGAAATGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:356 NH:i:1
-4_96833_97387_0:0:0_0:0:0_167 99 4 96833 199 100M = 97288 555 CCTCCCATGTGAGACTGGCCATTTGAGCCCAAAAATGAGGCTGTCACCTCCCCCTTCCCACCCTCCTAGAGACCCACAAGGAGGTGAGAATGCTGATGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:357 NH:i:1
+4_96833_97387_0:0:0_0:0:0_167 1123 4 96833 199 100M = 97288 555 CCTCCCATGTGAGACTGGCCATTTGAGCCCAAAAATGAGGCTGTCACCTCCCCCTTCCCACCCTCCTAGAGACCCACAAGGAGGTGAGAATGCTGATGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:357 NH:i:1
4_96833_97387_0:0:0_0:0:0_167 147 4 97288 199 100M = 96833 -555 TGGGGGACCCGAGTTCCTGGCTCCAGGGGGAAGCGAGTGGTAAGTCTGTGAACAGAGCCCAGCTGTGGATTCTGTCAATGGGGTCAGGTCTCACCCTGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:357 NH:i:1
4_883884_884386_0:0:0_0:0:0_168 83 4 884287 199 100M = 883884 -503 TCTAAAGTTTTCACTTTCCAAGAGTCCTTTTCATCACCCATTTGAGTTAAAACACTGCAAAAAGAAAAATAATTCAGCCTACATCAGGACACAGCAAGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:358 NH:i:1
4_883884_884386_0:0:0_0:0:0_168 163 4 883884 199 100M = 884287 503 TTAATTTCAAGGCAAGTTCCCACTATATTAAAAATACTTAGAGATAGTATTATGAATATACTAATAATGAACCGAGAAAAATTAGTCCAGTTTTGCTAAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:358 NH:i:1
@@ -529,19 +529,19 @@ chr3_727319_727786_0:0:0_0:0:0_c9 163 chr3 727319 199 100M = 727687 468 CCAAAGCA
4_901979_902478_0:0:0_0:0:0_16b 163 4 901979 199 100M = 902379 500 CGTCACCACGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACAGTGCTAGCCAGGATGATCTCGATCTCCTGACCTCATGATCCGACCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:361 NH:i:1
4_974702_975205_0:0:0_0:0:0_16c 83 4 975106 199 100M = 974702 -504 CCTCCCACCTCGGCCTCCTAAGTAGCTGGGACTCCAGGTGCGCACCACAACAACCAACTTATTTTTGTACTTTTTGTAGAGAAGGGGTCTCGCCATGTGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:362 NH:i:1
4_974702_975205_0:0:0_0:0:0_16c 163 4 974702 199 100M = 975106 504 GTGGTGCAATTTCGGCTCACAGTAACCTCCACGTGCCTCAGCCTTCCAAGTAGCTGGGACTACAGCTACCATGCCCAGCTAATTTTTAAATTTTTAGCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:362 NH:i:1
-4_53843_54369_0:0:0_0:0:0_16d 99 4 53843 199 100M = 54270 527 AGAAGGAAGGATGGAGGGAGAGAAAGATGGAAAGAGAGAAAGAAGGAAATGAGAGAGAGAGAAGGAGAGAAGGAAGGAAGGAGGGAGAGAAAGGGAAAGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:363 NH:i:1
+4_53843_54369_0:0:0_0:0:0_16d 1123 4 53843 199 100M = 54270 527 AGAAGGAAGGATGGAGGGAGAGAAAGATGGAAAGAGAGAAAGAAGGAAATGAGAGAGAGAGAAGGAGAGAAGGAAGGAAGGAGGGAGAGAAAGGGAAAGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:363 NH:i:1
4_53843_54369_0:0:0_0:0:0_16d 147 4 54270 199 100M = 53843 -527 TGTTCAATATTCATTTCTCAGAGCATTCAAAGGTCTATGTGTGCCCTGCCATGATGGACCATGGCTTCAGCTCTCAAACACAACCCTCAGCCAAAATATT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:363 NH:i:1
4_406352_406967_0:0:0_0:0:0_16e 81 4 406868 199 100M = 406352 -616 GAGAGAGGTGGGGACGGGTCACCACCCAAGCCCACCTCGTGCCGATTGGCGCCTGCCCACACACCTCGTCGCAGGGCTGGGCTGTCCCGCCTCACTGCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:364 NH:i:1
4_406352_406967_0:0:0_0:0:0_16e 161 4 406352 199 100M = 406868 616 ATGGTGAGGCCCCAGGCGGTGTTCAGAAAGGCCTGGCTGGGTGCTGCCTGATCCTGGGTGCCTGCCCCCAGCCCGTTCTTGCCCAGGGTTGGCCCGTCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:364 NH:i:1
-4_358237_358741_0:0:0_0:0:0_16f 99 4 358237 199 100M = 358642 505 CTCCAGTTGGTATGAGTGTAGACACTCCAGTTGGTATGAGTGTAGACACTCCAGTTGGTATGAGTGTAGACACTCCAGTTCTACTTTGGTTACCGTGTGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:365 NH:i:1
+4_358237_358741_0:0:0_0:0:0_16f 1123 4 358237 199 100M = 358642 505 CTCCAGTTGGTATGAGTGTAGACACTCCAGTTGGTATGAGTGTAGACACTCCAGTTGGTATGAGTGTAGACACTCCAGTTCTACTTTGGTTACCGTGTGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:365 NH:i:1
4_358237_358741_0:0:0_0:0:0_16f 147 4 358642 199 100M = 358237 -505 GTGAGCAACTGTAGTCCCAGCTGCTCGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCAGGAGGCGGAGCTTGCAGTGAGCCAAGATTGCGCCATTGCAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:365 NH:i:1
4_546499_547045_0:0:0_0:0:0_170 83 4 546946 199 100M = 546499 -547 GGAAAGCCTCAGGCAGTAGAGGCGCCCTGTGTGGAAGCTGGACACTGCCAGAGTGGTGGGACTGGAGGCTCTGGGGACAATGGGCCCTGTCTGTGTCCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:366 NH:i:1
4_546499_547045_0:0:0_0:0:0_170 163 4 546499 199 100M = 546946 547 GAGATACCCACCCACCGCACCAGACGCCTCCCCATGTGGATTCCTCCTGCAGGTACTCCTGGGGCTGGAGTCACCCCTGGGCCTGGCCTTGCTGCAGGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:366 NH:i:1
-4_262195_262731_0:0:0_0:0:0_171 99 4 262195 199 100M = 262632 537 GACATGGCCTCACTCTGTCACCCAGGCTGGAGCAAAGTGGTGTGACCTCAGCTCACCACAGCCTCAGTCTCCCAGGCCCAGGCCATCCTCCCACCTCAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:367 NH:i:1
+4_262195_262731_0:0:0_0:0:0_171 1123 4 262195 199 100M = 262632 537 GACATGGCCTCACTCTGTCACCCAGGCTGGAGCAAAGTGGTGTGACCTCAGCTCACCACAGCCTCAGTCTCCCAGGCCCAGGCCATCCTCCCACCTCAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:367 NH:i:1
4_262195_262731_0:0:0_0:0:0_171 147 4 262632 199 100M = 262195 -537 CTGCAACCTCTGCCTCCCGGGTTCAAGCGATTCTTGTGCCTCAGCCTCCTGAGCAGCTGGGATTACAGGCACCCACCACCATGCCTGGCTAATTTTTGTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:367 NH:i:1
4_776422_777010_0:0:0_0:0:0_172 83 4 776911 199 100M = 776422 -589 CTTACTTCAGCAAAGTAAAGGAGTAAAGCCAGAAGGAGGAGGACGCAGATTCCAGGAAACAGCTGTTAACAGAAAAGCCAAACTCTGAATAAGACATTCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:368 NH:i:1
4_776422_777010_0:0:0_0:0:0_172 163 4 776422 199 100M = 776911 589 GGCAGCAGGCAAGTTCCAACAAGGGACTGCCTCAAACCATCCTGAGGTGGGGCTGGTTGGTCCCAAAGAAAAGAAGCACTAAACGCCAGGGTGATCGGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:368 NH:i:1
-4_77105_77664_0:0:0_0:0:0_173 99 4 77105 199 100M = 77565 560 AGGAATCGAAGAGCAGGGGGAGCCACTGAGTGGGGTGCAGGGAAGATGGGGTGTCTGGGAGCTCAGCCTCGTGCAGAGGGAGGGAAGCAGGCAGCTCCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:369 NH:i:1
+4_77105_77664_0:0:0_0:0:0_173 1123 4 77105 199 100M = 77565 560 AGGAATCGAAGAGCAGGGGGAGCCACTGAGTGGGGTGCAGGGAAGATGGGGTGTCTGGGAGCTCAGCCTCGTGCAGAGGGAGGGAAGCAGGCAGCTCCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:369 NH:i:1
4_77105_77664_0:0:0_0:0:0_173 147 4 77565 199 100M = 77105 -560 CCTCCAGGTCCTCCTGGGTCCCCTCCTTTCTTTCACTTTCAGCCCGGGAATCTCTGTGAAGTCCTAGCTGATAACTAAGGGGTTAATGAATAATCACAGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:369 NH:i:1
4_928548_929027_0:0:0_0:0:0_174 83 4 928928 199 100M = 928548 -480 GGGTCACAGCCCATGGGAAACCATGAACTGAGCATTCAAGCCAATAAAGCCAGAAATAGAATAAAACAAGCTTCAGAAGAACAGAAGAGGGGCCGTGCAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:370 NH:i:1
4_928548_929027_0:0:0_0:0:0_174 163 4 928548 199 100M = 928928 480 CTAGCAGCTGGGACTACAGGCGTGCGCCACCACGCTCAGCTAATTTTTTTGAATTTTTAGTAGAGATGGGGTTTCACCATGTTGGCCAGGCTGGTCTCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:370 NH:i:1
@@ -549,37 +549,37 @@ chr3_727319_727786_0:0:0_0:0:0_c9 163 chr3 727319 199 100M = 727687 468 CCAAAGCA
4_240486_241045_0:0:0_0:0:0_175 163 4 240486 199 100M = 240946 560 TTGAGAGTCTGGGGGCTGAGGCTGGCAGGGCTGGGATGGAGGATGTGTGGGTCTCAGAGGAGCAGCCAGTGGCATGGGCTGCCCCGTCCCCCGCACACCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:371 NH:i:1
4_24977_25345_0:0:0_0:0:0_176 83 4 25246 199 100M = 24977 -369 CCCTGAGTTATTAAACACACCTCTACTACACCTATGAAATTGTCTACAAATTGTCTTAGTTTCTTTTCAGCTTTGATTTCTAGAATTGCATGACATTGAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:372 NH:i:1
4_24977_25345_0:0:0_0:0:0_176 163 4 24977 199 100M = 25246 369 TTCAATAATTTCCATTATAATAATAATGTCATGAATACCTTTGTACAAGAATATTTATATTTCTAATTATTTCCTTAGTCTAGTATCCTAAAAACTAAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:372 NH:i:1
-4_714380_714885_0:0:0_0:0:0_177 99 4 714380 199 100M = 714786 506 CCCGGTGTGTGGCTGCATGGCCTCCTGAGTCTGGGGATGGGAGAATAAGGCTTGTGCTGGAGGCCTTGGGCTGTGACACATACCTGTGGTCCCAGCTACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:373 NH:i:1
+4_714380_714885_0:0:0_0:0:0_177 1123 4 714380 199 100M = 714786 506 CCCGGTGTGTGGCTGCATGGCCTCCTGAGTCTGGGGATGGGAGAATAAGGCTTGTGCTGGAGGCCTTGGGCTGTGACACATACCTGTGGTCCCAGCTACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:373 NH:i:1
4_714380_714885_0:0:0_0:0:0_177 147 4 714786 199 100M = 714380 -506 ACAGGGAAACCCTGTCTCTACTAAAAACACAAAAAATTAGCCAGGCGTGGTGGCAGGCGCCTGTAGTCCCGGCTACTCAGGAGGCTGAGGCGGGAGAATG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:373 NH:i:1
-4_626455_626993_0:0:0_0:0:0_178 99 4 626455 199 100M = 626894 539 CCTGTCTCCAGGATGGGCACCACCTCCCTGCCCTGCGGTTTCGTGCAGGAGCCCTGTGGGCCCCAAGGGTCCCAGAGGCCGCATCCAGGGAGGTCCTGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:374 NH:i:1
+4_626455_626993_0:0:0_0:0:0_178 1123 4 626455 199 100M = 626894 539 CCTGTCTCCAGGATGGGCACCACCTCCCTGCCCTGCGGTTTCGTGCAGGAGCCCTGTGGGCCCCAAGGGTCCCAGAGGCCGCATCCAGGGAGGTCCTGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:374 NH:i:1
4_626455_626993_0:0:0_0:0:0_178 147 4 626894 199 100M = 626455 -539 TCCCAAAGTGCAGGGATTACAGATGTGAGCCACTGTGCCCGGCCTCTTTTACGTTCCTCTTCACTACGCGGAGAGCTGTGAGGAAATCCTGTACCTGGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:374 NH:i:1
-4_616216_616720_0:0:0_0:0:0_179 99 4 616216 199 100M = 616621 505 GGGCCGGTGTGGGCGGGGAGGCCGGGGCACACATGGGGTTCGGGCGTGGAGATTGGTAGGGCTACTGCCGGTGGGTAGGGCCGGGGGTGTGTACATGGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:375 NH:i:1
+4_616216_616720_0:0:0_0:0:0_179 1123 4 616216 199 100M = 616621 505 GGGCCGGTGTGGGCGGGGAGGCCGGGGCACACATGGGGTTCGGGCGTGGAGATTGGTAGGGCTACTGCCGGTGGGTAGGGCCGGGGGTGTGTACATGGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:375 NH:i:1
4_616216_616720_0:0:0_0:0:0_179 147 4 616621 199 100M = 616216 -505 GCTGGGACTACAGGCACCTGCCACCACGCCCAGCTAATTTTTTTTGTATATTTAGTAGAGATGGGGTTTCAGCGTGTCAGCCAGGATGGTGTCGATTTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:375 NH:i:1
-4_56037_56596_0:0:0_0:0:0_17a 99 4 56037 199 100M = 56497 560 TCTTCGTTGCTATGACCCAGCGTCTCCTGTGTCAGTTTTCAGTCTATTGTCTCCCAGCTTCTAGTGCACCTTTCAATATGTGCACTGTGATAAACTGGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:376 NH:i:1
+4_56037_56596_0:0:0_0:0:0_17a 1123 4 56037 199 100M = 56497 560 TCTTCGTTGCTATGACCCAGCGTCTCCTGTGTCAGTTTTCAGTCTATTGTCTCCCAGCTTCTAGTGCACCTTTCAATATGTGCACTGTGATAAACTGGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:376 NH:i:1
4_56037_56596_0:0:0_0:0:0_17a 147 4 56497 199 100M = 56037 -560 AAGTTTCCTGGTTACATGAAGTAATTAAGACGTATTTAAAAGCCAAGAGTACAAAATTAGACCTGATGAAAAAGCAGGAGTTATCATCCCAGCCATGCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:376 NH:i:1
-4_100155_100701_0:0:0_0:0:0_17b 99 4 100155 199 100M = 100602 547 GGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:377 NH:i:1
+4_100155_100701_0:0:0_0:0:0_17b 1123 4 100155 199 100M = 100602 547 GGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:377 NH:i:1
4_100155_100701_0:0:0_0:0:0_17b 147 4 100602 199 100M = 100155 -547 GAAAAGAGTTAAATGCATGTTGATTCCAAGCCCCCGCCTGCCGGGGGGACAGCGGGAGGTTGGAGCACGCAGCCCTGGTGCCTGGTGCGAGCTGCACGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:377 NH:i:1
4_937691_938221_0:0:0_0:0:0_17c 83 4 938122 199 100M = 937691 -531 AAATACAAAAATTAGCTGAGCGTGGTGGTGGGCGCCTGTAATCCCAGCTACTCAGGAGGCTGAGGCAGGAAAATCACTTGAACCCGGGAGGTGGAGGTTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:8 SD:i:0 SN:i:378 NH:i:1
4_937691_938221_0:0:0_0:0:0_17c 163 4 937691 199 100M = 938122 531 CAAGCGTGAAGCTCAATAAAATCAACTATTGTTTTCCGTTTTCATCACGAGCGCCACTTTCCCCCTATTTGTTCACCGCCTCCACTCCACAAGAACTGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:378 NH:i:1
-4_775932_776322_0:0:0_0:0:0_17d 99 4 775932 199 100M = 776223 391 ATTACCCAGTCTCAGGCAGTTCTTCGTAAGACCGTGAGAATGGACTAACACAAAGTCCCCGTCACCTATGCTCTTTGCTTCAAAGCAGCTTAGGTGGGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:379 NH:i:1
+4_775932_776322_0:0:0_0:0:0_17d 1123 4 775932 199 100M = 776223 391 ATTACCCAGTCTCAGGCAGTTCTTCGTAAGACCGTGAGAATGGACTAACACAAAGTCCCCGTCACCTATGCTCTTTGCTTCAAAGCAGCTTAGGTGGGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:379 NH:i:1
4_775932_776322_0:0:0_0:0:0_17d 147 4 776223 199 100M = 775932 -391 CCACGGGTTGCAGCAGAAAGAGAGTTTTAATCCTAAGGCTGCTGTACAAGGAGATGGCGTGAAGCAAGCAGGCTGCAGGGATCTTGTGGCCATCCGGGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:379 NH:i:1
-4_697943_698511_0:0:0_0:0:0_17e 99 4 697943 199 100M = 698412 569 CTTGCCACGTGGTGAACATCATGTGTCACTGTTGGACCCACCTGTGACTGGGTTTTGCCCCAGAATCCCACCCAGGACGCCACGTGACATTTAGCTGTCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:380 NH:i:1
+4_697943_698511_0:0:0_0:0:0_17e 1123 4 697943 199 100M = 698412 569 CTTGCCACGTGGTGAACATCATGTGTCACTGTTGGACCCACCTGTGACTGGGTTTTGCCCCAGAATCCCACCCAGGACGCCACGTGACATTTAGCTGTCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:380 NH:i:1
4_697943_698511_0:0:0_0:0:0_17e 147 4 698412 199 100M = 697943 -569 GTCCGGCTCCGGTCAGTGTCTCCCCACACAGTGGCTCTTGGTGAGGGGTGGGCGCTTGCAGAGGGGACGGGCACCACGTGGTCATCCCCATGGCAGGTTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:380 NH:i:1
-4_562582_563084_0:0:0_0:0:0_17f 99 4 562582 199 100M = 562985 503 TAGGCCTCCCAACGTGCTGGGATTACAGGCCAGCCCGGCCAGAGGCTGTGTTTTCTAAAGGACTTCAATTAACTTCAGGTGCATAAACCCTGGGCTAAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:381 NH:i:1
+4_562582_563084_0:0:0_0:0:0_17f 1123 4 562582 199 100M = 562985 503 TAGGCCTCCCAACGTGCTGGGATTACAGGCCAGCCCGGCCAGAGGCTGTGTTTTCTAAAGGACTTCAATTAACTTCAGGTGCATAAACCCTGGGCTAAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:381 NH:i:1
4_562582_563084_0:0:0_0:0:0_17f 147 4 562985 199 100M = 562582 -503 AATCCTGCAGCACACAGGAGAATCCCCTACAACTAAAGATTACCCAGCCAGATGCGGTGGCTGAAATCTGTAATCCCAGTGCTTTGGGAGGCTAAGGCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:381 NH:i:1
-4_681027_681546_0:0:0_0:0:0_180 99 4 681027 199 100M = 681447 520 TGTTGGCTGGTATCCTATTCTTATTCATTCCCTTTCTTGCCTGCATGTGCTTGATAGCAGATCAATGACAACAGCCATTGGAAGCTTAGGAAAAGCGAAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:382 NH:i:1
+4_681027_681546_0:0:0_0:0:0_180 1123 4 681027 199 100M = 681447 520 TGTTGGCTGGTATCCTATTCTTATTCATTCCCTTTCTTGCCTGCATGTGCTTGATAGCAGATCAATGACAACAGCCATTGGAAGCTTAGGAAAAGCGAAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:382 NH:i:1
4_681027_681546_0:0:0_0:0:0_180 147 4 681447 199 100M = 681027 -520 TCCTTGCGTCTGCAGGTTATGCCAAGGACGCCCTGAATCTGGCACAGATGCAGGAGCAGACGCTGCAGTTGGAGCAACAGTCCAAGCTCAAAGTGAGTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:382 NH:i:1
-4_768538_769051_0:0:0_0:0:0_181 99 4 768538 199 100M = 768952 514 GACTGGGCCACGGGGGCTCGTGGGCAGCCCTGGCCAGGGCTCAGTCCTCACTGTAGCAGGGAAGCTGCCGGCCCTCTAGGCAGGCCGCCCTCTGGGAACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:383 NH:i:1
+4_768538_769051_0:0:0_0:0:0_181 1123 4 768538 199 100M = 768952 514 GACTGGGCCACGGGGGCTCGTGGGCAGCCCTGGCCAGGGCTCAGTCCTCACTGTAGCAGGGAAGCTGCCGGCCCTCTAGGCAGGCCGCCCTCTGGGAACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:383 NH:i:1
4_768538_769051_0:0:0_0:0:0_181 147 4 768952 199 100M = 768538 -514 ACCCTGGGAGCTGGGTGCCAGTGTCAGCATCCTGGCACACGTGTCCCTTAGGGCTCCCGGGAGCCACCCTTTCCTGCCCCTGTGGGTCTCCCATTTCTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:383 NH:i:1
-4_545459_546024_0:0:0_0:0:0_182 99 4 545459 199 100M = 545925 566 TCACACCACTGCACTCCAGCCTGGGTGACAGAGACAACAAAATATAATTGGCTGGGCTGGGTGGAGTGGCTCACGCCTGTAATCCCAGCACTCTGGGAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:384 NH:i:1
+4_545459_546024_0:0:0_0:0:0_182 1123 4 545459 199 100M = 545925 566 TCACACCACTGCACTCCAGCCTGGGTGACAGAGACAACAAAATATAATTGGCTGGGCTGGGTGGAGTGGCTCACGCCTGTAATCCCAGCACTCTGGGAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:384 NH:i:1
4_545459_546024_0:0:0_0:0:0_182 147 4 545925 199 100M = 545459 -566 CTAAGGGAACAGGAGCAGTGCATCTGGCTCTAGATGCCCGGGCACGGGCACCACTGTGAGCTGTCCCAGGTGGTAGGGCCTGCACCCCCTGCTGATGGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:384 NH:i:1
-4_958014_958489_0:0:0_0:0:0_183 99 4 958014 199 100M = 958390 476 TGTCTGTGCACATAAGAAGTTCTCAGTAAATGGAGACAGTTACTATTTCTGTTATTATTGAATTTGAACAAATTCCCTGGGTATGTGTGGGGGGACACTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:385 NH:i:1
+4_958014_958489_0:0:0_0:0:0_183 1123 4 958014 199 100M = 958390 476 TGTCTGTGCACATAAGAAGTTCTCAGTAAATGGAGACAGTTACTATTTCTGTTATTATTGAATTTGAACAAATTCCCTGGGTATGTGTGGGGGGACACTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:385 NH:i:1
4_958014_958489_0:0:0_0:0:0_183 147 4 958390 199 100M = 958014 -476 GAGAGGGGGTGGCTTGAAGAGAGGCACGGCAGAGAGTGGAAGCGTAGGAGAAAGATGGGCTCCTGGGCATGTGGTGTCAGCAGAGGTGCCTCAAGGATAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:385 NH:i:1
4_444478_445022_0:0:0_0:0:0_184 83 4 444923 199 100M = 444478 -545 GCTCACTGCAAGCTCTGCCTCCCGGGTTCACGCCATTCTCCCGCCTCAGCCTCCCAAGTAGCTGGGACTACAGGCTCCCGCCACAATGCCCGGCTAATTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:386 NH:i:1
4_444478_445022_0:0:0_0:0:0_184 163 4 444478 199 100M = 444923 545 GCTTACATTATTTATTTCCCACTTTTTTTTTTCTTGAAAGGAACCGAGCTGTGGCCTGGGGTTCCATGGAGCAGGTGGGGCGCGGAGGAACTGAGCTGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:386 NH:i:1
-4_212266_212801_0:0:0_0:0:0_185 99 4 212266 199 100M = 212702 536 CCAGGGCTCGAGCCCTTGGAGGGCAGCAGCGTGGCCACCCCTGGGCCACCTGTCGAGAGGGCTTCCTGCTACAACTCCGCGTTGGGCTGCTGCTCTGATG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:387 NH:i:1
+4_212266_212801_0:0:0_0:0:0_185 1123 4 212266 199 100M = 212702 536 CCAGGGCTCGAGCCCTTGGAGGGCAGCAGCGTGGCCACCCCTGGGCCACCTGTCGAGAGGGCTTCCTGCTACAACTCCGCGTTGGGCTGCTGCTCTGATG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:387 NH:i:1
4_212266_212801_0:0:0_0:0:0_185 147 4 212702 199 100M = 212266 -536 TGCTGGGGCCTGTGCCTGGGCCAGCCTGGATGCCAGGCAGATGCCAGGCAGGGCCTCACTGTACCTCCCCCACAGCCACCAAGGTGTTCCAGGGCGTCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:387 NH:i:1
-4_127037_127568_0:0:0_0:0:0_186 99 4 127037 199 100M = 127469 532 TGGGTCCCTCGGGTCAGCTCGTGTAACCCGCTGTCCCCGCAGATGAGATGAGCGAGAGCCGCCAGACCCACGTGACGCTGCACGACATCGACCCTCAGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:388 NH:i:1
+4_127037_127568_0:0:0_0:0:0_186 1123 4 127037 199 100M = 127469 532 TGGGTCCCTCGGGTCAGCTCGTGTAACCCGCTGTCCCCGCAGATGAGATGAGCGAGAGCCGCCAGACCCACGTGACGCTGCACGACATCGACCCTCAGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:388 NH:i:1
4_127037_127568_0:0:0_0:0:0_186 147 4 127469 199 100M = 127037 -532 AGGAGTTTATGCTGCTGCCCCTGAAACAGGTAACAGCTGGCGGGCCCAGCCCTCGCCCCCCACCCCACCCCACCCCAGTCTTTGTCTTTGACTCCCGACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:388 NH:i:1
4_747718_748170_0:0:0_0:0:0_187 83 4 748071 199 100M = 747718 -453 CATGGTGCCCCTATTATAGTGGATCCAGCCTTGAAGAGGCCTCACCCTGCCTCAACACCTCTTCCCCAAGACCCTGACGGCCAGGACAGTGTCTTTTTGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:389 NH:i:1
4_747718_748170_0:0:0_0:0:0_187 163 4 747718 199 100M = 748071 453 TGGGTGACAGAGCGGGACTTCATCTCAAAACAACAACAACAAAAAACAGACAATTGGATCCCAATTATATGATAGTATTTCTGACAAAATGGGACATGTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:389 NH:i:1
@@ -587,59 +587,59 @@ chr3_727319_727786_0:0:0_0:0:0_c9 163 chr3 727319 199 100M = 727687 468 CCAAAGCA
4_516551_516964_0:0:0_0:0:0_188 163 4 516551 199 100M = 516865 414 AGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCTGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:390 NH:i:1
4_650282_650786_0:0:0_0:0:0_189 83 4 650687 199 100M = 650282 -505 ACAGCCCACGCACACCCTCCCGTCCCTTCCCTTTCCCCGGATAACAGGCACCCGCACGCTGCTTCACGGGTGGGTTTTCCTGTCTGGCGCTGTACCTTAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:391 NH:i:1
4_650282_650786_0:0:0_0:0:0_189 163 4 650282 199 100M = 650687 505 CCTGCCTCCTGTAACCGCGTGGCTGTGGGATTCGGGGCTGGGAATTCGGGTTCCTGTGGGGCCAGCACACGGCCCTGTGCTTCTCCCTCAGGCGGAGAGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:391 NH:i:1
-4_606582_607046_0:0:0_0:0:0_18a 99 4 606582 199 100M = 606947 465 GGACTCTGCGTGACCCCAGGAACTGCAGCATTGAGGTGGTCTCAGTCCCTCCCTCAGGTCTGGTGGGCATTGTGCCAGGGGCCACCTTCGGGACAGCTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:392 NH:i:1
+4_606582_607046_0:0:0_0:0:0_18a 1123 4 606582 199 100M = 606947 465 GGACTCTGCGTGACCCCAGGAACTGCAGCATTGAGGTGGTCTCAGTCCCTCCCTCAGGTCTGGTGGGCATTGTGCCAGGGGCCACCTTCGGGACAGCTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:392 NH:i:1
4_606582_607046_0:0:0_0:0:0_18a 147 4 606947 199 100M = 606582 -465 GGCCACGGAGGCCTGAGTAGGACCCATGGGCTGTGTGTGTCACTGCAGGTGGCGTGCTCACAACTGCACTGCTGGTCGGCAGGTGGCCAGGGTTGCAGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:392 NH:i:1
4_877235_877812_0:0:0_0:0:0_18b 83 4 877713 199 100M = 877235 -578 GTCCAGCGTTCACTGTGCTGTTTCCATGTCTACCACTTCTGCAACAAATGTGACTTCTATTGCCAAATTCTTCTTCATTGCTGTGACAGGACACACTACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:393 NH:i:1
4_877235_877812_0:0:0_0:0:0_18b 163 4 877235 199 100M = 877713 578 GTGGACTCACTCTGAAGGCGGAGATGGGCCTGCTCGCACCTGGCCTACAGCCTTTTTCCTGGTTCACAGAACAGATCTGGGGCTACACCGATGTTCTTTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:393 NH:i:1
4_975633_976238_0:0:0_0:0:0_18c 97 4 975633 199 100M = 976139 606 CATTGGAACCATCATGCGAGCTCTCATTTGCATGGGGAATTGTATGGGATACTATATAGAACAAACTAAAGATAGAATCTTTTGTTCTCTAGCTGTTTGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:394 NH:i:1
4_975633_976238_0:0:0_0:0:0_18c 145 4 976139 199 100M = 975633 -606 TCTCTGTAAAAAACCAAAAAATATTAGCCAGGCATGGTGGCACATGCCTGTGGCCTATGCCACTTGGGAGGCTGAGTAGGAAGGGTTACTGGAGCCTAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:394 NH:i:1
-4_763148_763612_0:0:0_0:0:0_18d 99 4 763148 199 100M = 763513 465 CTGTCACAGCTCAAGTGTGTCGCCGTGGGAATGGGCTATTTCTAAGCACGGCAGGTACCGTCTTCCCCTTCCCCCGCCAACACAGCTGGCCCAGGACAAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:395 NH:i:1
+4_763148_763612_0:0:0_0:0:0_18d 1123 4 763148 199 100M = 763513 465 CTGTCACAGCTCAAGTGTGTCGCCGTGGGAATGGGCTATTTCTAAGCACGGCAGGTACCGTCTTCCCCTTCCCCCGCCAACACAGCTGGCCCAGGACAAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:395 NH:i:1
4_763148_763612_0:0:0_0:0:0_18d 147 4 763513 199 100M = 763148 -465 CCGGCGCCACGCAGGGGTGGGCGGAGCAAAGACACACAGGTGGGCTACAGGTGTCACACGGCACCAGCCAGGGCCCGGGGTGGCTGGGGTGAGGATGGGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:395 NH:i:1
4_784847_785291_0:0:0_0:0:0_18e 83 4 785192 199 100M = 784847 -445 CCTCAGCAAGTGGGGGGCTGGTTCCAGGCACAAGAAGTGAGGCCAGGGAGGAAGCCCTGCCTGTGTTCAGCCCAGTTGTGCCCCAACTCAGGTGGGCTTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:396 NH:i:1
4_784847_785291_0:0:0_0:0:0_18e 163 4 784847 199 100M = 785192 445 CCTCCAGAGGGTCCAGGATACTGAGCTCCTGAGCCTGGGATCCACGCGCATGTGGTCTGTCTGCACACTGGCACCGCAGAAGGGTTGGCAGGCGGGCAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:396 NH:i:1
-4_560943_561407_0:0:0_0:0:0_18f 99 4 560943 199 100M = 561308 465 CAGTAGTAGAGGCACGGGCTTCCTGCCAGAGAGAGAGCACAGGGTCTGCACCACAGTCAAGCAGAGCAAGGCTTCTGAGATCCCGTCGTTACCTCCAACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:397 NH:i:1
+4_560943_561407_0:0:0_0:0:0_18f 1123 4 560943 199 100M = 561308 465 CAGTAGTAGAGGCACGGGCTTCCTGCCAGAGAGAGAGCACAGGGTCTGCACCACAGTCAAGCAGAGCAAGGCTTCTGAGATCCCGTCGTTACCTCCAACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:397 NH:i:1
4_560943_561407_0:0:0_0:0:0_18f 147 4 561308 199 100M = 560943 -465 CACAGAACCAATGAAAGCACAATTATTTAAATACAGATGGGCCAGGTGCAGTGGCTCTGCCTGTAATCCCAGCACTTTGGGAGGCGAGGCAGGCAGACTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:397 NH:i:1
-4_392869_393428_0:0:0_0:0:0_190 99 4 392869 199 100M = 393329 560 GAGCTCGTGGCCAGGCCCTGCGGGAAGGCGAGCTCGTGGCCAGGCCCTGCGGGAAGGTGAGCTCGTGGCCAGGCCCTGCGGCGGGGGCTGCCTCCTTGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:398 NH:i:1
+4_392869_393428_0:0:0_0:0:0_190 1123 4 392869 199 100M = 393329 560 GAGCTCGTGGCCAGGCCCTGCGGGAAGGCGAGCTCGTGGCCAGGCCCTGCGGGAAGGTGAGCTCGTGGCCAGGCCCTGCGGCGGGGGCTGCCTCCTTGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:398 NH:i:1
4_392869_393428_0:0:0_0:0:0_190 147 4 393329 199 100M = 392869 -560 CAGTGACGGCCAAGGATGGGCCACTGACAGCACAGCTCTGCTCCCGACCAGGCCAGCAAAACAAGCCAACCCCACACACCCCCAGCCTGCGTGTGATACG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:398 NH:i:1
4_8854_9394_0:0:0_0:0:0_191 83 4 9295 199 100M = 8854 -541 TCTGAGGCCCCTACTCTGAGATTCTAAGACAGGGTTTGAAGAATGTTCCAGCATTTGAACACATGAACCCTATGATGTCCTGCTGTGAGAGAGAACAATG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:399 NH:i:1
4_8854_9394_0:0:0_0:0:0_191 163 4 8854 199 100M = 9295 541 AAGCTGCACGTCATCCTCTGGACCTCTTTGCTGACTGTGTGCTGTCACCACAGCAGCCGCCTTCTCTCATCTGTAAACAGGATTTTACGTAAATTTCCAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:399 NH:i:1
4_492210_492740_0:0:0_0:0:0_192 83 4 492641 199 100M = 492210 -531 CGGGCTGCCCCAGGCCTGGGCGAGGACTCGAGCCCCGCTCCCTTCCACAGGTTTCTGAACAGCCTGGGCACCATCTTCTCATTCATCTCCAAGGACGTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:400 NH:i:1
4_492210_492740_0:0:0_0:0:0_192 163 4 492210 199 100M = 492641 531 GGAAGCTCCATGAGGGGCATGAGTGTTCAGTGAGCGGCAATGGGATCGCAGCTATTTTGTTCCCCTCCACACACAGAAAATGAGCCACAGAGCAAGCTGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:400 NH:i:1
-4_845071_845544_0:0:0_0:0:0_193 99 4 845071 199 100M = 845445 474 TCCCACTCTTTCATCTCTCATTCACGGGTTTTTACAGCGTTTAATCTCCACCTTCGCTCGCTGCGTCCCTGAGGTTGGCGGGTGGGGCTCAAAGCTCCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:401 NH:i:1
+4_845071_845544_0:0:0_0:0:0_193 1123 4 845071 199 100M = 845445 474 TCCCACTCTTTCATCTCTCATTCACGGGTTTTTACAGCGTTTAATCTCCACCTTCGCTCGCTGCGTCCCTGAGGTTGGCGGGTGGGGCTCAAAGCTCCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:401 NH:i:1
4_845071_845544_0:0:0_0:0:0_193 147 4 845445 199 100M = 845071 -474 TGCCTGTTCAGACCAGAGACCACAAAGTTGTAAGTCAAACCTTCATGTTTGGCCAGCGGCGGTGGCCCACACCTAGAACCCCAGTACTTTGGAAGGCCGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:401 NH:i:1
-4_135773_136242_0:0:0_0:0:0_194 99 4 135773 199 100M = 136143 470 TCCTGAGTGTGTTCAAGAAGGGGCGGCGGAGGGTGCCTGTGAGGAACCTGGGAAAAGTTGTGCATTACGCCAAGGTCCAGCTGCGGTTCCAGCACAGCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:402 NH:i:1
+4_135773_136242_0:0:0_0:0:0_194 1123 4 135773 199 100M = 136143 470 TCCTGAGTGTGTTCAAGAAGGGGCGGCGGAGGGTGCCTGTGAGGAACCTGGGAAAAGTTGTGCATTACGCCAAGGTCCAGCTGCGGTTCCAGCACAGCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:402 NH:i:1
4_135773_136242_0:0:0_0:0:0_194 147 4 136143 199 100M = 135773 -470 TACCGCTGACGGAGCTGAGTGTCTGCCCGCTCGAGGGGTCCCGAGAGCACGCCTTCCAGATCACAGGTGTTTGGGATGCTTCCCGGGCCCCCAGAGGCAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:402 NH:i:1
4_679353_679965_0:0:0_0:0:0_195 97 4 679353 199 100M = 679866 613 GCTATAGCCCCATGCCGCCTGGTTCCCAGAGTGCCCTGACCCCTCCTCCGGCCTGTCCTTGGAAGTCACAGCAGTACATTATCCCAGAACTGTCTGTGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:403 NH:i:1
4_679353_679965_0:0:0_0:0:0_195 145 4 679866 199 100M = 679353 -613 GCCCTTGGCTTCCCGAGCTTAGCAGCCAAACCCGTTGACCTGGCACTGTCTTCCCTGAGAGGCAGGTCCCGGTCTCATCTGTGCTCTGTTCACTGGGGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:403 NH:i:1
-4_256148_256647_0:0:0_0:0:0_196 99 4 256148 199 100M = 256548 500 GGCACCCGCAGGCCATGGCAGCGCCACCGACATTCTCTGCAAGTCTCCGGAGGCTCTCCATCAGGGGTTTCCGCTTGACCCCACAGAGGAAGTGCCCGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:404 NH:i:1
+4_256148_256647_0:0:0_0:0:0_196 1123 4 256148 199 100M = 256548 500 GGCACCCGCAGGCCATGGCAGCGCCACCGACATTCTCTGCAAGTCTCCGGAGGCTCTCCATCAGGGGTTTCCGCTTGACCCCACAGAGGAAGTGCCCGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:404 NH:i:1
4_256148_256647_0:0:0_0:0:0_196 147 4 256548 199 100M = 256148 -500 GAGGGGTCCCCACCCTCCACCCGCTCCGCCTGGGAGTCTGGAGCCGCCGCAGCTACACTCCCCGATAGCGATGCGCGGCACGCTGTGCTGTCACCGCTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:404 NH:i:1
4_255823_256325_0:0:0_0:0:0_197 83 4 256226 199 100M = 255823 -503 CCCCACAGAGGAAGTGCCCGGGCCTGGCTGGCATTTCAGGGGACGAATTCACGCCCAGGTGCCCAGGTACTGCACAGGTGCGCTGGGTGCTGGCACTTAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:405 NH:i:1
4_255823_256325_0:0:0_0:0:0_197 163 4 255823 199 100M = 256226 503 CCTGGGCAGCTGGCGTTGACGCCCACCACATCCACACAGCACTCGGGCAGCTGGGCCTGGAGGGGACACGGCAGTGAAACTCCAGGACGCGCCTGCCAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:405 NH:i:1
-4_905669_906144_0:0:0_0:0:0_198 99 4 905669 199 100M = 906045 476 TTCACAGAGGACAATTTCTAAAAGGCAAATAAGAAGCAGGAAGGGTGCATGCTCCTTCTCTACTGCCCTGTAACAGTCATTCCACACTTACCACCTCAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:406 NH:i:1
+4_905669_906144_0:0:0_0:0:0_198 1123 4 905669 199 100M = 906045 476 TTCACAGAGGACAATTTCTAAAAGGCAAATAAGAAGCAGGAAGGGTGCATGCTCCTTCTCTACTGCCCTGTAACAGTCATTCCACACTTACCACCTCAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:406 NH:i:1
4_905669_906144_0:0:0_0:0:0_198 147 4 906045 199 100M = 905669 -476 TCATCCAGCAAGCTCATCAGGGAGTGGGAGAGAGCAGCCAGGACAGGAGCCCAGGCCTTTCTGAACCTCATCTCAGAAGTGACATCCTTCCCTTCTGCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:406 NH:i:1
4_943228_943688_0:0:0_0:0:0_199 83 4 943589 199 100M = 943228 -461 TTCACGCGGGGGCCAGAGGCACAGAGAGGCCAAGTAACTTGCTCAAGGTCACCCAGCGAGGAAAGGGAGCTGGGGGGTGGGGGTAGGGGACAGGGCGAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:407 NH:i:1
4_943228_943688_0:0:0_0:0:0_199 163 4 943228 199 100M = 943589 461 AGGACTGGGACTCCCTGTGTCCTCACAAGATCCTTACCAAGCAGCCTAGTATAAGACGAAGTCTCGCTCTGTAGCTCAGGCTGGAGTGTAGTGGCATGAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:407 NH:i:1
-4_214297_214793_0:0:0_0:0:0_19a 99 4 214297 199 100M = 214694 497 CTCCCGGTCGCCCTTTGCAGTGCTTGGCGCCCCTGTGCCGGCCTTCGAGGGCCGCTCCTTCCTGGCCTTCCCCACTCTCCGCGCCTACCACACGCTGCGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:408 NH:i:1
+4_214297_214793_0:0:0_0:0:0_19a 1123 4 214297 199 100M = 214694 497 CTCCCGGTCGCCCTTTGCAGTGCTTGGCGCCCCTGTGCCGGCCTTCGAGGGCCGCTCCTTCCTGGCCTTCCCCACTCTCCGCGCCTACCACACGCTGCGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:408 NH:i:1
4_214297_214793_0:0:0_0:0:0_19a 147 4 214694 199 100M = 214297 -497 CAGGTTCGGGGCCGGCGGTGCTGACCAGTGCCGTGCCGGTAGAGCCGGGCCAGTGGCACCGCCTGGAGCTGTCCCGGCACTGGCGCCGGGGCACCCTCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:408 NH:i:1
-4_12899_13330_0:0:0_0:0:0_19b 99 4 12899 199 100M = 13231 432 GGCCCGGCACAAGGTAGGAACTGAGTGTGAGTGCGGGATGCACCCAGGTCTGAAATGCTCAGGAAGGACCCTAGGACATGAGCAACCTGGGCTGGTCACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:409 NH:i:1
+4_12899_13330_0:0:0_0:0:0_19b 1123 4 12899 199 100M = 13231 432 GGCCCGGCACAAGGTAGGAACTGAGTGTGAGTGCGGGATGCACCCAGGTCTGAAATGCTCAGGAAGGACCCTAGGACATGAGCAACCTGGGCTGGTCACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:409 NH:i:1
4_12899_13330_0:0:0_0:0:0_19b 147 4 13231 199 100M = 12899 -432 ACGCCATGCGATTCGCCCACTCTTGGGTAGGTGGAGACATTGGGGCTCAGGTGTGGGTAAGTGATTGTGTGACTCCTTGTTAAGCGGCAGAACCGGGACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:409 NH:i:1
-4_747638_748179_0:0:0_0:0:0_19c 99 4 747638 199 100M = 748080 542 CTACTCAGGAGGCTGAAAATCACTTGAACCTGGGAGTCGGAGGTTGCAGTGAGCCAAGATCGCGCCACTGTACTCCAGCCTGGGTGACAGAGCGGGACTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:410 NH:i:1
+4_747638_748179_0:0:0_0:0:0_19c 1123 4 747638 199 100M = 748080 542 CTACTCAGGAGGCTGAAAATCACTTGAACCTGGGAGTCGGAGGTTGCAGTGAGCCAAGATCGCGCCACTGTACTCCAGCCTGGGTGACAGAGCGGGACTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:410 NH:i:1
4_747638_748179_0:0:0_0:0:0_19c 147 4 748080 199 100M = 747638 -542 CCTATTATAGTGGATCCAGCCTTGAAGAGGCCTCACCCTGCCTCAACACCTCTTCCCCAAGACCCTGACGGCCAGGACAGTGTCTTTTTGCCTTCAGACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:410 NH:i:1
-4_923597_924043_0:0:0_0:0:0_19d 99 4 923597 199 100M = 923944 447 GTGACAGACACAGATACAGAGGAGGTTACACGGTAAGGCATACATGCAATTTGAAAGATGCCAACTCCATCTGCCCAGCAGCCACACAATAGCCCTTGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:411 NH:i:1
+4_923597_924043_0:0:0_0:0:0_19d 1123 4 923597 199 100M = 923944 447 GTGACAGACACAGATACAGAGGAGGTTACACGGTAAGGCATACATGCAATTTGAAAGATGCCAACTCCATCTGCCCAGCAGCCACACAATAGCCCTTGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:411 NH:i:1
4_923597_924043_0:0:0_0:0:0_19d 147 4 923944 199 100M = 923597 -447 AAATGTTCACTCTGAACTCATGTCAATAAAAAAGACGTAGGCCGGGCGCAGTGGCTCACACTTGTAATCTCAGCACTTCAGGAGGCTGAGGTGGGTGGAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:411 NH:i:1
4_912406_912903_0:0:0_0:0:0_19e 83 4 912804 199 100M = 912406 -498 TTTAATACATTTTCAAGAAATTAATATGAAACATTAAAATTTACTTCAAAAATCCAAAGTTTTCTAGATCATTCCCATCTCACGCTGCTTTAGAGGTCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:412 NH:i:1
4_912406_912903_0:0:0_0:0:0_19e 163 4 912406 199 100M = 912804 498 AGCCCGGATGGAGGGGCTCTTCCAGCCCTGCTGGCCCCGGGAATGCAGGGACTCAATTCCCCCTGGTCTCAGTGGCTCTTCCGGGAGCAACACAGCCTGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:412 NH:i:1
-4_693122_693615_0:0:0_0:0:0_19f 99 4 693122 199 100M = 693516 494 TCATCCTCATCCCCGCCCCGCAGGTTCATGCTGGTCCTGGCCAGCAACCAACCAGAGCAGTTCGACTGGGCCATCAATGACCGCATCAATGAGATGGTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:413 NH:i:1
+4_693122_693615_0:0:0_0:0:0_19f 1123 4 693122 199 100M = 693516 494 TCATCCTCATCCCCGCCCCGCAGGTTCATGCTGGTCCTGGCCAGCAACCAACCAGAGCAGTTCGACTGGGCCATCAATGACCGCATCAATGAGATGGTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:413 NH:i:1
4_693122_693615_0:0:0_0:0:0_19f 147 4 693516 199 100M = 693122 -494 CCTCTTGCCTCAGCCTCCTGAGTAGCTGAGATTACAGGCGCCCACCACCATGCCTGGCTAATTTTTTTTTAATTTTTAGTAGAAATGGGGTTTTACCATG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:413 NH:i:1
4_444279_444851_0:0:0_0:0:0_1a0 83 4 444752 199 100M = 444279 -573 CTCTTACGTGTCTCTGTTTCTCTCTCTAGAAGTCCAACACCTCCGAGAGGACTCAAAACACAGAGTGACCAGCTCCTATGTGCATTTCCTGGACAAACCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:414 NH:i:1
4_444279_444851_0:0:0_0:0:0_1a0 163 4 444279 199 100M = 444752 573 TGGGCTCTGAATTTTTCCTGGATGGAATTTGCCCATCAGTTTAAAATGTGCCCCAAAAGACCATAATATGGAACCAGCTGGGGCACTAGGAAACCCAGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:414 NH:i:1
-4_83203_83719_0:0:0_0:0:0_1a1 99 4 83203 199 100M = 83620 517 TTTCCCCTGCCTCTTGGGCTCTGCATATGATTTCAGCCAATCCCTGCTGCCCAGCCTCGCCCACCCGCTTCTGGGGCCACCCCCGCGTTAAGCAGGGCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:415 NH:i:1
+4_83203_83719_0:0:0_0:0:0_1a1 1123 4 83203 199 100M = 83620 517 TTTCCCCTGCCTCTTGGGCTCTGCATATGATTTCAGCCAATCCCTGCTGCCCAGCCTCGCCCACCCGCTTCTGGGGCCACCCCCGCGTTAAGCAGGGCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:415 NH:i:1
4_83203_83719_0:0:0_0:0:0_1a1 147 4 83620 199 100M = 83203 -517 AAATCCCTGACATGATACAGAAGTCAGTGCTGACAGAGGGCCTGCTACCCCCAGTGACCCTGCCGGTGGGGTCTCAACCCACCCACTTCTTCTGGAGCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:415 NH:i:1
4_672312_672777_0:0:0_0:0:0_1a2 83 4 672678 199 100M = 672312 -466 CAGTAAAGCATTCTTGCCTGGTTCCTTACTCAGAGGAAAAGCTTCCAGTTTTTCACCACTGAGTATGTCACCTGTGGGCTTGTGATATATGGCCTTCATT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:416 NH:i:1
4_672312_672777_0:0:0_0:0:0_1a2 163 4 672312 199 100M = 672678 466 ACTGCAAGCTCCGCCTCTTGGGTTCACGCCATTCTCCTGCCTCAGCCTCCTGAGTAGCTGGGAATACAGGCGCCTGCCACCATGCCCAGCTAATTTTTTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:416 NH:i:1
-4_53218_53779_0:0:0_0:0:0_1a3 99 4 53218 199 100M = 53680 562 TTTAAAAGGATGCTCAAAAATGATGTCCAATATATGTTACAAAGTAAAAACAACCCTGGGTGCAGAGCCATGTGTCTGATATGCCTCCATTCCTATAAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:417 NH:i:1
+4_53218_53779_0:0:0_0:0:0_1a3 1123 4 53218 199 100M = 53680 562 TTTAAAAGGATGCTCAAAAATGATGTCCAATATATGTTACAAAGTAAAAACAACCCTGGGTGCAGAGCCATGTGTCTGATATGCCTCCATTCCTATAAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:417 NH:i:1
4_53218_53779_0:0:0_0:0:0_1a3 147 4 53680 199 100M = 53218 -562 TCTATTTCTTCTTGATTCTTGAGTCAATTTTGGTACCACTTTGTACCCCACAGGGTGCTTGGGCTCAGAACACATCAGTCAAAGAAAGAGAGAAGAAAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:417 NH:i:1
-4_256448_256879_0:0:0_0:0:0_1a4 99 4 256448 199 100M = 256780 432 CGGTGAGGGCGTCCAGGGGTGACTGCGGAGTGCGGGATAGAATCCACACCGCCAGGGAGGGGCCCAGGTCCGGTCCCTCAAACGCCCCAGCCAGGACAGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:418 NH:i:1
+4_256448_256879_0:0:0_0:0:0_1a4 1123 4 256448 199 100M = 256780 432 CGGTGAGGGCGTCCAGGGGTGACTGCGGAGTGCGGGATAGAATCCACACCGCCAGGGAGGGGCCCAGGTCCGGTCCCTCAAACGCCCCAGCCAGGACAGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:418 NH:i:1
4_256448_256879_0:0:0_0:0:0_1a4 147 4 256780 199 100M = 256448 -432 AGCAGTGGACCTGGTCAGCACCTCTGGGGCTGGGACATCAGTGTCCTAAAGGTGGAAGGGTTAGACCCTCTAGGGGAAGGGCCCACCGCCCTCCACAGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:418 NH:i:1
4_671544_672056_0:0:0_0:0:0_1a5 83 4 671957 199 100M = 671544 -513 CTGCCTTGGCTCCCAAAGTGCTGGGATTACCGGCGTGGGCCACGGCGCCCGGCCAGTTTTCATTGTACAAGTCTTTCACCTTCTTGGTTAAGTGAATTTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:419 NH:i:1
4_671544_672056_0:0:0_0:0:0_1a5 163 4 671544 199 100M = 671957 513 ATGGTGGTGTGTGCCTGTAATCCTACCTACCTGGGAGGCTGAGAGGCAGGAGAATCACTTGAACCCAGGAGGTGGAGGTTGCAGTGATCTGAGATCGTGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:419 NH:i:1
@@ -649,15 +649,15 @@ chr3_727319_727786_0:0:0_0:0:0_c9 163 chr3 727319 199 100M = 727687 468 CCAAAGCA
4_989476_990019_0:0:0_0:0:0_1a7 163 4 989476 199 100M = 989920 544 TAAAAACAACAAACTCCTGGGATCTCGGGAAAGAGAAACGCATGTTCTGAGATGCTTCAAAAATGATGTTCCGGCTGGGGGCGGTGGCTGACGCCTGTAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:421 NH:i:1
4_270694_271191_0:0:0_0:0:0_1a8 83 4 271092 199 100M = 270694 -498 GAGGCCGAGGTGGGTGGATCACGAGGTCAGGAGAGTGAGACCATCCTGGCTAACATGGTGAAACCCTGTCTCTACTTAAAACACAAAAAAATTAGCTGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:422 NH:i:1
4_270694_271191_0:0:0_0:0:0_1a8 163 4 270694 199 100M = 271092 498 GAGCAGTGGCTCACACCTGTAATCCCAGCACTTTGGAAGGCTGAGGCTGGAGGATCGCTTGAGGGCAGGAGTTTTAAGACCGGCCTGGGCAATACACCGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:422 NH:i:1
-4_836506_837009_0:0:0_0:0:0_1a9 99 4 836506 199 100M = 836910 504 AGCTGAGGTCAGGGGCTGGGGCCCAGGACAGGCCTGTGGTGGCGGGTGCTGGAGAGGCTGTGGGGTGCTGGCACAGGAGGGCCCACGGACCAGGAGCCAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:423 NH:i:1
+4_836506_837009_0:0:0_0:0:0_1a9 1123 4 836506 199 100M = 836910 504 AGCTGAGGTCAGGGGCTGGGGCCCAGGACAGGCCTGTGGTGGCGGGTGCTGGAGAGGCTGTGGGGTGCTGGCACAGGAGGGCCCACGGACCAGGAGCCAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:423 NH:i:1
4_836506_837009_0:0:0_0:0:0_1a9 147 4 836910 199 100M = 836506 -504 ACCACGGATGGAGGCCTGGGATGGTGGGGCGCAGGCGGAGGGGCGGGGCCCGGGGGCCTCACCTGTGTACTCCCCCAGAATCATCCGAGACATGATCACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:423 NH:i:1
-4_792289_792827_0:0:0_0:0:0_1aa 99 4 792289 199 100M = 792728 539 CCTGGGCCGCGTCGGGAAGGTGGTGAAAGTGTTTGGAGACGGGAACCTGCGTGTAGCAGTCGCTGGTCAGCGGTGGACCTTCAGCCCCTCCTGCCTGGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:424 NH:i:1
+4_792289_792827_0:0:0_0:0:0_1aa 1123 4 792289 199 100M = 792728 539 CCTGGGCCGCGTCGGGAAGGTGGTGAAAGTGTTTGGAGACGGGAACCTGCGTGTAGCAGTCGCTGGTCAGCGGTGGACCTTCAGCCCCTCCTGCCTGGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:424 NH:i:1
4_792289_792827_0:0:0_0:0:0_1aa 147 4 792728 199 100M = 792289 -539 TGCTGGCACTCTTGGCAGGTGGACACCAAGAACCAAGGCAGGACCGCTCTGCAAGTGGCTGCCTACCTGGGCCAGGTGGAGTTGATACGGCTGCTGCTAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:424 NH:i:1
-4_100868_101366_0:0:0_0:0:0_1ab 99 4 100868 199 100M = 101267 499 TTCCCGGGCGCCCGGTGGGCCCCTCGACCCCCTTTGAGGCAGGGAGTGAGATAACTGTGATTCCCTGTGGAGGGCGTGAAGGCAGAGCCGGCTGGCTGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:425 NH:i:1
+4_100868_101366_0:0:0_0:0:0_1ab 1123 4 100868 199 100M = 101267 499 TTCCCGGGCGCCCGGTGGGCCCCTCGACCCCCTTTGAGGCAGGGAGTGAGATAACTGTGATTCCCTGTGGAGGGCGTGAAGGCAGAGCCGGCTGGCTGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:425 NH:i:1
4_100868_101366_0:0:0_0:0:0_1ab 147 4 101267 199 100M = 100868 -499 CCGCCACCTCGTTATGCCCGAGCATCAGAGCCGCTGTGAATTCCAGAGAGGCAGCCTGGAGATTGGCCTGCGACCCGCCGGTGAGGAGCACAGGGGGCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:425 NH:i:1
-4_904589_905065_0:0:0_0:0:0_1ac 99 4 904589 199 100M = 904966 477 CCTGTTTCTACTAAAAATACAAAAATTAACCAGGTGTGGTGGTATGCACCTATAATCCTGGCTACTTGGGAGGCTGAGGCAGGAGAATTGCTTGAACCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:426 NH:i:1
+4_904589_905065_0:0:0_0:0:0_1ac 1123 4 904589 199 100M = 904966 477 CCTGTTTCTACTAAAAATACAAAAATTAACCAGGTGTGGTGGTATGCACCTATAATCCTGGCTACTTGGGAGGCTGAGGCAGGAGAATTGCTTGAACCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:426 NH:i:1
4_904589_905065_0:0:0_0:0:0_1ac 147 4 904966 199 100M = 904589 -477 TCACTTGAACAGGGGAGGCAGAGGTTGTGGTAAGCCAAGATTGTGCCATTGCACTCCAGCCTGGGCAACAAGAGCAAAACTCTGTCTCAAAAAAAAAAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:426 NH:i:1
-4_634599_635081_0:0:0_0:0:0_1ad 99 4 634599 199 100M = 634982 483 GGTGGCAAGTGCCTGAAATCCCAGGTACTCAGGAGGCTGAGGCAGGAGAATCACTTGAGCCCGGGAAGTGGAGGTTGCACTGAGCTGAGACCATGGAATT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:427 NH:i:1
+4_634599_635081_0:0:0_0:0:0_1ad 1123 4 634599 199 100M = 634982 483 GGTGGCAAGTGCCTGAAATCCCAGGTACTCAGGAGGCTGAGGCAGGAGAATCACTTGAGCCCGGGAAGTGGAGGTTGCACTGAGCTGAGACCATGGAATT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:427 NH:i:1
4_634599_635081_0:0:0_0:0:0_1ad 147 4 634982 199 100M = 634599 -483 GGTTTTTCTCCTCTCTTATTTTTTTGGGTAGAGACAGGGTCCCTTTGTTGCCCAGGCTGGTCTCCAACTCCTGGGCTCAAGCAATCCTCCTGCCTCGGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:427 NH:i:1
4_979818_980229_0:0:0_0:0:0_1ae 83 4 980130 199 100M = 979818 -412 GGGAGGCCGAGGTGGGTGGATCACCTGAGGTCAGGAGTTCAAGACCAGCCTGGCCAACATGGTGAAACCCCATCTCTACTAAAAATACAAAAAATAGCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:428 NH:i:1
4_979818_980229_0:0:0_0:0:0_1ae 163 4 979818 199 100M = 980130 412 AAAAAAAAAAAAAAAAAAAAAGTCTAAAAAGGACCAGATACCCAGCCCAAAAAAACTTTTCTTCAGACTGCATTAACTAAGTACTTTGTTTTCAAGAGAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:428 NH:i:1
@@ -667,31 +667,31 @@ chr3_727319_727786_0:0:0_0:0:0_c9 163 chr3 727319 199 100M = 727687 468 CCAAAGCA
4_459624_460135_0:0:0_0:0:0_1b0 163 4 459624 199 100M = 460036 512 AGGAACTCACAGACGATCAAGGAGCCCTGGGGAGCCGGGGCAACTCAGACGGAGCCACGGCACCGGCCCCTCTCCGCGCCTCTGCACCCTGACCCCGGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:430 NH:i:1
4_602_1128_0:0:0_0:0:0_1b1 83 4 1029 199 100M = 602 -527 CAGGCTATGTCCCATGCAGCCTGATGAAGGGTAAAAAAGAGCCCAATGCAGAGTGCTGGGGAAGCATAGAGAGAGCCGGGGCACCTGACCCAGCCTGGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:431 NH:i:1
4_602_1128_0:0:0_0:0:0_1b1 163 4 602 199 100M = 1029 527 ATTCATCCATCCCTGCATCTTTCCGTCCACTTCTCTATCTTCTTGTCTCTGCTTCTCTTTGTATTTTGTTCTCACTATTCTCTATTTCATAAGGCTTCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:431 NH:i:1
-4_189514_190015_0:0:0_0:0:0_1b2 99 4 189514 199 100M = 189916 502 CAGGGGCCAAGGGGAGCCTCAGGAGGGAAACAGCATGAGGAGTGGGGGAGCCCAGGAGCTTCCGGTGGGACATGTGGAGCTCTGCTGTGGAAGTAGACCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:432 NH:i:1
+4_189514_190015_0:0:0_0:0:0_1b2 1123 4 189514 199 100M = 189916 502 CAGGGGCCAAGGGGAGCCTCAGGAGGGAAACAGCATGAGGAGTGGGGGAGCCCAGGAGCTTCCGGTGGGACATGTGGAGCTCTGCTGTGGAAGTAGACCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:432 NH:i:1
4_189514_190015_0:0:0_0:0:0_1b2 147 4 189916 199 100M = 189514 -502 CCCACCTCCCAGGGCACAGGATGGCGTCTGGGCAGCCCCTAGTCTTCAGAAGCTACCCCTGAGACCACAGGGTCCCACCTTGTATGGGAACCCCATCCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:432 NH:i:1
-4_670916_671386_0:0:0_0:0:0_1b3 99 4 670916 199 100M = 671287 471 ACCCTCCACAAGAGGTGGAGGAGTAGAGTCTTCTCTAAACTCCCCCGGGGAAAGGGAGACTCCCCTTCCCAGTCTGCTAAGTAGCGGGTGTTTTCCCTTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:433 NH:i:1
+4_670916_671386_0:0:0_0:0:0_1b3 1123 4 670916 199 100M = 671287 471 ACCCTCCACAAGAGGTGGAGGAGTAGAGTCTTCTCTAAACTCCCCCGGGGAAAGGGAGACTCCCCTTCCCAGTCTGCTAAGTAGCGGGTGTTTTCCCTTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:433 NH:i:1
4_670916_671386_0:0:0_0:0:0_1b3 147 4 671287 199 100M = 670916 -471 ATATAATCATATCTAAGATCTATATCTGGTATAACTATTCCTATTTTATATATTTTATTACACTGGAACAGCTCGTGCCCTCGGTCTCTTGCCTCAGCAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:433 NH:i:1
4_588012_588572_0:0:0_0:0:0_1b4 83 4 588473 199 100M = 588012 -561 CACGCCCACCAGCACCAGGACCATTTACAGTTGCCATGGCAATGCCGGGAAGTTACCCTAGATGGTCTAAAAGGGGGAAGAACCCTCACTTCCGGGAATT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:434 NH:i:1
4_588012_588572_0:0:0_0:0:0_1b4 163 4 588012 199 100M = 588473 561 GAAACTACCTCACGCCAGCTTCCCAGGGGTACAGCGGCTAGAATGGACCTCGAGGAGGCCCCTCCCAAGACTCCCGTCTCCCGGGTTCCACCGCAGTGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:434 NH:i:1
4_38871_39370_0:0:0_0:0:0_1b5 83 4 39271 199 100M = 38871 -500 CTTTGCCCCACTGACCAGCCAGGGCAGCCAGCAGTACCGGGCCTTGACTGTGGCTGAGCTCACCCAGCAGATGTTTGATGCTAAGAACATGATGGCTGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:435 NH:i:1
4_38871_39370_0:0:0_0:0:0_1b5 163 4 38871 199 100M = 39271 500 CCCACTCCCTGGGTTGGGGGACTGGGTCTGGGATGGGTACCCTTCTCATTAGTAAGATCCGGGAGGAGTACCCAGACAGGATCATAAACACATTCAGCGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:435 NH:i:1
-4_476226_476704_0:0:0_0:0:0_1b6 99 4 476226 199 100M = 476605 479 TCTGGGTCACAGGTACGGAGGATGACGGCTGTGCTGGTGGGTCACGGGCGGCTCTGGGTCACAGGTACGGAGGATGACGGCTGTGCTGGTGGGTCACGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:436 NH:i:1
+4_476226_476704_0:0:0_0:0:0_1b6 1123 4 476226 199 100M = 476605 479 TCTGGGTCACAGGTACGGAGGATGACGGCTGTGCTGGTGGGTCACGGGCGGCTCTGGGTCACAGGTACGGAGGATGACGGCTGTGCTGGTGGGTCACGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:436 NH:i:1
4_476226_476704_0:0:0_0:0:0_1b6 147 4 476605 199 100M = 476226 -479 CTGACGGCAGCAACTGGGGGTGAAGCAGCCCCCCGCCCAGGGCTGCAGTCCCTCAGGCCAGCCCGTAGCCCGTCCTGGGCTGGCCCACTCCCTGGTCATG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:436 NH:i:1
4_167337_167791_0:0:0_0:0:0_1b7 83 4 167692 199 100M = 167337 -455 CGAACTCCTGGCCCGAGACGATGCTCCTGCCTCGGCCTCCCAACGTGCTGGGACTACAGGCGTGAGTCTCCTCCCTGCCAAGCGTGGCTGGGACTGATCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:437 NH:i:1
4_167337_167791_0:0:0_0:0:0_1b7 163 4 167337 199 100M = 167692 455 CCTCGGCGCTGGAGAGGCTCGGACCGGCCCCGTAGCCCAGGAGGTGGTGTGGATGGTGTCCCCGTCCCGCGTCCCTCCCCGGCCTGAGCGCTGGGCCCGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:437 NH:i:1
-4_731886_732347_0:0:0_0:0:0_1b8 99 4 731886 199 100M = 732248 462 ACACTTCCATTAACACTAAACTCCTCTTACAGGCCTGTCATTCTTGTTCAGAAAAGTGCTGCTGCTTCGCTCTTGAGCAAGAATGACACACAGACACGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:438 NH:i:1
+4_731886_732347_0:0:0_0:0:0_1b8 1123 4 731886 199 100M = 732248 462 ACACTTCCATTAACACTAAACTCCTCTTACAGGCCTGTCATTCTTGTTCAGAAAAGTGCTGCTGCTTCGCTCTTGAGCAAGAATGACACACAGACACGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:438 NH:i:1
4_731886_732347_0:0:0_0:0:0_1b8 147 4 732248 199 100M = 731886 -462 GAGCTGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAAGAGCGAGACCCTGTCTCAAAAAAAAAAAAAAATTACTGAATGTTGAATGAGTAACAGTGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:438 NH:i:1
-4_386740_387306_0:0:0_0:0:0_1b9 99 4 386740 199 100M = 387207 567 GCGCCTGCAACCCCAGGACGGGGGGAGGCCGAGCGGGTCACTTGAACCCTCTGAAATCCAAGAGTTTGAGACCTACCTAGGCAACATGGGGAGACCTTGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:439 NH:i:1
+4_386740_387306_0:0:0_0:0:0_1b9 1123 4 386740 199 100M = 387207 567 GCGCCTGCAACCCCAGGACGGGGGGAGGCCGAGCGGGTCACTTGAACCCTCTGAAATCCAAGAGTTTGAGACCTACCTAGGCAACATGGGGAGACCTTGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:439 NH:i:1
4_386740_387306_0:0:0_0:0:0_1b9 147 4 387207 199 100M = 386740 -567 TTGAGACCAACCCGGCCAACACGGCAGAATCTATCCGTACAAAAAATACAAAAATTAGCCAGGCGCGGTGATGGTGCCTGTGGTCCCAGCTTCTGAGGTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:439 NH:i:1
-4_207799_208392_0:0:0_0:0:0_1ba 99 4 207799 199 100M = 208293 594 TGAGGCAGAGGCGGGGCCTTGTCCAGGCTCCCTGGTCCCTGACCTCAGCCACGCCCTCCCTGGGAGTCCTCTGGCCTGTCCGCATGTCTCCACGCTGTAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:440 NH:i:1
+4_207799_208392_0:0:0_0:0:0_1ba 1123 4 207799 199 100M = 208293 594 TGAGGCAGAGGCGGGGCCTTGTCCAGGCTCCCTGGTCCCTGACCTCAGCCACGCCCTCCCTGGGAGTCCTCTGGCCTGTCCGCATGTCTCCACGCTGTAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:440 NH:i:1
4_207799_208392_0:0:0_0:0:0_1ba 147 4 208293 199 100M = 207799 -594 CTGGGTGTGACTCTGGGGGTGCAGGCTCCTCCCACCCACAGAGAGCCCCCCCACATGCATGGGTGTCCTGGGGATGCTGGTGGTCAGGGGTCAGTGGCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:440 NH:i:1
-4_749643_750134_0:0:0_0:0:0_1bb 99 4 749643 199 100M = 750035 492 AGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGCCAATTAATTTTTTTTTTTTTTTTTTGAGACGGAGTCTCGCTGTGGCCCAGGCTGGAGTGCAGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:7 SD:i:0 SN:i:441 NH:i:1
+4_749643_750134_0:0:0_0:0:0_1bb 1123 4 749643 199 100M = 750035 492 AGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGCCAATTAATTTTTTTTTTTTTTTTTTGAGACGGAGTCTCGCTGTGGCCCAGGCTGGAGTGCAGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:7 SD:i:0 SN:i:441 NH:i:1
4_749643_750134_0:0:0_0:0:0_1bb 147 4 750035 199 100M = 749643 -492 GTCGACCAGGCTGGAGTGCAGTGGCATGATCTTGGCTCAGTGCAACCTCTGCGTCCCGGGTTCAAGCAATTTTCTTGCCTCAGCCTCCTGAGTAGTTGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:441 NH:i:1
-4_497211_497721_0:0:0_0:0:0_1bc 99 4 497211 199 100M = 497622 511 GCCTGTGGTGGCCATGAAGCCCAGCCTCATGTTCCTGGCCAAGGCAGGCAGCCGCGACATCGCCGCCTACTGCAACTACACGCAGTACCAGCCCCGTGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:442 NH:i:1
+4_497211_497721_0:0:0_0:0:0_1bc 1123 4 497211 199 100M = 497622 511 GCCTGTGGTGGCCATGAAGCCCAGCCTCATGTTCCTGGCCAAGGCAGGCAGCCGCGACATCGCCGCCTACTGCAACTACACGCAGTACCAGCCCCGTGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:442 NH:i:1
4_497211_497721_0:0:0_0:0:0_1bc 147 4 497622 199 100M = 497211 -511 GCGACGACGAGTACGGCCGGCAGGGCCTGAGCATCTTCTCGGCCCTGGCCGCGGCACGCGGCATCTGCATCGCGCACGAGGGCCTGGTGCCGCTGCCCCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:442 NH:i:1
4_192656_193180_0:0:0_0:0:0_1bd 83 4 193081 199 100M = 192656 -525 TCCCTTGGGGCGGCAGTGAGGACAGGAGAGGGTCCTGGGTCACAGTCCCTCACATGCCTGCGGGACCTCGAGACTGACACAGATGCCCACCCCTGCCCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:443 NH:i:1
4_192656_193180_0:0:0_0:0:0_1bd 163 4 192656 199 100M = 193081 525 TTTCCAGCTGTGTGGCCTGCGTGTGTGCATGTGTTTGTGTGTGTGTGTACACGCATGTGTGTGCACGTGCCTGCAGCCTCAGATGTACCAGCCTGTGCAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:443 NH:i:1
-4_34920_35446_0:0:0_0:0:0_1be 99 4 34920 199 100M = 35347 527 CATCTATCTATCTACCTGCCTATCATCTATCTATCTATTTACTATCTATCTTTTCTACCTTTCGCCATCAAGAGCTTCAACACCTGGACACATTCCGCAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:444 NH:i:1
+4_34920_35446_0:0:0_0:0:0_1be 1123 4 34920 199 100M = 35347 527 CATCTATCTATCTACCTGCCTATCATCTATCTATCTATTTACTATCTATCTTTTCTACCTTTCGCCATCAAGAGCTTCAACACCTGGACACATTCCGCAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:444 NH:i:1
4_34920_35446_0:0:0_0:0:0_1be 147 4 35347 199 100M = 34920 -527 CTCCCGCGGCTCCGGAGCCGGCTGCCACCAGGGGGCGCGCCCGCGGTGTCCGGGAGCCTGGCGGCGCCTGTGCAGCGGCCAGTGCACCTGCTCCTGCCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:444 NH:i:1
4_41793_42237_0:0:0_0:0:0_1bf 83 4 42138 199 100M = 41793 -445 AGTTCCCACTTGAGTCTGAGGGGGGATCAGGCTTAGTGCCCATGTATTTCCCAATTACCTGGTTCCATCTGGGGGCTTCATGGACAGGAGTGGTGCTTTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:445 NH:i:1
4_41793_42237_0:0:0_0:0:0_1bf 163 4 41793 199 100M = 42138 445 CACGCTCCCCCATACTCGCCCACGCTCCCCCATACTCGCCCACGCTCCCCCACGCTCCCCCATACTCCCCCACACTCCCCCATACTCCCCCAAACTGTTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:445 NH:i:1
@@ -699,13 +699,13 @@ chr3_727319_727786_0:0:0_0:0:0_c9 163 chr3 727319 199 100M = 727687 468 CCAAAGCA
4_475999_476579_0:0:0_0:0:0_1c0 163 4 475999 199 100M = 476480 581 TGTCACTCTCCCGTCCAAGGCCACATAGTAGGCTGAGGATGGCAAACACTGCCCCACAGACACCTACAGGTACGTATTTTTCCAGGAAGCTGCGGTTCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:446 NH:i:1
4_582058_582507_0:0:0_0:0:0_1c1 83 4 582408 199 100M = 582058 -450 AGGAGAATGGCGTGAACCCGGGAGGCGGAGCTTGCAGTGAGCCGAGATCACGCCACTGCACTCCAGCCTGGGCAACAGAGTGAGACTCCGTCTCAAAAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:447 NH:i:1
4_582058_582507_0:0:0_0:0:0_1c1 163 4 582058 199 100M = 582408 450 TTTTAAAACTTGCTTAAACTCTTAAAAACAATAATTTTTTTAACCTTTTAATGTAGGTAAAAATCCATATTTTTATGCCCCTTTATAATTTTTTTTACCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:447 NH:i:1
-4_440207_440743_0:0:0_0:0:0_1c2 99 4 440207 199 100M = 440644 537 CTCTGGCCCTCTGGGACACCTTGGTGGGTCTGAGACGTTGCTGCAGGCCGTGGGGCAGAAGCCAGGAGGAGTGTCCACGCAGCCTCAGCCCGTGGGAATC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:448 NH:i:1
+4_440207_440743_0:0:0_0:0:0_1c2 1123 4 440207 199 100M = 440644 537 CTCTGGCCCTCTGGGACACCTTGGTGGGTCTGAGACGTTGCTGCAGGCCGTGGGGCAGAAGCCAGGAGGAGTGTCCACGCAGCCTCAGCCCGTGGGAATC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:448 NH:i:1
4_440207_440743_0:0:0_0:0:0_1c2 147 4 440644 199 100M = 440207 -537 GAGCATGGCTGAGTCACACCCCAGGGAGAAGGTGCCCCGGACAGGTGTGGCCAGCACGTCTTGCCAAGACCCCACCACCCCACCCACCCGCATCTGTGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:448 NH:i:1
-4_684003_684541_0:0:0_0:0:0_1c3 99 4 684003 199 100M = 684442 539 CGCCTTCCCGTCACAGGGACACACACACACACCCGCACATGGGCACACACCCACACACGGGCGTACACACCCCCCCCCACAGGCACGCACAACCCCCCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:449 NH:i:1
+4_684003_684541_0:0:0_0:0:0_1c3 1123 4 684003 199 100M = 684442 539 CGCCTTCCCGTCACAGGGACACACACACACACCCGCACATGGGCACACACCCACACACGGGCGTACACACCCCCCCCCACAGGCACGCACAACCCCCCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:449 NH:i:1
4_684003_684541_0:0:0_0:0:0_1c3 147 4 684442 199 100M = 684003 -539 TAGGCTGTCTGCTCTCCTGGCTGGGGCGGAGGTGGCGGGGGCTGCTTGTGGACCCGGCGTGCACTCTGAGCCTGAGTTCTGCCGCCCGGCCCCTCATAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:449 NH:i:1
4_389650_390202_0:0:0_0:0:0_1c4 83 4 390103 199 100M = 389650 -553 ATAATCCCAGCCCTTTGGGAGGCTAAGACAGGTGGATCACTTAAGCCCAGAAGTTCAAGACCAGCTTGGGCAACATAGGGAGACCCCACCTCTACAAAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:450 NH:i:1
4_389650_390202_0:0:0_0:0:0_1c4 163 4 389650 199 100M = 390103 553 ATGGACCCTCCCCCTCTGTCACACAGAATCACCACCAGACCCCACTCTCCAACCACAGCTTCCAAAAGTCACACTCTTCACGTTTTATTTTATTTTTTGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:450 NH:i:1
-4_12073_12634_0:0:0_0:0:0_1c5 99 4 12073 199 100M = 12535 562 AAATACACCAAAGCAATGAGGCCGGGCATGCATACAGCCAAAGCCAAATGATATTTGTGTGAGTCAGGTAAATTTGAGATCTGAAAAATGATCACTCCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:451 NH:i:1
+4_12073_12634_0:0:0_0:0:0_1c5 1123 4 12073 199 100M = 12535 562 AAATACACCAAAGCAATGAGGCCGGGCATGCATACAGCCAAAGCCAAATGATATTTGTGTGAGTCAGGTAAATTTGAGATCTGAAAAATGATCACTCCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:451 NH:i:1
4_12073_12634_0:0:0_0:0:0_1c5 147 4 12535 199 100M = 12073 -562 ATACAGAGAGCCATGTTTCCCATAATTAGTTGGCCCCAGAAACAGTTTCTTCTGGGAACATTTTGTAAGATTAGTGCTCCTTGAAACACACTGAGGGAGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:451 NH:i:1
4_323381_323870_0:0:0_0:0:0_1c6 83 4 323771 199 100M = 323381 -490 CTGCCCCGCCCAGCGAGACCTCGGGCCCAGCCAGGGCTGGGGGCTGGGGGCTGGGTACCCTCTGGCCGGCTCTGCCACCAGCCGCCAGCCTGGTCCCCGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:452 NH:i:1
4_323381_323870_0:0:0_0:0:0_1c6 163 4 323381 199 100M = 323771 490 GGCCTGGCACCCTGGGTGGGGCACCGTCCCCAGGGAGGGTTGGGCAGGCGCCCCCAGCCTCCAGCCATCCCGCCCGCTGTGGGCGCCCCGGTGGCCGAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:452 NH:i:1
@@ -719,17 +719,17 @@ chr3_727319_727786_0:0:0_0:0:0_c9 163 chr3 727319 199 100M = 727687 468 CCAAAGCA
4_119679_120095_0:0:0_0:0:0_1ca 163 4 119679 199 100M = 119996 417 CTTACAGCTGGACAGACACAGGGGCTTAGGAGGACCCTCTCAAGTGCATTAGCTCCTCCTTCAGTGGCCTTGAACTCCTGTCTGCCCTGCCCAGGGTTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:456 NH:i:1
4_998877_999294_0:0:0_0:0:0_1cb 83 4 999195 199 100M = 998877 -418 AGTAGCTGGACTACAGGGGCGTGCCACCACACGTGGCTAAATCTTTTCCTTTGTATTTTCGTAGAGACGGGGTTTCACCATGTTGGCCAGGATGGTCTCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:457 NH:i:1
4_998877_999294_0:0:0_0:0:0_1cb 163 4 998877 199 100M = 999195 418 CCTTGGCCTCCTGAGTAGCTGGGACTTCAGGTGCCCATCACCACGCCTGGCTAATTTTTTGTATTTTCAGTAGAGACGGGGTTTCATCGTGTTAGCCAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:457 NH:i:1
-4_201094_201615_0:0:0_0:0:0_1cc 99 4 201094 199 100M = 201516 522 CGGTCAGGGGGCTCCGCTCTATTGTCCTCCCTTCTCCCTGTTCAGCAAGGGGTGGGGCTGGACAGAGGCTGGAAGTGGACAGGAGCTTGCAGGGAAGGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:458 NH:i:1
+4_201094_201615_0:0:0_0:0:0_1cc 1123 4 201094 199 100M = 201516 522 CGGTCAGGGGGCTCCGCTCTATTGTCCTCCCTTCTCCCTGTTCAGCAAGGGGTGGGGCTGGACAGAGGCTGGAAGTGGACAGGAGCTTGCAGGGAAGGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:458 NH:i:1
4_201094_201615_0:0:0_0:0:0_1cc 147 4 201516 199 100M = 201094 -522 CCTGCCCCATCACTGGGTCTCACTTGCGTTCAGTTGTGTGTGCAGGGCTGTCTGTGAGGGCTGTGCTGAGGCCTTCCTGACCAGCACATGGGGTGGGAAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:458 NH:i:1
-4_948530_948894_0:0:0_0:0:0_1cd 99 4 948530 199 100M = 948795 365 GAATGGTCTTCCAGTCTCCATGGCATCCACATGCTGTTTTAAACAGAGTTTAAAGAAATGTGAAAAGAGGCAGAGAATCTAAGTGCAGACGCACAGCCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:459 NH:i:1
+4_948530_948894_0:0:0_0:0:0_1cd 1123 4 948530 199 100M = 948795 365 GAATGGTCTTCCAGTCTCCATGGCATCCACATGCTGTTTTAAACAGAGTTTAAAGAAATGTGAAAAGAGGCAGAGAATCTAAGTGCAGACGCACAGCCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:459 NH:i:1
4_948530_948894_0:0:0_0:0:0_1cd 147 4 948795 199 100M = 948530 -365 TCTCCTCAGAGAAGGGCATTTGGGCTGCTGCATTACCTACTGGCGTTAGTTCCAGATCTTGAGGAAGCTATCCCAGGACCCTGTCGCCACAGCCATGCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:459 NH:i:1
-4_562155_562640_0:0:0_0:0:0_1ce 99 4 562155 199 100M = 562541 486 GAATAGCTGGGATTACAGGCACCCGCAACCACACCCGGCTAATTTTTTTTTTATTTTTTTTTGAGACGGAGTCTTGCTCTGTCGCCCAGGCTGGAGTGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:460 NH:i:1
+4_562155_562640_0:0:0_0:0:0_1ce 1123 4 562155 199 100M = 562541 486 GAATAGCTGGGATTACAGGCACCCGCAACCACACCCGGCTAATTTTTTTTTTATTTTTTTTTGAGACGGAGTCTTGCTCTGTCGCCCAGGCTGGAGTGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:460 NH:i:1
4_562155_562640_0:0:0_0:0:0_1ce 147 4 562541 199 100M = 562155 -486 AGGCTGGTCTCGAACTCCTGACCTTAGGTGATCCACCTGCCTAGGCCTCCCAACGTGCTGGGATTACAGGCCAGCCCGGCCAGAGGCTGTGTTTTCTAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:460 NH:i:1
-4_446561_446984_0:0:0_0:0:0_1cf 99 4 446561 199 100M = 446885 424 GGCTGACTCAGAGTGACCCTGAGTGGGGGACGCTCACCCCAGGGGAGGAAACGGGGCTCTGGGATAGGAGGTCCTGGCTGGGGTCCCACAGATGCCAGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:461 NH:i:1
+4_446561_446984_0:0:0_0:0:0_1cf 1123 4 446561 199 100M = 446885 424 GGCTGACTCAGAGTGACCCTGAGTGGGGGACGCTCACCCCAGGGGAGGAAACGGGGCTCTGGGATAGGAGGTCCTGGCTGGGGTCCCACAGATGCCAGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:461 NH:i:1
4_446561_446984_0:0:0_0:0:0_1cf 147 4 446885 199 100M = 446561 -424 CCACAGGACCCCCACATGCCGGGAGCTGGGTTCGCCCCACCCCACCCCCTGCACCGGGCCAGCGAGGGGATGGCCCAGAAGAGGGGGAGGACCATGTGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:461 NH:i:1
-4_669515_670047_0:0:0_0:0:0_1d0 99 4 669515 199 100M = 669948 533 GCAATCTCTGCCTTCCAGGTTCAAGTGATTCTCCTGCCTCAGCCTCCCAAGTAGCTGGGATTACAGGCACCGACCACCACACCCGGCTAATTTTTGGATT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:462 NH:i:1
+4_669515_670047_0:0:0_0:0:0_1d0 1123 4 669515 199 100M = 669948 533 GCAATCTCTGCCTTCCAGGTTCAAGTGATTCTCCTGCCTCAGCCTCCCAAGTAGCTGGGATTACAGGCACCGACCACCACACCCGGCTAATTTTTGGATT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:462 NH:i:1
4_669515_670047_0:0:0_0:0:0_1d0 147 4 669948 199 100M = 669515 -533 AAATTAGCCGGGCGTGGTGGCGGGCGCCTGTAATCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATCACTTGAACCTGGGAGGCGGAGGTTGCGGTGAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:462 NH:i:1
-4_225454_225945_0:0:0_0:0:0_1d1 99 4 225454 199 100M = 225846 492 GTGGGGTCAGAGTCGGGCAGAACGGAACCAACAGTCCTGGCAGGCCCAGGCGGTTGGAGGAGAAGTGGTCGGCTCTGGACACCTTTGTTGTTTTTTGTTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:463 NH:i:1
+4_225454_225945_0:0:0_0:0:0_1d1 1123 4 225454 199 100M = 225846 492 GTGGGGTCAGAGTCGGGCAGAACGGAACCAACAGTCCTGGCAGGCCCAGGCGGTTGGAGGAGAAGTGGTCGGCTCTGGACACCTTTGTTGTTTTTTGTTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:463 NH:i:1
4_225454_225945_0:0:0_0:0:0_1d1 147 4 225846 199 100M = 225454 -492 AGGATGAGAGGAACTCATGGATCGGAGCCGGCAGGGAGCGCCGAGGGGGGCCGGGGGCAGGGCGGCCACCCTTGCCTGTGTGGGGAAGATGAAGAAGAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:463 NH:i:1
4_434236_434797_0:0:0_0:0:0_1d2 83 4 434698 199 100M = 434236 -562 TGGTGATCATCATGCAAGAGAGGAGGCTGGATCAGGGACCCCAAAGCCCCTCAGCCACACCCGAGACTGGGGAGAGGGGTGGTGATCATCATGCAAGAGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:464 NH:i:1
4_434236_434797_0:0:0_0:0:0_1d2 163 4 434236 199 100M = 434698 562 TCCCACCTCAGCCTCCTGAGTAGCTGAGACTACAGACGCATGCCACCACGCCTAGCTAATTTTTTTTTTTTTGTAGAGCTGGGATCTCACTATGTTGCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:464 NH:i:1
@@ -739,43 +739,43 @@ chr3_727319_727786_0:0:0_0:0:0_c9 163 chr3 727319 199 100M = 727687 468 CCAAAGCA
4_387285_387822_0:0:0_0:0:0_1d4 163 4 387285 199 100M = 387723 538 TGTGGTCCCAGCTTCTGAGGTAGGAGGATCGCCTGAGCCCTGGAAGGTACAGCTGCAGGGAGCCGAGACTGCACCACTGCACTGCAGGCTGGGTAGCAGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:466 NH:i:1
4_364184_364670_0:0:0_0:0:0_1d5 83 4 364571 199 100M = 364184 -487 AGAGCCAGTGAGGAACCACCCCAGGCCAGGGGCAGCCCCCACCAACATGGTCCTTTCGCTGGGGCCTGGAGTGGGGGTTCCCAGGTGAGGGGCTGGGAAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:467 NH:i:1
4_364184_364670_0:0:0_0:0:0_1d5 163 4 364184 199 100M = 364571 487 TCCCAATGGCTGGCTCAAGGTGTCTGTGCCCCCCAAGTCGAGACCTGAGGATCGGGGGGCCCCGCCCCCAGGCCCACCCCCCACATGCCCTGCCTCCGAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:467 NH:i:1
-4_306265_306713_0:0:0_0:0:0_1d6 99 4 306265 199 100M = 306614 449 ACTGCTCCAGAACCCCTGGCAGCCACAGAGCTCAGGCTTCAACGCTGGCCCGACACCAGGGCCAGGCCAGAAATCAGCCTGGGAGAAGCTGGGTCCGGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:468 NH:i:1
+4_306265_306713_0:0:0_0:0:0_1d6 1123 4 306265 199 100M = 306614 449 ACTGCTCCAGAACCCCTGGCAGCCACAGAGCTCAGGCTTCAACGCTGGCCCGACACCAGGGCCAGGCCAGAAATCAGCCTGGGAGAAGCTGGGTCCGGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:468 NH:i:1
4_306265_306713_0:0:0_0:0:0_1d6 147 4 306614 199 100M = 306265 -449 CTGCCCCCATCCCCAACGCCTTCCTCACACCATCCCCCGATCCCCATCCCTGTCCCCACCCCCGCCCCACCCCCATTCCCCCATCCCCACCCCCGTCCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:468 NH:i:1
4_392464_392959_0:0:0_0:0:0_1d7 83 4 392860 199 100M = 392464 -496 CGGGAAGGCGAGCTCGTGGCCAGGCCCTGCGGGAAGGCGAGCTCGTGGCCAGGCCCTGCGGGAAGGTGAGCTCGTGGCCAGGCCCTGCGGCGGGGGCTGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:469 NH:i:1
4_392464_392959_0:0:0_0:0:0_1d7 163 4 392464 199 100M = 392860 496 CCATCTGAGGAGAGGTGCAAGGGGACACCACACCTGGTGCCCAGGTCCTCCAGCTGGCACCGTGCCTCCGTCCCACCAGCTCACCTGTGCTCCGCCCGAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:469 NH:i:1
4_797407_797965_0:0:0_0:0:0_1d8 83 4 797866 199 100M = 797407 -559 CTCACGTCTGTGGGTCTGGTCTATCGGGGGGCCCGCAGGACGTGTGCGGGGTGTGCGGGGTGTTTGAGGGTCCGGATGTGCGTCCCGAGGTCGGAGGGTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:470 NH:i:1
4_797407_797965_0:0:0_0:0:0_1d8 163 4 797407 199 100M = 797866 559 TGCCCCTGGCCCACGGGACGCGGGTCCCCATGCCCCCGGCTGGAGACAGCAGGCAGCTCGGCCAACGGACGGGCCAGCTGGTTCCCTTCAGGGGCAGCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:470 NH:i:1
-4_325884_326397_0:0:0_0:0:0_1d9 99 4 325884 199 100M = 326298 514 CTGTGAGGCGTTCCCTGGCATGACCTCACCTGTGTGCTCTCTCTCCCTGTCTCAGTCAGAGCTGAACCCAGGCTCCTGTGGACCAAGAGGGGGTCCAGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:471 NH:i:1
+4_325884_326397_0:0:0_0:0:0_1d9 1123 4 325884 199 100M = 326298 514 CTGTGAGGCGTTCCCTGGCATGACCTCACCTGTGTGCTCTCTCTCCCTGTCTCAGTCAGAGCTGAACCCAGGCTCCTGTGGACCAAGAGGGGGTCCAGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:471 NH:i:1
4_325884_326397_0:0:0_0:0:0_1d9 147 4 326298 199 100M = 325884 -514 GCTGCAGCCCATGGACTCATGAGGGGGGCTTCTGGGGTCTAAGGCCAGAAGTGACCTTTCTTCTCACGGAGGCACCCCCACATCACAGGCCCCAAGCTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:471 NH:i:1
4_487026_487538_0:0:0_0:0:0_1da 83 4 487439 199 100M = 487026 -513 CACAGAGAGGGGATCTCAGGTTCTCTTCCAGGCCTGTTCCTGCCCCCGCTGGGTGTGAGCTCTCTGAGAGCAGGGTCTCAGCTGTCTCCTCCCTCTGCAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:472 NH:i:1
4_487026_487538_0:0:0_0:0:0_1da 163 4 487026 199 100M = 487439 513 TGCGGCTGTGGAGGGGCGCCACACAGCCTCAGCCATGCCATAGCGAACACGGTGCCAGTGGTCCAGCAGTGGCCAGCGTGGCAACACGAGCCTCACACCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:472 NH:i:1
-4_6699_7150_0:0:0_0:0:0_1db 99 4 6699 199 100M = 7051 452 CACAGGGTTAAACCTCGGTACAGAAGCGAGACCCTGCTGGGCAGAGAAAGGGTGACCTGGAAGGTGTGTGAGGGCCCCGCCCATGGGCCCCTGTGATGGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:473 NH:i:1
+4_6699_7150_0:0:0_0:0:0_1db 1123 4 6699 199 100M = 7051 452 CACAGGGTTAAACCTCGGTACAGAAGCGAGACCCTGCTGGGCAGAGAAAGGGTGACCTGGAAGGTGTGTGAGGGCCCCGCCCATGGGCCCCTGTGATGGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:473 NH:i:1
4_6699_7150_0:0:0_0:0:0_1db 147 4 7051 199 100M = 6699 -452 CTGAATCAAAATCTTCATTGTGGTAAGACCCTCGGTGACACACAGGCCTGGGAAGCACAGGTTAGCGTGTCACCTTGGGCAAAGCTCTCAGCATTGTGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:473 NH:i:1
-4_429853_430284_0:0:0_0:0:0_1dc 99 4 429853 199 100M = 430185 432 CAACTGCCAACACCGGCCAACACCGGCCATCATCTTTCCCCTTGCTCTGCCCCAGGACATGTGTGCCATCACTTTAAAACTAATGGGGGCTGGGCTCGGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:474 NH:i:1
+4_429853_430284_0:0:0_0:0:0_1dc 1123 4 429853 199 100M = 430185 432 CAACTGCCAACACCGGCCAACACCGGCCATCATCTTTCCCCTTGCTCTGCCCCAGGACATGTGTGCCATCACTTTAAAACTAATGGGGGCTGGGCTCGGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:474 NH:i:1
4_429853_430284_0:0:0_0:0:0_1dc 147 4 430185 199 100M = 429853 -432 CTGTCTCTATTAAAAATACAAAAATTAGCCAGCCATGGTGGCACACACCTGTAATCCCAGCTACTCAGGAGGCTGAGCCAGGAGAATCGCTTGAACCTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:474 NH:i:1
4_167258_167764_0:0:0_0:0:0_1dd 83 4 167665 199 100M = 167258 -507 ATCTCCCTATGTTGCCCAGGCTGGTCTCGAACTCCTGGCCCGAGACGATGCTCCTGCCTCGGCCTCCCAACGTGCTGGGACTACAGGCGTGAGTCTCCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:475 NH:i:1
4_167258_167764_0:0:0_0:0:0_1dd 163 4 167258 199 100M = 167665 507 GGGGGCTGGGCGGGCGTCTCGGAGGTGGCCCCGCGAGCACTTAAGCCCCGGCTCTCCTGCCCCGACCTCTCTGCGCGCGCCTCGGCGCTGGAGAGGCTCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:475 NH:i:1
4_340654_341155_0:0:0_0:0:0_1de 83 4 341056 199 100M = 340654 -502 CTCCCGTCACCCCCACAGTCACCATCACCACAGCCCTGCAGACTGCGCAGTGTGTGGCGGGTGCTGTCCTGACATTACATGTGGAAACTCCATCTTTGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:476 NH:i:1
4_340654_341155_0:0:0_0:0:0_1de 163 4 340654 199 100M = 341056 502 CATAAAGTTAGAAAAGCCCAAAGGAGGCCTCAGGCTTACGATGAAATCAGCACCTGGCTGCCTCTTCCAGCTGCTGCTTCTTCTGCAGCAGAAGAGACGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:476 NH:i:1
-4_90353_90927_0:0:0_0:0:0_1df 99 4 90353 199 100M = 90828 575 GGCCGCGAGACGGCCAAGGACTGGAAGCGCAGCATCCGCCACAAAGGTGCCGCCGCCCCTCCCTTCGCTGCCGGGACCCGCGGGCCCCGACCCCACCCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:477 NH:i:1
+4_90353_90927_0:0:0_0:0:0_1df 1123 4 90353 199 100M = 90828 575 GGCCGCGAGACGGCCAAGGACTGGAAGCGCAGCATCCGCCACAAAGGTGCCGCCGCCCCTCCCTTCGCTGCCGGGACCCGCGGGCCCCGACCCCACCCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:477 NH:i:1
4_90353_90927_0:0:0_0:0:0_1df 147 4 90828 199 100M = 90353 -575 GAGCGGCGGCGCCAGGTCACACAACCTGTTTTGGCGCCTGCGGGCGCCTGGGCCCAAGGGTGCGACGCGGGGGCGCCTGAGCCGGGACACAGGGGGTGCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:477 NH:i:1
4_906394_906820_0:0:0_0:0:0_1e0 83 4 906721 199 100M = 906394 -427 GAAAATGCGTGAGTGGAGTGGAAAGCCTTCCTACTCCTGCCTCAGCGACCCCTTCTAAAATACTGCCTCGTTTGGCCTGAAAATGTGATTTGCAGGCTTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:478 NH:i:1
4_906394_906820_0:0:0_0:0:0_1e0 163 4 906394 199 100M = 906721 427 TCAGGCAAAATGTGTCCCAAATCTTCAAACCACGCCCCAGAACTCAGACCTCCCCCTGGGAGTTCGTCCCAAGGAAACCACCTGCAAGAGGCTCAGGCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:478 NH:i:1
-4_788895_789378_0:0:0_0:0:0_1e1 99 4 788895 199 100M = 789279 484 CGCCCAACATGGACCCAGACCCCCAGGCGGGCGTGCAGGTGGGCATGCGGGTGGTGCGCGGCGTGGACTGGAAGTGGGGCCAGCAGGACGGCGGCGAGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:479 NH:i:1
+4_788895_789378_0:0:0_0:0:0_1e1 1123 4 788895 199 100M = 789279 484 CGCCCAACATGGACCCAGACCCCCAGGCGGGCGTGCAGGTGGGCATGCGGGTGGTGCGCGGCGTGGACTGGAAGTGGGGCCAGCAGGACGGCGGCGAGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:479 NH:i:1
4_788895_789378_0:0:0_0:0:0_1e1 147 4 789279 199 100M = 788895 -484 GATGCGCTGGAAGTGCCGTGTGTGCCTGGACTACGACCTCTGCACGCAGTGCTACATGCACAACAAGCATGAGCTCGCCCACGCCTTCGACCGCTACGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:479 NH:i:1
-4_521157_521650_0:0:0_0:0:0_1e2 99 4 521157 199 100M = 521551 494 GACTCGGACACCACGGAGCTGCCAGCAGCGGCGGGTACCGAGGACCCTGGTGGGGGAGGGGAGTCGGTGGGGGAAGGGGTGGGCCCCCTAGGGCCCTGGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:480 NH:i:1
+4_521157_521650_0:0:0_0:0:0_1e2 1123 4 521157 199 100M = 521551 494 GACTCGGACACCACGGAGCTGCCAGCAGCGGCGGGTACCGAGGACCCTGGTGGGGGAGGGGAGTCGGTGGGGGAAGGGGTGGGCCCCCTAGGGCCCTGGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:480 NH:i:1
4_521157_521650_0:0:0_0:0:0_1e2 147 4 521551 199 100M = 521157 -494 GGGGGTGTGGGCCCCTGAGGCCCCTCGTGGTGGGGGAGATTCCATGGGGGAGGGGCATGCCCCCTAGGGCCCATGTGGGGGAAGGGGAATCATTGGGGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:480 NH:i:1
4_288648_289185_0:0:0_0:0:0_1e3 83 4 289086 199 100M = 288648 -538 CTGCACACACACCTGAAAACACACCTGCGCACACCTGAGCACACATCTGCACTCACCTGTGCACACACCTACACACACACCTGAGCACACCCCACCCACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:481 NH:i:1
4_288648_289185_0:0:0_0:0:0_1e3 163 4 288648 199 100M = 289086 538 GCAAACCCGCAGATGGGACGCAGAGGCTTCCAGGAAAACAGCAACACCAAACACACCGGACGGGAGTCCTGGCTGCAGGCCGTTCTACATCCAGGGCCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:481 NH:i:1
4_977523_978058_0:0:0_0:0:0_1e4 83 4 977959 199 100M = 977523 -536 GCTTCTCAGAATTGAAATTCAGGTAATTCTATGTAGCCTAATTTTTTTTTCTTTAACTTACAATTTAACCTCAATTAAAGCACATCCCCTAAAGCATTTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:482 NH:i:1
4_977523_978058_0:0:0_0:0:0_1e4 163 4 977523 199 100M = 977959 536 GCACTGAACCCCTTCCGTACCTCTTACCAAGTCTCAGTCTGGCTGGTCATCAGGAACCTGTGTGCCCTGCCCTCCAATGCACGCACACGAACATGGCCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:482 NH:i:1
-4_36207_36743_0:0:0_0:0:0_1e5 99 4 36207 199 100M = 36644 537 TGTCATTGGAGAGTACTCCTGTCTTCTTGGCATTATTGATAATTCGATTCTAATTGCTAATAAGTCAGAAAAATTAGGAACACCAAATTTCAGTCGTCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:483 NH:i:1
+4_36207_36743_0:0:0_0:0:0_1e5 1123 4 36207 199 100M = 36644 537 TGTCATTGGAGAGTACTCCTGTCTTCTTGGCATTATTGATAATTCGATTCTAATTGCTAATAAGTCAGAAAAATTAGGAACACCAAATTTCAGTCGTCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:483 NH:i:1
4_36207_36743_0:0:0_0:0:0_1e5 147 4 36644 199 100M = 36207 -537 ACACTGATTTTTAGATTTGTATTGGTAGGATAATTCCACTTGGTTATATTGTCTAACTTTTTTCTAATTTTCTTTAATTTTTATTAGAGATGAGGCCTCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:483 NH:i:1
-4_823937_824458_0:0:0_0:0:0_1e6 99 4 823937 199 100M = 824359 522 ACTGACCCGTCTGAGTGATCACCCAGGAGCGCGGCGGCAGCAAGCAGAGCTCACCGGATTTGGGACAAGGATTTTAAAGGCAGCTACAAAGCTGAGCTCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:484 NH:i:1
+4_823937_824458_0:0:0_0:0:0_1e6 1123 4 823937 199 100M = 824359 522 ACTGACCCGTCTGAGTGATCACCCAGGAGCGCGGCGGCAGCAAGCAGAGCTCACCGGATTTGGGACAAGGATTTTAAAGGCAGCTACAAAGCTGAGCTCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:484 NH:i:1
4_823937_824458_0:0:0_0:0:0_1e6 147 4 824359 199 100M = 823937 -522 ACACACCCAGTGGAAGTAACCACACCCGGTGTGTTCCTAGAAGCTCACCTGTGACAGTTCAACAAGAACTTACTATTCCAGAAAAGTATTACACAAAGTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:484 NH:i:1
4_252469_253014_0:0:0_0:0:0_1e7 83 4 252915 199 100M = 252469 -546 TGTGAGAAGGGAGCCGGGGGGAGCCGGGCAGACCTACAGCTTCTACACTCGGAGCCGTTGTAGGCTGGGAGGGTTCCGTTCCCACAGCGGACGCAGCTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:485 NH:i:1
4_252469_253014_0:0:0_0:0:0_1e7 163 4 252469 199 100M = 252915 546 GCTGTTTTGCAGAACCCTGAGCCTGCTGTGGCTGCGTCAAGGGTGAGGGTCTGCACTGGCTCCGACGGCCCCCAGCCCCCAGGACACCGGCAGAGGTGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:485 NH:i:1
-4_414558_415019_0:0:0_0:0:0_1e8 99 4 414492 199 100M = 414524 132 GGAGGCTGGGCCTCCCACTCCGCCCTACAGGCCGGGACACGGGCAGCCCTGGGAGGCTGGACCGAGGGAGGCTGGGCCTCCCACTCCGCCCTACAGGCCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:486 NH:i:1
+4_414558_415019_0:0:0_0:0:0_1e8 1123 4 414492 199 100M = 414524 132 GGAGGCTGGGCCTCCCACTCCGCCCTACAGGCCGGGACACGGGCAGCCCTGGGAGGCTGGACCGAGGGAGGCTGGGCCTCCCACTCCGCCCTACAGGCCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:486 NH:i:1
4_414558_415019_0:0:0_0:0:0_1e8 147 4 414524 199 100M = 414492 -132 CGGGACACGGGCAGCCCTGGGAGGCTGGACCGAGGGAGGCTGGGCCTCCCACTCCGCCCTACAGGCCGGGACACGGGCAGCCCTGGGAGGCTGGACCGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:486 NH:i:1
4_900727_901200_0:0:0_0:0:0_1e9 83 4 838243 199 100M = 837869 -474 GGCTGGGCAGCATGGGTCACCGTGACCGCCCGGGGGTGGGGCCGCAGCAGGGACTCCGGGCGCCAGGAACGAGGCCACCAGGGCCTCTCCCAGGCAAAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:487 NH:i:1
4_900727_901200_0:0:0_0:0:0_1e9 163 4 837869 199 100M = 838243 474 GCCCGAGCCGCCGCCTACCTAGCATGCTGGGCTCGCCTCCCAGCAGGGACAGGATGTACTTGTTGAGGAAGAGCGTGCAGAAGCTGAAGAAGAACCACAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:487 NH:i:1
@@ -785,19 +785,19 @@ chr3_727319_727786_0:0:0_0:0:0_c9 163 chr3 727319 199 100M = 727687 468 CCAAAGCA
4_741687_742256_0:0:0_0:0:0_1eb 163 4 741687 199 100M = 742157 570 TTTTCCCCTTTTGATTGAGATCTTTCTCCAAATGCATCACTAATGAGTCATCCTGTAATTAGGTTTTGATAGTCCCCGTGCTGGGATGAACTTGTCCCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:489 NH:i:1
4_864592_865124_0:0:0_0:0:0_1ec 83 4 865025 199 100M = 864592 -533 CGAAGCGCTTGCGGAGGTTGTTGTAGGGGTGCTCGCTGAAGGTCATCTTTTTGACTACTGGGAGCTCACTGTAGCCGGGCCAGATTTTCTCACTGGGGGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:490 NH:i:1
4_864592_865124_0:0:0_0:0:0_1ec 163 4 864592 199 100M = 865025 533 CTGGCTGTAGCCCAGGCCTCCCTCAGGGGGCCTCGGGCTGGTGCCCCGCTTCACACGCTGCTGCTCGCTCTTGGCGGGCCACGTGGGGAACATGGAGGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:490 NH:i:1
-4_284177_284624_0:0:0_0:0:0_1ed 99 4 284177 199 100M = 284525 448 AGCAGGGATGCTAGGTCTGAAAGCCACAGGGGAGCATCGTAAGTCGCAGGTGCCCACAGATCACACCAGGGACCCCACCTCTGCCAACCGGGCTGTTTGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:491 NH:i:1
+4_284177_284624_0:0:0_0:0:0_1ed 1123 4 284177 199 100M = 284525 448 AGCAGGGATGCTAGGTCTGAAAGCCACAGGGGAGCATCGTAAGTCGCAGGTGCCCACAGATCACACCAGGGACCCCACCTCTGCCAACCGGGCTGTTTGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:491 NH:i:1
4_284177_284624_0:0:0_0:0:0_1ed 147 4 284525 199 100M = 284177 -448 GCCTTGGCCTCCCACAGCACCTGGGGCCACAGGTGTGAGCTGCCGTGCCCAGCCCGGCATTCATAATGCTCAAGAGAACTTGTCCTGAGAGAGGGTCGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:491 NH:i:1
-4_695472_696023_0:0:0_0:0:0_1ee 99 4 695472 199 100M = 695924 552 TAGCAGTGCTTTTGAATGAACGTGTGACAGCTTAATGAAGCAGCCGAGTACCTTGATTTGAATGTTGGAGCCGGGGTTCACAGGGGGCTGTATTAGTCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:492 NH:i:1
+4_695472_696023_0:0:0_0:0:0_1ee 1123 4 695472 199 100M = 695924 552 TAGCAGTGCTTTTGAATGAACGTGTGACAGCTTAATGAAGCAGCCGAGTACCTTGATTTGAATGTTGGAGCCGGGGTTCACAGGGGGCTGTATTAGTCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:492 NH:i:1
4_695472_696023_0:0:0_0:0:0_1ee 147 4 695924 199 100M = 695472 -552 CAGTGAGCTGTGATTGCCACTGCACTCCAGCCTGGATTACAGAGCAAGACCCTGTCTTAAAAACTAAGAATAATGGCCGGGCGCGGTGGCTCACGCCTGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:492 NH:i:1
4_23246_23766_0:0:0_0:0:0_1ef 83 4 23667 199 100M = 23246 -521 TTTCTGAGGAGCTGAACTGTTCCAACCTGAGTATTCTGAATAAGGACAGTGGTCGAGCATGAGTGATGCCATCTGGGCTTAGAAATAAGTGGGCCTAAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:493 NH:i:1
4_23246_23766_0:0:0_0:0:0_1ef 163 4 23246 199 100M = 23667 521 GCTTGCGACAGGCGGTGTATAAAAACTAATGTCAGTTTAATTTTAAAACCTTAGCCATTTTCTGGAACTTAAATATCAAAGAGAAAATGTCCACATATGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:493 NH:i:1
-4_949101_949634_0:0:0_0:0:0_1f0 99 4 949101 199 100M = 949535 534 GGAATTAACCTGCAGCATATGGCCAGCCTTGTTAAAATTCAAAGACACGCACACACACGCAATCGATAAATCCAGAGCCCCTGGCATTGACTTCTCAGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:494 NH:i:1
+4_949101_949634_0:0:0_0:0:0_1f0 1123 4 949101 199 100M = 949535 534 GGAATTAACCTGCAGCATATGGCCAGCCTTGTTAAAATTCAAAGACACGCACACACACGCAATCGATAAATCCAGAGCCCCTGGCATTGACTTCTCAGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:494 NH:i:1
4_949101_949634_0:0:0_0:0:0_1f0 147 4 949535 199 100M = 949101 -534 CTCAAAAAAAAAAAAAAAAAATCAAAACCGCCCCAATCTCAAAGCAATCTGTAACACAGGAGCTTAGAGGCATGAGCCGTTTTTCCTTTGTCTGTGATCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:494 NH:i:1
-4_20983_21475_0:0:0_0:0:0_1f1 99 4 20983 199 100M = 21376 493 TTTGTGCTTGCATGTGCTGCGTGTTTGTGTTGGTGGGATGTGGGGAGGCTGCTGTGGAGACAAAAGCTGGAGAGAACTTGGTAGAGTAACCCAGGTCAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:495 NH:i:1
+4_20983_21475_0:0:0_0:0:0_1f1 1123 4 20983 199 100M = 21376 493 TTTGTGCTTGCATGTGCTGCGTGTTTGTGTTGGTGGGATGTGGGGAGGCTGCTGTGGAGACAAAAGCTGGAGAGAACTTGGTAGAGTAACCCAGGTCAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:495 NH:i:1
4_20983_21475_0:0:0_0:0:0_1f1 147 4 21376 199 100M = 20983 -493 TTAAATACTATGGGCCACAGACGCCTTGTCTCCATATCTACTCTTTTATAAATGAAGTAAAGTTCTTGGGAAATACTTTTCTCACTCTAAAGCTCACAAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:495 NH:i:1
4_962939_963386_0:0:0_0:0:0_1f2 83 4 963287 199 100M = 962939 -448 CAATAAGAAATATAAATGTGCAAAAGCATATTCCAAAGGCGGGCAGAAAAAACCAAGACCCAACAATGATGCGAGGTCTGTAACAGACTATTTTCAAAGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:496 NH:i:1
4_962939_963386_0:0:0_0:0:0_1f2 163 4 962939 199 100M = 963287 448 CACAGGTGTGATGCATATGAAAACCACAAGAGTGAGCAAGGGCAGACAGCAGGCCCGTGTGGCTGTGAGTTCCTTACGTTTCCTATCAACTAGGACCAAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:496 NH:i:1
-4_998229_998741_0:0:0_0:0:0_1f3 99 4 998229 199 100M = 998642 513 TTTGTATTTTTAGTAGAGATGGGGTTTCCCCATGTTGGCCAGGTTGTTCTCGAACTCCCAACCTCAGGTGATCCGCCTGCCTGGGCCTCCCAAAGTGCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:497 NH:i:1
+4_998229_998741_0:0:0_0:0:0_1f3 1123 4 998229 199 100M = 998642 513 TTTGTATTTTTAGTAGAGATGGGGTTTCCCCATGTTGGCCAGGTTGTTCTCGAACTCCCAACCTCAGGTGATCCGCCTGCCTGGGCCTCCCAAAGTGCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:497 NH:i:1
4_998229_998741_0:0:0_0:0:0_1f3 147 4 998642 199 100M = 998229 -513 TATAAATGTTTTCTTTTACGCATTCTATCTTTGATCTCACATCTAAAAACTGTTTGCTTAATTCAAAATCTCATTTTTTCTAATTTTACGTTTTATATTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:497 NH:i:1
4_482953_483434_0:0:0_0:0:0_1f4 83 4 483335 199 100M = 482953 -482 GTCTCGCGTCTCGCTCTGTCTCCCAGGCTGGAGTGCAGTGGTGGGATCTTGGCTCACTGCAAGCTCCGCCTCCCAGGTTCACGCCATTCTCCTGCCTTTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:498 NH:i:1
4_482953_483434_0:0:0_0:0:0_1f4 163 4 482953 199 100M = 483335 482 AGGAAACAGGAAAGACAAACAGATGACGGAGTTTCAAGAGGTTGAACGTCAGGCTCTCGGGGTCCCTGGAGGAGCAGGAACGGGGCAGAAACAACACTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:498 NH:i:1
@@ -805,41 +805,41 @@ chr3_727319_727786_0:0:0_0:0:0_c9 163 chr3 727319 199 100M = 727687 468 CCAAAGCA
4_986954_987413_0:0:0_0:0:0_1f5 163 4 986954 199 100M = 987314 460 CTCACTCATCTTCCGATCTTAGTGCTCTTCAATGCCACCTTCAAGAATGTGAGATCTGAAACAGAAAGACAAGCACAATCAATAAACAACTGTTGGATAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:499 NH:i:1
chr5_295694_296191_0:0:0_0:0:0_322 83 chr5 296092 199 100M = 295694 -498 AAATTCCGGCACGCCAGGTATTTCTGAGGGGCCTCCGGGAGCCTCCTGCCCTGCCCGGGGCTGGTCGGCTCCCTCGGGCCTGGGGCAGGGCTGGCGTCTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:800 NH:i:1
chr5_295694_296191_0:0:0_0:0:0_322 163 chr5 295694 199 100M = 296092 498 GTCAAACACTTCTGCAAGGCTTGGACACATATGATCTTCAGGCCGCGCCGAGGCCACCAGCTCCGTGTGCTGGGAGGCTCCTGTGTTTACTTCCAACGTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:800 NH:i:1
-chr5_389377_389858_0:0:0_0:0:0_323 99 chr5 389377 199 100M = 389759 482 ATCTCCTTGGCACTGATCTTCCGGTCAGTGTTCACATCCACCCTGCAAGACAGCAAATGGGCAGGTGGCCGTCAGACTGGGCCCCCAGGACCCCGAACAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:801 NH:i:1
+chr5_389377_389858_0:0:0_0:0:0_323 1123 chr5 389377 199 100M = 389759 482 ATCTCCTTGGCACTGATCTTCCGGTCAGTGTTCACATCCACCCTGCAAGACAGCAAATGGGCAGGTGGCCGTCAGACTGGGCCCCCAGGACCCCGAACAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:801 NH:i:1
chr5_389377_389858_0:0:0_0:0:0_323 147 chr5 389759 199 100M = 389377 -482 TCACTCTGTCACCCAGGCTGGAGTGTGGTGGCGCAATCTTTGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTACTGCCTCAGCCTCCCGAGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:801 NH:i:1
chr5_516369_516890_0:0:0_0:0:0_324 81 chr5 515951 199 100M = 516649 798 AGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:802 NH:i:1
chr5_516369_516890_0:0:0_0:0:0_324 161 chr5 516649 199 100M = 515951 -798 ACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACCGGGCAGGAGCGACGGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:802 NH:i:1
-chr5_644_1075_0:0:0_0:0:0_325 99 chr5 644 199 100M = 976 432 TTGTCTCTGCTTCTCTTTGTATTTTGTTCTCACTATTCTCTATTTCATAAGGCTTCCTGAAAGCGGCATCCATTTATTCCTTGTAGCAAGATCCCTAGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:803 NH:i:1
+chr5_644_1075_0:0:0_0:0:0_325 1123 chr5 644 199 100M = 976 432 TTGTCTCTGCTTCTCTTTGTATTTTGTTCTCACTATTCTCTATTTCATAAGGCTTCCTGAAAGCGGCATCCATTTATTCCTTGTAGCAAGATCCCTAGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:803 NH:i:1
chr5_644_1075_0:0:0_0:0:0_325 147 chr5 976 199 100M = 644 -432 CGATGGTGTGCCTCCGAAGCCACGCGGCTTACTTCACAGCCAGGATCCGGGATCAGGCTATGTCCCATGCAGCCTGATGAAGGGTAAAAAAGAGCCCAAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:803 NH:i:1
-chr5_139510_140008_0:0:0_0:0:0_326 99 chr5 139510 199 100M = 139909 499 CAGCCCCCACGCCCGTGTGCCCCGGGCTCCGGGCTGGCCGGGGGTCTGGTGTGGGGCCTCTTGGGACTCTGAGGGAGCAGGGAGGGAAACAGGAGGACGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:804 NH:i:1
+chr5_139510_140008_0:0:0_0:0:0_326 1123 chr5 139510 199 100M = 139909 499 CAGCCCCCACGCCCGTGTGCCCCGGGCTCCGGGCTGGCCGGGGGTCTGGTGTGGGGCCTCTTGGGACTCTGAGGGAGCAGGGAGGGAAACAGGAGGACGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:804 NH:i:1
chr5_139510_140008_0:0:0_0:0:0_326 147 chr5 139909 199 100M = 139510 -499 AGGAGCCGGGACCCCGGCTACGACCACCTCTGGGACGAGACTTTGTCTTCCTCCCACCAGAAGTGCCCCCAGCTTGGAGGGCCTGAGGCCAGTGGGGGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:804 NH:i:1
-chr5_227494_227905_0:0:0_0:0:0_327 99 chr5 227494 199 100M = 227806 412 CCGCCTCCCTCCCTCCCTCCCTTGTCCCCGTTCCCTCCGTCCCTCTCCCCCTTCCTTCCCTCCCTCCCTCACCACCATTCCCTCCCTCCCACATCCCCTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:805 NH:i:1
+chr5_227494_227905_0:0:0_0:0:0_327 1123 chr5 227494 199 100M = 227806 412 CCGCCTCCCTCCCTCCCTCCCTTGTCCCCGTTCCCTCCGTCCCTCTCCCCCTTCCTTCCCTCCCTCCCTCACCACCATTCCCTCCCTCCCACATCCCCTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:805 NH:i:1
chr5_227494_227905_0:0:0_0:0:0_327 147 chr5 227806 199 100M = 227494 -412 CATGGGGGGCTGCAGACTAGGAAGGTCCTGGGACGGGGGGGCTGTTCACCAGGAAGGGGCAGGGCTGCAGCCTCAGCCTCCCCTCCAGATGCCGGCAGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:805 NH:i:1
-chr5_390368_390851_0:0:0_0:0:0_328 99 chr5 390368 199 100M = 390752 484 AAGACAAAACAAAATCCTTCATATAAATTTAAGTTGTGAGATTTAGGAAGAAGACCGTGTGGGGACTTAACATTGTGTGCAAGGATGGAAGAGACCACAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:806 NH:i:1
+chr5_390368_390851_0:0:0_0:0:0_328 1123 chr5 390368 199 100M = 390752 484 AAGACAAAACAAAATCCTTCATATAAATTTAAGTTGTGAGATTTAGGAAGAAGACCGTGTGGGGACTTAACATTGTGTGCAAGGATGGAAGAGACCACAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:806 NH:i:1
chr5_390368_390851_0:0:0_0:0:0_328 147 chr5 390752 199 100M = 390368 -484 TAGGGTGCCCTGGCCCCTTGCCTCTATGCTCCGCTCAGGGGCAGCCGCGGCCACGTTATGAGGTGGCAAGCAGGCCCTCGTCAGGCAGGTGTGGCCTCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:806 NH:i:1
-chr5_17167_17656_0:0:0_0:0:0_329 99 chr5 17167 199 100M = 17557 490 GTAATGAGGAGAAAAATGCAGCCATGTTGCAGCCTGATAAAATACTCTGCCAGTGTCCTGCCATAAAATGACAGGCGAATTTTCAGTAGCAGATTCTCTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:807 NH:i:1
+chr5_17167_17656_0:0:0_0:0:0_329 1123 chr5 17167 199 100M = 17557 490 GTAATGAGGAGAAAAATGCAGCCATGTTGCAGCCTGATAAAATACTCTGCCAGTGTCCTGCCATAAAATGACAGGCGAATTTTCAGTAGCAGATTCTCTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:807 NH:i:1
chr5_17167_17656_0:0:0_0:0:0_329 147 chr5 17557 199 100M = 17167 -490 CCAGGTAACATCACATGATCGTTCAAAAAATAATCATCCAAGTCCAGGATGCAGCATCTGTCTCAGTTTTTGGCATAGGATGACAGTCTGAAATACACTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:807 NH:i:1
-chr5_484940_485422_0:0:0_0:0:0_32a 99 chr5 484940 199 100M = 485323 483 AGGGGAGTGCCCCGCAGTGGTCCAGGTGGAAGTGGCTAGGGGGACGCAGCACAGGTCAGCCTGGGCCCACCCTGCCCCAGCCTTCACCCCCGCTCTGGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:808 NH:i:1
+chr5_484940_485422_0:0:0_0:0:0_32a 1123 chr5 484940 199 100M = 485323 483 AGGGGAGTGCCCCGCAGTGGTCCAGGTGGAAGTGGCTAGGGGGACGCAGCACAGGTCAGCCTGGGCCCACCCTGCCCCAGCCTTCACCCCCGCTCTGGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:808 NH:i:1
chr5_484940_485422_0:0:0_0:0:0_32a 147 chr5 485323 199 100M = 484940 -483 GCAAGGGCAGCGCCTGGCAAGCCCCAGGAAGGCACTGGGGACCCCACCGTGTCTGAGAAGAAACCACAACCACGAGGGGCTCAGGGGCAGCTGAGGACAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:808 NH:i:1
-chr5_321589_322070_0:0:0_0:0:0_32b 99 chr5 321589 199 100M = 321971 482 AGTCCCCACTCGGGAAACTCCCACTCAAACTACACCTGCTCAGCTGGAATCTGCAGGAAATCTGTGCCGACGACACCAGGGAAGAGTGTGCACATTCTTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:809 NH:i:1
+chr5_321589_322070_0:0:0_0:0:0_32b 1123 chr5 321589 199 100M = 321971 482 AGTCCCCACTCGGGAAACTCCCACTCAAACTACACCTGCTCAGCTGGAATCTGCAGGAAATCTGTGCCGACGACACCAGGGAAGAGTGTGCACATTCTTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:809 NH:i:1
chr5_321589_322070_0:0:0_0:0:0_32b 147 chr5 321971 199 100M = 321589 -482 AGGGTGGCTGTGGAGGACTGGGGCAGATCTCAATGAATAACTGAGAAGAAAACAAAGGGACAGGCTGCAGATCAGGCTCCCGGGAGAGGGCTGTGTCTCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:809 NH:i:1
chr5_62946_63439_0:0:0_0:0:0_32c 83 chr5 63340 199 100M = 62946 -494 GTAGGGATGAACCTCTCCCTCCTTTGCCGCTTCAGCTTTAGCTGGCAAACAAACCTGGTCTGTAACCTCTTCTGTTACGTCGCTATACTCTCCTTCCTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:810 NH:i:1
chr5_62946_63439_0:0:0_0:0:0_32c 163 chr5 62946 199 100M = 63340 494 GCCCAATGATTTCCTTTTTTACATTTTGGACATATTTCAGCTTAGCAGTTCTCTTTTTTCCCCTATCTGACAGCCGACTCACTGATTTTTTTTCTACATT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:810 NH:i:1
-chr5_171568_172090_0:0:0_0:0:0_32d 99 chr5 171568 199 100M = 171991 523 TCCCTGCTCATGGGCCTGCCCCTCCACTCTATCCTCCCTGACGCAGCCAGTGGCATGCTTCTAACCTGCAGATGCCCTCCCCAGGGGCCAAGCCCACCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:811 NH:i:1
+chr5_171568_172090_0:0:0_0:0:0_32d 1123 chr5 171568 199 100M = 171991 523 TCCCTGCTCATGGGCCTGCCCCTCCACTCTATCCTCCCTGACGCAGCCAGTGGCATGCTTCTAACCTGCAGATGCCCTCCCCAGGGGCCAAGCCCACCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:811 NH:i:1
chr5_171568_172090_0:0:0_0:0:0_32d 147 chr5 171991 199 100M = 171568 -523 GAAAGGCGGCCAACCCAGGGTCTCCAGTGACACGTTTCCGGGTCATCATCCGGTGGCCACAGGGAATCACAGGCTCGGTGTGACTTTCTCAGAGCGCCAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:811 NH:i:1
-chr5_209986_210509_0:0:0_0:0:0_32e 99 chr5 209986 199 100M = 210410 524 TGTGGACGTGTGTATTGTGTTGTAAGTGAGCATCGTCCAGTGTTGGTGCCTGTGCACATTTGTGCAGCTGCGTGTGTGGGCGTGTGTGTCCATCAGTCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:812 NH:i:1
+chr5_209986_210509_0:0:0_0:0:0_32e 1123 chr5 209986 199 100M = 210410 524 TGTGGACGTGTGTATTGTGTTGTAAGTGAGCATCGTCCAGTGTTGGTGCCTGTGCACATTTGTGCAGCTGCGTGTGTGGGCGTGTGTGTCCATCAGTCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:812 NH:i:1
chr5_209986_210509_0:0:0_0:0:0_32e 147 chr5 210410 199 100M = 209986 -524 GGATCTGCATTTACAGACATGTTCTTGCGTAAGATGTGGGAGCCTCACCTGTGTCCTCAGCCCCAGGCTCCCGGGCTCCTCTGGGAGCTGGGATCGGGAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:812 NH:i:1
-chr5_94617_95181_0:0:0_0:0:0_32f 99 chr5 94617 199 100M = 95082 565 CCGGCGCTGGTGTGCTGTGCAGGGTGCCGCGGGCACGTCCGCCGCGTGTGTGCGTCAGCTCGGGGCTCGGCTGTGCTCTGCAGGGACCACAGCGGGCGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:813 NH:i:1
+chr5_94617_95181_0:0:0_0:0:0_32f 1123 chr5 94617 199 100M = 95082 565 CCGGCGCTGGTGTGCTGTGCAGGGTGCCGCGGGCACGTCCGCCGCGTGTGTGCGTCAGCTCGGGGCTCGGCTGTGCTCTGCAGGGACCACAGCGGGCGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:813 NH:i:1
chr5_94617_95181_0:0:0_0:0:0_32f 147 chr5 95082 199 100M = 94617 -565 GCGCATTTGAGTGCACGTCCACCAGCACCAGCCCCAGGCCACAGGCAGATCCCAGGAGACACGCAGGGGCCCTAAGAAGGGAGCTGGGAATGAGGGGCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:813 NH:i:1
-chr5_553726_554265_0:0:0_0:0:0_330 99 chr5 553726 199 100M = 554166 540 ACTTCGGGAGGCCGAGGAGGGCGGATCACAAAGTCAATACATTGAGACCATCCTGCCCAACATGGTGAAACCCCATCTCTACTAAAACTACAAAAATTAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:814 NH:i:1
+chr5_553726_554265_0:0:0_0:0:0_330 1123 chr5 553726 199 100M = 554166 540 ACTTCGGGAGGCCGAGGAGGGCGGATCACAAAGTCAATACATTGAGACCATCCTGCCCAACATGGTGAAACCCCATCTCTACTAAAACTACAAAAATTAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:814 NH:i:1
chr5_553726_554265_0:0:0_0:0:0_330 147 chr5 554166 199 100M = 553726 -540 ATCCCAGATAGCGCCACTGCACTCCAGGCTGGGCAACAGAGTAAGACTCCGTCTCAGAAAAAAAAAAAAAAAAAAAAAAGCTGGGTATAGTGGCTCCCAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:814 NH:i:1
chr5_716767_717289_0:0:0_0:0:0_331 83 chr5 717190 199 100M = 716767 -523 AGGGACATCCTGAACGTTTTGCCCATCCTTACGCCAACGTCACATCGCCCCGAAGGCCTGAGGGAGGGTGGTGGGGCCCGAGGCCTGTGCTCACCCCATG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:815 NH:i:1
chr5_716767_717289_0:0:0_0:0:0_331 163 chr5 716767 199 100M = 717190 523 CCACAGCTCTCCAGCCTGCCAGGCATGGGGAGGACCCCAGAGGCTGCAAAGGGAACAGCAGCAGGGTGAGCCCCTGAGCAGGCACGCTGGGCAGCTTCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:815 NH:i:1
-chr5_317178_317640_0:0:0_0:0:0_332 99 chr5 317178 199 100M = 317541 463 GCCACGGTCCGTTCTGTGGCCTCGGCTCTCCAGGACTCGGAGGCTCCACGCCCCTTCCTGCTCTGCCCAGCCTGGGGCTGCCCCGCTCCCCTCTGCCTCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:816 NH:i:1
+chr5_317178_317640_0:0:0_0:0:0_332 1123 chr5 317178 199 100M = 317541 463 GCCACGGTCCGTTCTGTGGCCTCGGCTCTCCAGGACTCGGAGGCTCCACGCCCCTTCCTGCTCTGCCCAGCCTGGGGCTGCCCCGCTCCCCTCTGCCTCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:816 NH:i:1
chr5_317178_317640_0:0:0_0:0:0_332 147 chr5 317541 199 100M = 317178 -463 AGCAGCTAAACCCCACACGCTGGCCAGCCTTTTTCCCTCCTGTCCTTTCCGTCCCAGAACCCCATCCAGGCCCCGTGGCCTCGTCACCTCTCCTTGGTCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:816 NH:i:1
chr5_211369_211877_0:0:0_0:0:0_333 83 chr5 211778 199 100M = 211369 -509 GCCCCACCCGCCCTGAGCCACCTGACCCTGTCCCAACCGGTCCCCCCGCCAACCTCCCTCTCCTTGCAGAGGCTGTTGCTCCCAGCACTCACCCGACATC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:817 NH:i:1
chr5_211369_211877_0:0:0_0:0:0_333 163 chr5 211369 199 100M = 211778 509 GTGGGGTCACCCGAGCCACAGAGGTTTCCCATGCCCGTGCCCCAGACGCTTCTGCGCCTGCGACCTGTGCGGAGATGCGCTGTGAGTTCGGTGCGCGGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:817 NH:i:1
-chr5_458564_459123_0:0:0_0:0:0_334 99 chr5 458564 199 100M = 459024 560 GATGGGCCACCCTTTCTGCCCTGGGAGGAGGCTCTGGGGTGTGCAGGAGGGAGCCAGCTAACAGGTCAGTGTTCTGGGCAGAAGTGGGGCACGAAGTAAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:818 NH:i:1
+chr5_458564_459123_0:0:0_0:0:0_334 1123 chr5 458564 199 100M = 459024 560 GATGGGCCACCCTTTCTGCCCTGGGAGGAGGCTCTGGGGTGTGCAGGAGGGAGCCAGCTAACAGGTCAGTGTTCTGGGCAGAAGTGGGGCACGAAGTAAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:818 NH:i:1
chr5_458564_459123_0:0:0_0:0:0_334 147 chr5 459024 199 100M = 458564 -560 TCCAGGTGGAGGCTGATGAACTCCTGGATACACCTGCGGAACTGGAGCTCCGTGGGGCTGCCCGCCAGGGCGCCCGGGGGACCAGGGGCAGCCTCGGCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:818 NH:i:1
chr5_488519_489007_0:0:0_0:0:0_335 83 chr5 488908 199 100M = 488519 -489 GCAGTGGCACAATCATGGCTCACTGCAGCCTCAACCTCCTGGGCTCAAGCAATCCTCACAACTCAGCCTCCCAAGTAGCTTGAACCACAGGCACACACCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:819 NH:i:1
chr5_488519_489007_0:0:0_0:0:0_335 163 chr5 488519 199 100M = 488908 489 AGTGGGGCCCATGCAAACAGCTGCAAAATGCCATGGGGTGTGCACAGGCCAGGGGTGTTCACAGCACAGTGTCCACACCGGGGCGGGGGGCACCCTCCGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:819 NH:i:1
@@ -849,81 +849,81 @@ chr5_199317_199781_0:0:0_0:0:0_337 83 chr5 199682 199 100M = 199317 -465 GACCGAG
chr5_199317_199781_0:0:0_0:0:0_337 163 chr5 199317 199 100M = 199682 465 TTTTGTGCGGCGCTCTCCCGTCCGCATCCGCCCGATCCTCTGGAACTCCCCGCGGACGCCGGGGTCCCTGGAGGCGGCTTCCTTTGTCTCTGCTCCCGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:821 NH:i:1
chr5_409604_410114_0:0:0_0:0:0_338 83 chr5 410015 199 100M = 409604 -511 CGGGGAGCACTCAGGGCAGAACCAGGGAAGGCATCGCCGGCCAGGAGAAGGACCCACGGCCGCCCTTGGGGGAGTCAGAACCCTGACGTCCCAATGCCAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:822 NH:i:1
chr5_409604_410114_0:0:0_0:0:0_338 163 chr5 409604 199 100M = 410015 511 GGCACGTCTGGTCTCTGGGCAGTGCAGGGCGGCTGACCTTTCAGCAGCTCCTGAAACTCGTGAAGCAGAGTCTCCGCGGTCACTTCTGCACCTGGAGGTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:822 NH:i:1
-chr5_77232_77752_0:0:0_0:0:0_339 99 chr5 77232 199 100M = 77653 521 AGATTCAATGCTGGATTTGTATAGTGTATAGATTTTCAATAGTTATTTAAAATCATCAAAAACAATCGTGTTTACAATTTACTGGGCAGCCACTATATGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:823 NH:i:1
+chr5_77232_77752_0:0:0_0:0:0_339 1123 chr5 77232 199 100M = 77653 521 AGATTCAATGCTGGATTTGTATAGTGTATAGATTTTCAATAGTTATTTAAAATCATCAAAAACAATCGTGTTTACAATTTACTGGGCAGCCACTATATGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:823 NH:i:1
chr5_77232_77752_0:0:0_0:0:0_339 147 chr5 77653 199 100M = 77232 -521 AATAATCACAGTGATGAGCTCTGGAGGAGCCATGGCCCAGAGCAGCTGTGGCCTCCTTTGATTAAACCACAGAAGTCCGGAGGATCAGGCCTCAAACCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:823 NH:i:1
-chr5_237734_238333_0:0:0_0:0:0_33a 99 chr5 237734 199 100M = 238234 600 CCTCACCACCGGGGCGGCCCACAGGCTGAGCAGCCGCCAGCCGGGCCAGGCACTCCAGGCAGAAGACGTGGGTGCAGGAGAGCTCCTTGGGTGTCTTGAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:824 NH:i:1
+chr5_237734_238333_0:0:0_0:0:0_33a 1123 chr5 237734 199 100M = 238234 600 CCTCACCACCGGGGCGGCCCACAGGCTGAGCAGCCGCCAGCCGGGCCAGGCACTCCAGGCAGAAGACGTGGGTGCAGGAGAGCTCCTTGGGTGTCTTGAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:824 NH:i:1
chr5_237734_238333_0:0:0_0:0:0_33a 147 chr5 238234 199 100M = 237734 -600 TGAGCCTCCCACAGGGCCCCAGGCCTGCTCCGGGAATGCAGGCCGTGTGTAGGGGGGTCTCACTGACCGCTCGGCAGACACCTCCTGTTGGCCCTGCCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:824 NH:i:1
chr5_181739_182181_0:0:0_0:0:0_33b 83 chr5 182082 199 100M = 181739 -443 TCCCGAGGAGCTGGGATTACAGGTGCGTGTCACCACACCTGGCTAATTTTGTATTTGTTTTGTTTTGTGTTTCTTTGAGATGGAGTTTCGCTCTGTCGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:825 NH:i:1
chr5_181739_182181_0:0:0_0:0:0_33b 163 chr5 181739 199 100M = 182082 443 CTGACCTCATGATCCGCCCGCTTCAGCCTTCCAAAGTGCTAGGGTTACAAGTGTGAGCCCCCGCACCCGGCCTAATTTTTGTATTTTTAGTAGAGATGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:825 NH:i:1
-chr5_351134_351625_0:0:0_0:0:0_33c 99 chr5 351134 199 100M = 351526 492 CTTGGTCACCCCGGCCAGGCCCCGGAAGCAGCAGCCAGGGATGGTCCTTCCTCACCCACAGGCGTGTGTCACTGTGGCTGGGACGGCAGCGCTCAGAGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:826 NH:i:1
+chr5_351134_351625_0:0:0_0:0:0_33c 1123 chr5 351134 199 100M = 351526 492 CTTGGTCACCCCGGCCAGGCCCCGGAAGCAGCAGCCAGGGATGGTCCTTCCTCACCCACAGGCGTGTGTCACTGTGGCTGGGACGGCAGCGCTCAGAGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:826 NH:i:1
chr5_351134_351625_0:0:0_0:0:0_33c 147 chr5 351526 199 100M = 351134 -492 CGTATCCTTTTCTTTTCTTTTTTTTTTTTTTGAGACAGAGTCTCACTCTGTTGCCCAGGCTGGAGTACAGTGGCACAATCTCAGCTCACTGCAACCTCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:826 NH:i:1
chr5_610747_611204_0:0:0_0:0:0_33d 81 chr5 662971 199 100M = 610747 -52324 TAAAGGAATTGCCTAAATCTTAAACCAGCAATCAATACTTTGTATTTAAAATTAAGCCAGGTTAGACTGGGCTAGGTGGCTCACGCCTGTAATCCCAGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:827 NH:i:1
chr5_610747_611204_0:0:0_0:0:0_33d 161 chr5 610747 199 100M = 662971 52324 CTGCAGTGCTTGGAGGGAGGCTGTCTACCTCTGCGCGCTCTCTCCATTTCTCTCTTTTTCCTTTTAGAGATGGGTTCTTACTCTGAGCCCAGGCTGGAGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:827 NH:i:1
chr5_938917_939398_0:0:0_0:0:0_33e 83 chr5 939299 199 100M = 938917 -482 ATTGGAATTTTCTGGCAAGTACTGGGCACTAAATCAGAGATGTGTTTTTAGAGAATTCCATGCCAATACTGCTGTATAGAATCTTTTATTCATACTTTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:828 NH:i:1
chr5_938917_939398_0:0:0_0:0:0_33e 163 chr5 938917 199 100M = 939299 482 TAAGGCAAACATGTCAGTTTCAGGCCAACTAAGGCTGTCAGAGAGACATGGAACACAAGAAGAGAAGAGTTTTGAAATGTATTCCCATCAAGAGCCAAGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:828 NH:i:1
-chr5_813487_813978_0:0:0_0:0:0_33f 99 chr5 813487 199 100M = 813879 492 GACAAGGTCTCCCTACGTTGCCCAGGCTGGCGTCATGCGGTCCTCTGGCCTTAGCCTCCCACGTAGCATATGCTACCATACCCAGTTCTGGATTGGCTGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:829 NH:i:1
+chr5_813487_813978_0:0:0_0:0:0_33f 1123 chr5 813487 199 100M = 813879 492 GACAAGGTCTCCCTACGTTGCCCAGGCTGGCGTCATGCGGTCCTCTGGCCTTAGCCTCCCACGTAGCATATGCTACCATACCCAGTTCTGGATTGGCTGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:829 NH:i:1
chr5_813487_813978_0:0:0_0:0:0_33f 147 chr5 813879 199 100M = 813487 -492 ACAAGAACCAGCGCCCTCTCATCATCTTTACATGCTGTGTACCCAGCCCTGGCTGCTGGCAAACATCGCTGCACACGCCCTTCCGCTTCAGACTCCTCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:829 NH:i:1
chr5_520905_521489_0:0:0_0:0:0_340 83 chr5 521390 199 100M = 520905 -585 GGAGGGGGAGTCACTGGGGGAGGGGCGGACCCTGATGGTCCTGATGGGGGAGGGTGAATCAGTGGGGGAGGGGCGGGCTCTGAGGACCCTGGTGGAGGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:830 NH:i:1
chr5_520905_521489_0:0:0_0:0:0_340 163 chr5 520905 199 100M = 521390 585 ATGAGCAGCGAGAAGTTGCCGTCGTCGAAGGCCGAGGCCGAGAGCTCCAGGCGGCCGCGGTCCCGCGCCTCGTACACGCGCTGCTCGCCCGCCGAGTACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:830 NH:i:1
chr5_280472_280982_0:0:0_0:0:0_341 83 chr5 280883 199 100M = 280472 -511 GAAGGCGAGGCGAACGCGCCCCCTTTCTTCCCCCGGGCGCCCGTCCCATCCCCTCCCCAGGGACCCCCTCCCTGCCCCGAGAACCCCCTCCCCTCCCCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:831 NH:i:1
chr5_280472_280982_0:0:0_0:0:0_341 163 chr5 280472 199 100M = 280883 511 GGAAAAATAAAAGAGGAAGACGCAGCTCTGCCTTCTCTGCACACCCTGGTAATACTAAGAAAAATGAAAACCAAAAAAAGGAGGCCAAGTTTGACATCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:831 NH:i:1
-chr5_951923_952420_0:0:0_0:0:0_342 99 chr5 951923 199 100M = 952321 498 GACTCGTGGCCAGTGAAGGTCTGCCGGCACATGCCTTCTCGCACATCCCAGAGTTTGGCTGAAGCATCACAAGCACCAGAGACGAACAGTCTGGTGTCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:832 NH:i:1
+chr5_951923_952420_0:0:0_0:0:0_342 1123 chr5 951923 199 100M = 952321 498 GACTCGTGGCCAGTGAAGGTCTGCCGGCACATGCCTTCTCGCACATCCCAGAGTTTGGCTGAAGCATCACAAGCACCAGAGACGAACAGTCTGGTGTCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:832 NH:i:1
chr5_951923_952420_0:0:0_0:0:0_342 147 chr5 952321 199 100M = 951923 -498 GGGCCTGGGCTTCAGAATGACGGGATCGCTACCTCAACTCAAATGCCAGCAAACAGGGAGCTGGGGGCACTTTTCAAGCAGCACCACTGAGGCTGTGCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:832 NH:i:1
chr5_131798_132360_0:0:0_0:0:0_343 83 chr5 132261 199 100M = 131798 -563 CGGCGTGCACGGTGGCTGTGGTCTGGGAGCGTGGCTCTGCCCGCGCGTGTGTGCCGTGTGTCCGTGCAGCTCAGGGTCTTCCCCTCGCCCCCGGGGCGTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:833 NH:i:1
chr5_131798_132360_0:0:0_0:0:0_343 163 chr5 131798 199 100M = 132261 563 GTGGAGGGCCCAGGGCAGAGACCTGCACGGCTCAGGTAGGGGCCAGGCCTCTTCCAGAAAGGCTCCACCCTCGCCCAGGCCCTGGTGCCGGGCGGGACGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:833 NH:i:1
-chr5_204921_205329_0:0:0_0:0:0_344 99 chr5 204921 199 100M = 205230 409 CCCCTCTGACATGACTTATTAATGATAATTGGACCCAATTAAACATTTATTAAATCCAGACAGCTGCAGCCGCTAACGGACCCTTCCCTGATCCTGGGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:834 NH:i:1
+chr5_204921_205329_0:0:0_0:0:0_344 1123 chr5 204921 199 100M = 205230 409 CCCCTCTGACATGACTTATTAATGATAATTGGACCCAATTAAACATTTATTAAATCCAGACAGCTGCAGCCGCTAACGGACCCTTCCCTGATCCTGGGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:834 NH:i:1
chr5_204921_205329_0:0:0_0:0:0_344 147 chr5 205230 199 100M = 204921 -409 GAGGAGGCAAGAGTGAGGTGGGGGGCCGGGGGATGCCCAGGGAGGAGGGGGCGTGTGGGTGCCCCAGGGATCCCAGGGAGCGATGGGAGGCTGCAGAAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:834 NH:i:1
-chr5_965007_965532_0:0:0_0:0:0_345 99 chr5 965007 199 100M = 965433 526 TCTCGAACCCCTGGCTCCAAGCTATCCTCCTACCTTGGCCTCCCAGAGTGTTGCAGTTACAGGCATGAGCCACCATGTCCAGCGCAAATCCACAGGTTTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:835 NH:i:1
+chr5_965007_965532_0:0:0_0:0:0_345 1123 chr5 965007 199 100M = 965433 526 TCTCGAACCCCTGGCTCCAAGCTATCCTCCTACCTTGGCCTCCCAGAGTGTTGCAGTTACAGGCATGAGCCACCATGTCCAGCGCAAATCCACAGGTTTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:835 NH:i:1
chr5_965007_965532_0:0:0_0:0:0_345 147 chr5 965433 199 100M = 965007 -526 GTGAGCCAAGAATGCGCTGCTGCACTCCAGCCTGGGTGACATAGTGAGACTCTGTCTTTAAAAAAAAAAAAAAAAAAAAAGAAAAAAAGGCTGGGTGTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:8 SD:i:0 SN:i:835 NH:i:1
chr5_865151_865723_0:0:0_0:0:0_346 83 chr5 865624 199 100M = 865151 -573 CCGTACTCCCGCGCCAGCCCAAAATCACCCACCTGCAACGACAGATGGGCGGCTGTGGGTGGGCCTGGGCGGGTCACCCTGGGATGGGCCACTCGGAGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:836 NH:i:1
chr5_865151_865723_0:0:0_0:0:0_346 163 chr5 865151 199 100M = 865624 573 TTCAACAGCCACACCAAGTGGCCCACAGTGTTGGCACCTGTGTCCCGTCAGAGAAGACAAGCCACCAGGAGGGCTCTCAGTGGCCCTGGTCCCCATCTCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:836 NH:i:1
chr5_249485_249918_0:0:0_0:0:0_347 83 chr5 249819 199 100M = 249485 -434 GGGGATCATTGCAGCCTTGAAAAGGAGAGAAAGGCAGAGTGAGGACTCGGGATGGCCTGGCACAGGCACACGCGGCCAGGGACGCTCAGCCCGGATGATG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:837 NH:i:1
chr5_249485_249918_0:0:0_0:0:0_347 163 chr5 249485 199 100M = 249819 434 ATTGGGGGTCTGCCCTGTCGTGGTGGGTCCTCCTTTCCTGCAGACCCCACGTCTCGGGAACAGGAAAGAGCATGGAAGTCACGGGGATTAAAGGGGGGCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:837 NH:i:1
-chr5_225547_226036_0:0:0_0:0:0_348 99 chr5 225547 199 100M = 225937 490 TTTGTTTTTTGTTTTTGAGATGGCGTCTCACTGTGTCACCCAGGCTGGAGTGCACTGGCGCGGTCTTGGCTCACTGCAACCTCCACCTCCCGGGTTTATG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:838 NH:i:1
+chr5_225547_226036_0:0:0_0:0:0_348 1123 chr5 225547 199 100M = 225937 490 TTTGTTTTTTGTTTTTGAGATGGCGTCTCACTGTGTCACCCAGGCTGGAGTGCACTGGCGCGGTCTTGGCTCACTGCAACCTCCACCTCCCGGGTTTATG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:838 NH:i:1
chr5_225547_226036_0:0:0_0:0:0_348 147 chr5 225937 199 100M = 225547 -490 AAGAAGAGGGTGTGGGCCGCGGGGGCAAGATCAGGAGCTTTGGGTTTTGAAGATGTTAGGAAAGACGTGCTGCTGACTTTGCAAACGGAGGTGCTGAAAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:838 NH:i:1
chr5_574057_574605_0:0:0_0:0:0_349 83 chr5 574506 199 100M = 574057 -549 AGGAGAATCGCTTGAACCCGGGAGGCAGAGTTTGCGGTGAGCCGAGATCCCGCCATTCATTGCACTCCAGCCTCGGAAACTAGCAAAACTCCATCTTAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:839 NH:i:1
chr5_574057_574605_0:0:0_0:0:0_349 163 chr5 574057 199 100M = 574506 549 TTGCTGTGTATTTACTCTTAGGAACCACACCGTAGAATGGTCAAAATCCGGACCCAGCCCACACTCCAGATTTAACTCGCTTCAAATGTCTTTTATACCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:839 NH:i:1
-chr5_479321_479820_0:0:0_0:0:0_34a 99 chr5 479321 199 100M = 479721 500 TCGGTCAGCCCCGTGGAGAAGTAGATGGGCACCTTCAGGTTCATGCGCTCCCTGGGGACCACCGGCCCAGTCAGCACAGTGGCCACAGGGGAGAATGCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:840 NH:i:1
+chr5_479321_479820_0:0:0_0:0:0_34a 1123 chr5 479321 199 100M = 479721 500 TCGGTCAGCCCCGTGGAGAAGTAGATGGGCACCTTCAGGTTCATGCGCTCCCTGGGGACCACCGGCCCAGTCAGCACAGTGGCCACAGGGGAGAATGCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:840 NH:i:1
chr5_479321_479820_0:0:0_0:0:0_34a 147 chr5 479721 199 100M = 479321 -500 TCCTTAAAGAGCCGTCCTGGCGGCACCTACCAGAAGGTCTCCAGGAGGATGCAGAGCTCCTGGGCGCGGCCCAGCGCGAACACAGGTATCAGCACCTGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:840 NH:i:1
chr5_196169_196730_0:0:0_0:0:0_34b 83 chr5 196631 199 100M = 196169 -562 TGCTGAGTGTGAGATCAGCATGTGTGTGTGCAGTGCATGGTGCTGTGAGTGTATCAGCATGTCTGTGTGTGTGCAGTGCATGGTGCTGAGTGTGAGATCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:841 NH:i:1
chr5_196169_196730_0:0:0_0:0:0_34b 163 chr5 196169 199 100M = 196631 562 CAGCATGTGTGTGTGTGTGTGTGCAGCGCATGGTGCTGTGAGATCAGCATGTGTGTGTGTGTGTGTGTGCAGTGCATGGTGCTGTGAGATCAGCATGTGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:841 NH:i:1
chr5_915605_916043_0:0:0_0:0:0_34c 83 chr5 915944 199 100M = 915605 -439 GCAGACACCCTCCGTCTCACCCAGCCGGGCTCTCCGGAAGGTCCTCATCCCTGGGACCGAAGTCGCCCCACCCTGGGCCCCTCACCGAGGCCGAGGCGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:842 NH:i:1
chr5_915605_916043_0:0:0_0:0:0_34c 163 chr5 915605 199 100M = 915944 439 CGAGTGGGGGCAGATGGGCGTGATCATGATGGCCGGCACGTTGGGGTGGATCATGGAGGCCCCGGCCGCGGCCGCATACGCCGTGCTGCCCGTCGGGGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:842 NH:i:1
-chr5_515178_515724_0:0:0_0:0:0_34d 99 chr5 515178 199 100M = 515465 387 GGGGTCCAGGCAGCAGGCCGAGCGGATGCGGGGCGCGCGGGGGCGCGGGCGTGCACGGAGGCGTCGGAGCGCCCGGCGGCGGGAGGGATTTAGTCCTGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:843 NH:i:1
+chr5_515178_515724_0:0:0_0:0:0_34d 1123 chr5 515178 199 100M = 515465 387 GGGGTCCAGGCAGCAGGCCGAGCGGATGCGGGGCGCGCGGGGGCGCGGGCGTGCACGGAGGCGTCGGAGCGCCCGGCGGCGGGAGGGATTTAGTCCTGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:843 NH:i:1
chr5_515178_515724_0:0:0_0:0:0_34d 147 chr5 515465 199 100M = 515178 -387 GGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:843 NH:i:1
chr5_735631_736138_0:0:0_0:0:0_34e 83 chr5 736039 199 100M = 735631 -508 GACTGCCAAATTCTCCCGTTTGACTGGGGTATTATAGAAAATTATTTGTATGAATAATGAAAATAAGCCATCTCGTGGCAAAAAAAGGAAACTAATGATG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:844 NH:i:1
chr5_735631_736138_0:0:0_0:0:0_34e 163 chr5 735631 199 100M = 736039 508 TGCTTTGGGGCGCTGAGGCAGGAGGATCGTTTTAGCCCAGATATTCAAGGGTGAAGTGGTGAGCTATGATCACGCCACTGCACTCCAGCCTTACTCTGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:844 NH:i:1
-chr5_33785_34253_0:0:0_0:0:0_34f 99 chr5 33785 199 100M = 34154 469 AGGTTAGAACTGGTGGTCTAGAGAATTCATTTCATTCCAGAGAGAGAAAGAGAGGAATTTCTTGGGTTCCTTCAGGAATGCGTCTAGCTTTGCCTCATCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:845 NH:i:1
+chr5_33785_34253_0:0:0_0:0:0_34f 1123 chr5 33785 199 100M = 34154 469 AGGTTAGAACTGGTGGTCTAGAGAATTCATTTCATTCCAGAGAGAGAAAGAGAGGAATTTCTTGGGTTCCTTCAGGAATGCGTCTAGCTTTGCCTCATCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:845 NH:i:1
chr5_33785_34253_0:0:0_0:0:0_34f 147 chr5 34154 199 100M = 33785 -469 AGTTTTCCTCCCTCAGAACACAATCCCTAGAGACAACCTACCTCAGATGAGATATTGCCTAATTATTTTCAAAAGACAGTGAAACATCATGGATGTAAAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:845 NH:i:1
-chr5_58178_58740_0:0:0_0:0:0_350 99 chr5 58178 199 100M = 58641 563 GTATTAAAAAAAATCACAAAAACAAATATTCCTTTTTTTCCTGTGTTTATAGCTTTATAACCTTCATGCCAAACCCTAGCACCTTAAAATATCTAGCAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:846 NH:i:1
+chr5_58178_58740_0:0:0_0:0:0_350 1123 chr5 58178 199 100M = 58641 563 GTATTAAAAAAAATCACAAAAACAAATATTCCTTTTTTTCCTGTGTTTATAGCTTTATAACCTTCATGCCAAACCCTAGCACCTTAAAATATCTAGCAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:846 NH:i:1
chr5_58178_58740_0:0:0_0:0:0_350 147 chr5 58641 199 100M = 58178 -563 AGCCTGGGGAACAGAGTGAGACCTTAGAGAGAGACCTTGTCTCAAAAAGAGAAAAAAATAAAGAATTATTTCATTCTTTTGTTTTTCTTCAGCCAAGTAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:846 NH:i:1
chr5_230116_230628_0:0:0_0:0:0_351 83 chr5 230349 199 100M = 230116 -333 GGGCCCTGGGGGGGTGTGGTGTGGTCTGCGGGGCCCTGGGGGGGTGTGGTGGGGTCTGCGGGGCCCTGGGGGGGTGTGGTGGGGTCTGCGGGGCCCTGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:847 NH:i:1
chr5_230116_230628_0:0:0_0:0:0_351 163 chr5 230116 199 100M = 230349 333 AGGGAAGACCCCCTGCCAGGGAGACCCCAGGCGCCTGAATGGCCACGGGAAGGAAAACCTACCAGCCCCTCCGTGTGTCCTCCTGGCACATGGCGACCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:847 NH:i:1
chr5_46173_46676_0:0:0_0:0:0_352 83 chr5 46577 199 100M = 46173 -504 TCTCTATTTCACTTATTTCCACTGCTATCTTTATCATTTTTAAAATTTTGCTAGCTTTTAGTTGTCCCTCTTTAATAGTCCCTCTTTTTCCCTCTGTTTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:848 NH:i:1
chr5_46173_46676_0:0:0_0:0:0_352 163 chr5 46173 199 100M = 46577 504 TTAATTCTGCTTTAAACGTTGGGTAGAATTCAACAGTGTAGCCATCTGGTCCAGGCTTTTCTTTGTTGCTGGGTTTTTTATTACTGATGCAATCTTCCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:848 NH:i:1
-chr5_241495_242040_0:0:0_0:0:0_353 99 chr5 241495 199 100M = 241643 248 TGGGAGGCTGAGGCTATGGGGACTCCATCGGGGGAGGCTGAGTCTATGGGGACTCCGTGGGGGGAGGCTGAGGCTATGGGGACTCCGTGCCGGGAGGCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:849 NH:i:1
+chr5_241495_242040_0:0:0_0:0:0_353 1123 chr5 241495 199 100M = 241643 248 TGGGAGGCTGAGGCTATGGGGACTCCATCGGGGGAGGCTGAGTCTATGGGGACTCCGTGGGGGGAGGCTGAGGCTATGGGGACTCCGTGCCGGGAGGCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:849 NH:i:1
chr5_241495_242040_0:0:0_0:0:0_353 147 chr5 241643 199 100M = 241495 -248 CAGGGCTGTGAGGCTACGGGGACTCCGTGGGGGGTGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:849 NH:i:1
chr5_783963_784529_0:0:0_0:0:0_354 83 chr5 784430 199 100M = 783963 -567 CAGTGGCAGGTCTTGGGTGACTGCTGGCCCTGGGGCAATGGTGAGAAAGCCAGGCAGGCAGCTGCAGGAAGGAGCTGAGGAGAAAGGCGGCAGAGCCTCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:850 NH:i:1
chr5_783963_784529_0:0:0_0:0:0_354 163 chr5 783963 199 100M = 784430 567 GGGTGTGCCAAGCGCCAGCCCCACATCACCTGCTCCCAGGCCTGCCCAGGGGATGGGTCCTGTGGCCAGTACCCTCGGGGTCAGCTTGACCCAGACCCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:850 NH:i:1
-chr5_474754_475312_0:0:0_0:0:0_355 99 chr5 474754 199 100M = 475213 559 TGTGCCTTCCCGACTCCATCTGTCGCGGGCGGAGGCTGGAGATCTGGACAGACTTCCTTGTCTGGTCGGAGCTCGAGGGGGAAGGAGAGCCAATGTGACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:851 NH:i:1
+chr5_474754_475312_0:0:0_0:0:0_355 1123 chr5 474754 199 100M = 475213 559 TGTGCCTTCCCGACTCCATCTGTCGCGGGCGGAGGCTGGAGATCTGGACAGACTTCCTTGTCTGGTCGGAGCTCGAGGGGGAAGGAGAGCCAATGTGACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:851 NH:i:1
chr5_474754_475312_0:0:0_0:0:0_355 147 chr5 475213 199 100M = 474754 -559 AGCCCGGTGCCGAGCCCCGTGCGAACGCTGCGCCGGGTCTCCGTTTCCCCAGGCCAAGCCAGCCCCTTGGTCACCCCCGAGGAGAGCAGGTGAGGAAGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:851 NH:i:1
chr5_483692_484112_0:0:0_0:0:0_356 83 chr5 484013 199 100M = 483692 -421 GTGGGAGATCACCTCAACCCGGGAGGCCGAGGCTGCAGTGAGCCAAGATCACACTCCAGCCTGGGTAACAAAGTGAGACACTGTCAAAAAAAAAAAAAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:852 NH:i:1
chr5_483692_484112_0:0:0_0:0:0_356 163 chr5 483692 199 100M = 484013 421 ATCCCAAGTAAAAAGTAGAAAAAATGCAGGAAGCCCAACAAAAAGGATAAATATATAAATAAAACTAAGCAAATGTTGGTGTTACCAGGCATTAGAAATG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:852 NH:i:1
-chr5_356417_356900_0:0:0_0:0:0_357 99 chr5 356417 199 100M = 356801 484 AGGCACCTGCCACCACGCCTGGCTAATTTTGTTTGTATTATTAGTAGAGATGGGGGTTTCACCATGTTGGCCAGGCTGGTCTTGAACTCCTGACCTCAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:853 NH:i:1
+chr5_356417_356900_0:0:0_0:0:0_357 1123 chr5 356417 199 100M = 356801 484 AGGCACCTGCCACCACGCCTGGCTAATTTTGTTTGTATTATTAGTAGAGATGGGGGTTTCACCATGTTGGCCAGGCTGGTCTTGAACTCCTGACCTCAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:853 NH:i:1
chr5_356417_356900_0:0:0_0:0:0_357 147 chr5 356801 199 100M = 356417 -484 GGAGGCTGAGATGGAAACATTGCTTAAGCCCAGGAGTTTGAGTCCAGCCTGGGCAACACAGCAAAACCCCATCTTTAAAAAAACAACAAACTTACTATTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:853 NH:i:1
chr5_126611_127104_0:0:0_0:0:0_358 83 chr5 127005 199 100M = 126611 -494 AAGTACCCGCCTGGGCGGCGCTGGGGGCTCCGTGGGTCCCTCGGGTCAGCTCGTGTAACCCGCTGTCCCCGCAGATGAGATGAGCGAGAGCCGCCAGACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:854 NH:i:1
chr5_126611_127104_0:0:0_0:0:0_358 163 chr5 126611 199 100M = 127005 494 GTCAGGCGAGGGCTGCCGGCGCCCCCGTCGCACCAGGGGCTGGGTCCCCGCGGGCTGCCCGGGCCCCCCAGCGGCTCCAGGGCGGGCGGGCGGCTCCAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:854 NH:i:1
chr5_621235_621689_0:0:0_0:0:0_359 83 chr5 621590 199 100M = 621235 -455 AGCCTCCACACTCCGGGTGGAGTGTGCAGGCTTTGCAGAGGCGGAGGGAACATCTGTTCTGTCTCCCCTCACTCTTCTTGTCCAGAAACTCGCCCTGCAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:855 NH:i:1
chr5_621235_621689_0:0:0_0:0:0_359 163 chr5 621235 199 100M = 621590 455 CGGCAGCCCAGCCTGGAAGCACGGGTGCGCGACATCGCCATAATGACAAGGAACATCAAGAAGAACCGGGGCCTGTACAGGCACATCCTGCTGTACGGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:855 NH:i:1
-chr5_178669_179167_0:0:0_0:0:0_35a 99 chr5 178669 199 100M = 179068 499 GTGCATCCTGTGAAGGATCTGGAATGCGCGATATTTAGGTGTTTCCAGGGTGTTGGGTGGGGGTGGGGATGCCGTCCGCTGTCCGGAGTCCCCGCCACTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:856 NH:i:1
+chr5_178669_179167_0:0:0_0:0:0_35a 1123 chr5 178669 199 100M = 179068 499 GTGCATCCTGTGAAGGATCTGGAATGCGCGATATTTAGGTGTTTCCAGGGTGTTGGGTGGGGGTGGGGATGCCGTCCGCTGTCCGGAGTCCCCGCCACTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:856 NH:i:1
chr5_178669_179167_0:0:0_0:0:0_35a 147 chr5 179068 199 100M = 178669 -499 CAGCCAATTTTCGTCTCCCTCCCCCAGCCAAGGTCTCCCAGGGGTGCAGGGAGAGCGGAGCTGCTCAGAGCTTGGCCAGGTTCTAAGTGTGCTCCTGAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:856 NH:i:1
-chr5_816140_816645_0:0:0_0:0:0_35b 99 chr5 816140 199 100M = 816546 506 GGTATCTTTCAGCATTCTACTTTATTTGTGCAAAATCTTTTTTTTCCCTTTTTTTTTTTTAGAGGCGGGGTCTTGCTCTGTCGCCCAGGCTGGGGTGCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:857 NH:i:1
+chr5_816140_816645_0:0:0_0:0:0_35b 1123 chr5 816140 199 100M = 816546 506 GGTATCTTTCAGCATTCTACTTTATTTGTGCAAAATCTTTTTTTTCCCTTTTTTTTTTTTAGAGGCGGGGTCTTGCTCTGTCGCCCAGGCTGGGGTGCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:857 NH:i:1
chr5_816140_816645_0:0:0_0:0:0_35b 147 chr5 816546 199 100M = 816140 -506 GCTGATCAGCCCGCCTTGGCCTCCGAAAGTGCTGAGATTACAAGTGTGAGCCACCACACCTGGCCTGTGCAAAATCCTATATAGTATATTTTATTAATCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:857 NH:i:1
chr5_412144_412590_0:0:0_0:0:0_35c 83 chr5 412491 199 100M = 412144 -447 AGACGGGGCGGAGACCCCCACTGACCCCGGCCGCTGCTTCTTGCCTCCCGTCCCCAGACCCCACCCTGCCCCCATTGCTTCCCCAAAGGTCCACCTCATT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:858 NH:i:1
chr5_412144_412590_0:0:0_0:0:0_35c 163 chr5 412144 199 100M = 412491 447 TCCGTCCCGAGGCGGCTCAGCGCGGCGAGTCTCGGCGCCAGGGCGCAGTCATGGGGACGGCCGGCGCTCAGAGCCCCCGCGCGGGGGCGAGGCCCAGGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:858 NH:i:1
chr5_471786_472320_0:0:0_0:0:0_35d 83 chr5 472221 199 100M = 471786 -535 TGCACAGACACGCATCAACATGTGTACCTAGTGATGGAACACGCGTGTACAAGGTGATGTGTCTATTGCTGACATGGCTAACACAGTACGTGAACCCACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:859 NH:i:1
chr5_471786_472320_0:0:0_0:0:0_35d 163 chr5 471786 199 100M = 472221 535 CCAGGGAGAAGGTGGGTGCTTCAGCCAGGGTCCAGCCCCCAGCAGGGAGGCCATGCCCACACCTCCCCACAGCCCGGGTCCTCAATGCAGGATCCTAGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:859 NH:i:1
-chr5_890249_890743_0:0:0_0:0:0_35e 99 chr5 890249 199 100M = 890644 495 CACACCCAGTGGAAGTAACCACACCCGGTGTGTTCCTAGAAGCTCATCTGTGACAGTTCAACAAGAACTTACTATTCTAGAAAAGTATTACACAAAGTTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:860 NH:i:1
+chr5_890249_890743_0:0:0_0:0:0_35e 1123 chr5 890249 199 100M = 890644 495 CACACCCAGTGGAAGTAACCACACCCGGTGTGTTCCTAGAAGCTCATCTGTGACAGTTCAACAAGAACTTACTATTCTAGAAAAGTATTACACAAAGTTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:860 NH:i:1
chr5_890249_890743_0:0:0_0:0:0_35e 147 chr5 890644 199 100M = 890249 -495 CAAACAGAAAATCCAAACAGGATGGCAGCTCCTTGTGAGGGTGGAGGGGAGGGCACCAGATTCTGTGCGGCTGGAAATTCCAAGGTGCTCAGAACCAGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:860 NH:i:1
chr5_294773_295184_0:0:0_0:0:0_35f 83 chr5 295085 199 100M = 294773 -412 GAGCTGCACTCTGCGTGGCCCTGGGATGGGAAAATCCAGGGAAACAGACACTGTTATGGATGTGAGCATCGGCTGCCAGAACGAGAGGAAGCAGAAGAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:861 NH:i:1
chr5_294773_295184_0:0:0_0:0:0_35f 163 chr5 294773 199 100M = 295085 412 CAGAGGGGGAGAGAGACTGAGAAGCCATGATTTACATTTCCTGGCAGAGAGCTCTTGCAGGGAAACCAAATTTATTCCAGTTGACAAGGTGCGTGGGCAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:861 NH:i:1
@@ -931,27 +931,27 @@ chr5_683182_683599_0:0:0_0:0:0_360 83 chr5 683500 199 100M = 683182 -418 CCCACTT
chr5_683182_683599_0:0:0_0:0:0_360 163 chr5 683182 199 100M = 683500 418 ACAAGCTGGCCCGGCAGCGCTACGAGGACCAACTGAAGCAGCAGGTGAGCTCAGCCTCCCCTGCGAGGCGCCTGCGTCCCTGAGAACGTAGGTGGCTTTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:862 NH:i:1
chr5_24196_24706_0:0:0_0:0:0_361 83 chr5 24607 199 100M = 24196 -511 AAGCACATTCTTATTATTAAAATTTTAGAAAGTACAGAGAAGGAAATTTTAATAACCTACACCTAGAACACTAAGATACTGGTAACGTAGAAAAATGTGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:863 NH:i:1
chr5_24196_24706_0:0:0_0:0:0_361 163 chr5 24196 199 100M = 24607 511 TGGTGAGCCAGGTGAGTGTGGAGCCGCCCAGGGCACCGTGCTGTGCGATAGCTCAGGCATCGGGTGCTTCTGCCAAACCCTTCAGCAGCTGGTTTGGGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:863 NH:i:1
-chr5_165942_166423_0:0:0_0:0:0_362 99 chr5 165942 199 100M = 166324 482 TCGGGAAGGAAATTCCCCCAGTGGCGCAGGGTCCGGCGGCGCCGAGGGGTGGGCGAGCCTCGGTCTCGAGCCTCTTGGCTTCCTCCGCCCGTCCCCACTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:864 NH:i:1
+chr5_165942_166423_0:0:0_0:0:0_362 1123 chr5 165942 199 100M = 166324 482 TCGGGAAGGAAATTCCCCCAGTGGCGCAGGGTCCGGCGGCGCCGAGGGGTGGGCGAGCCTCGGTCTCGAGCCTCTTGGCTTCCTCCGCCCGTCCCCACTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:864 NH:i:1
chr5_165942_166423_0:0:0_0:0:0_362 147 chr5 166324 199 100M = 165942 -482 TGACGTCACCAGGCACCCGGGGCGATGACGCAAGCCTGCTGGGATGACGTCATGTGGCCCGTGTCGCCACGTGCTTCCAGGTGACCCGCTGGCGAACTCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:864 NH:i:1
-chr5_565250_565652_0:0:0_0:0:0_363 99 chr5 565250 199 100M = 565553 403 ACGCGCATGCCCGGTGTCCCCCGGCCGCCGTCGGCGTCGTGTCTTCAGTCCTTATCGACTCCAGACTTTGGGGATGCGTATTAACGCCAGACTCCTGCAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:865 NH:i:1
+chr5_565250_565652_0:0:0_0:0:0_363 1123 chr5 565250 199 100M = 565553 403 ACGCGCATGCCCGGTGTCCCCCGGCCGCCGTCGGCGTCGTGTCTTCAGTCCTTATCGACTCCAGACTTTGGGGATGCGTATTAACGCCAGACTCCTGCAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:865 NH:i:1
chr5_565250_565652_0:0:0_0:0:0_363 147 chr5 565553 199 100M = 565250 -403 ACGGCGAGGGGGTTCGTGCGTTTGTAACGTCTTGACATCGCTGATCCTCCCGCAGGCGGTCTCACGCCCTGCCCGTCCTGGGTTCACGGTTTTTCATCAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:865 NH:i:1
chr5_346556_347167_0:0:0_0:0:0_364 97 chr5 346556 199 100M = 347068 612 TCCTTTAAAGGGCCAAACCCCTGTCCCATAAACCCACCCTCTTCCTCGTGGCTGAACGGGGAAGATCCCACACGTCCCCAACCCACTGGGCCTGACACCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:866 NH:i:1
chr5_346556_347167_0:0:0_0:0:0_364 145 chr5 347068 199 100M = 346556 -612 CCTCCTTCCACCACAACTCACAGTCCGCCCACTCACCACCCATGCCCCATCCCCCAGTCCCCACCCGCTCCAAGCACCATGAGCTGGCCCCTCAGTCCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:866 NH:i:1
-chr5_923309_923815_0:0:0_0:0:0_365 99 chr5 923309 199 100M = 923716 507 CATGAAGTGCTAGAATCTTCCTCAACACAGCGATCCCACAACTCCACACACATCCCGAGGACTCCCCCATCCCATGGCCCCCGGCACTCACATGATGGTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:867 NH:i:1
+chr5_923309_923815_0:0:0_0:0:0_365 1123 chr5 923309 199 100M = 923716 507 CATGAAGTGCTAGAATCTTCCTCAACACAGCGATCCCACAACTCCACACACATCCCGAGGACTCCCCCATCCCATGGCCCCCGGCACTCACATGATGGTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:867 NH:i:1
chr5_923309_923815_0:0:0_0:0:0_365 147 chr5 923716 199 100M = 923309 -507 GGCTGCACTGGAGCGGCACCTGTGAGGAGCACGCATGCCCACGCGCCGCTCAGATCATGACCCAAGCCGGGGAGAGCTTCAGCCCAAACAAGGAAAATGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:867 NH:i:1
-chr5_302275_302675_0:0:0_0:0:0_366 99 chr5 302275 199 100M = 302576 401 GGGGGGAGGCAGCAGGAGGGGCGGCCGGGTGGTCAGTTTGAGGACCTGGCCCCACGTGCCTTTGTGGGATGGTGGGGACCCCACACCCCCGCTGGCTGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:868 NH:i:1
+chr5_302275_302675_0:0:0_0:0:0_366 1123 chr5 302275 199 100M = 302576 401 GGGGGGAGGCAGCAGGAGGGGCGGCCGGGTGGTCAGTTTGAGGACCTGGCCCCACGTGCCTTTGTGGGATGGTGGGGACCCCACACCCCCGCTGGCTGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:868 NH:i:1
chr5_302275_302675_0:0:0_0:0:0_366 147 chr5 302576 199 100M = 302275 -401 GGCGGTGCCGGGAGGGCGGGTCCTGGGGGTCCCCCTGCCCCCTCTGCGGTGCGTTGGGAGCTCCCGTCCTCCTGCGCACACTCAGGGAAAGACCGGAGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:868 NH:i:1
chr5_536452_536974_0:0:0_0:0:0_367 83 chr5 536875 199 100M = 536452 -523 AACCCCAAAATTGCTTCCCAAGGTTGTGGTTGGGCAACAAAGGCTTCCAAGCCAAGGCAGGGTGAGCTCTGCAGGCTGGGCAGGATGTGGGTCTGCAGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:869 NH:i:1
chr5_536452_536974_0:0:0_0:0:0_367 163 chr5 536452 199 100M = 536875 523 CGCAGAAGACACCAGCTTGCCAGGGACGCAGCAGCCAGAGGAAGAGGCGTGGGGCTGAGTTCACCAACGTGCCCCATTATGGAGGCCACTGATGAATGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:869 NH:i:1
-chr5_609473_609936_0:0:0_0:0:0_368 99 chr5 609473 199 100M = 609837 464 GGTCTTCCTGTTACCCAGGCTGTTCTCAAACTCCTGACCACAAAGGATTCTCTTGCCTTGGCCTCCCTCAGTGCTGGGACTGCAGACAGGAGCCACCGCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:870 NH:i:1
+chr5_609473_609936_0:0:0_0:0:0_368 1123 chr5 609473 199 100M = 609837 464 GGTCTTCCTGTTACCCAGGCTGTTCTCAAACTCCTGACCACAAAGGATTCTCTTGCCTTGGCCTCCCTCAGTGCTGGGACTGCAGACAGGAGCCACCGCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:870 NH:i:1
chr5_609473_609936_0:0:0_0:0:0_368 147 chr5 609837 199 100M = 609473 -464 CCAGAACGAGTGGGGAAGGAGCAGGGGGCCCATGCTGGCCCAGGTTGAGACCAGAACTACTGGGTCATTTTGGAAGCTGGAGAGAAGGAATAGGAATGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:870 NH:i:1
chr5_603119_603608_0:0:0_0:0:0_369 83 chr5 603509 199 100M = 603119 -490 CCCAGCCAAGCCCTGAAGGAGCTGGGGCTGGAGATGAGGAACCGGCCACTGGCTCAGCCTGCTGCAGCCACTGAAAAATGCCAACCCTGGGGATCTCACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:871 NH:i:1
chr5_603119_603608_0:0:0_0:0:0_369 163 chr5 603119 199 100M = 603509 490 CGTGTACCCTGCTGAGACGGGGGCAGGCTTGAGTAGCCACCTCCAGGTGTAGCTCCCTGCTGATGTGTCCAGCCCAGACCTCGAGGCCCCAGGGAGTGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:871 NH:i:1
chr5_787139_787632_0:0:0_0:0:0_36a 83 chr5 787533 199 100M = 787139 -494 TGTGTCCTGGAGTTGGCAGGGCCCCCTCCCATCCTCAGCCACATCCACCCAGTCTCAGGGGTCCCTCCAGCCTCCTGGGACTGCCGTGTCCCCCACCTCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:872 NH:i:1
chr5_787139_787632_0:0:0_0:0:0_36a 163 chr5 787139 199 100M = 787533 494 GTCCTGGGCAGGGTGAGGAGGGAGCATTGGAGAGGCCAAGCTGGCCAGGAAGTTTGTGCCCCTGAGAATTAGAGGGAGTCCTGTGCTCCAACTCGCAGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:872 NH:i:1
-chr5_703565_704074_0:0:0_0:0:0_36b 99 chr5 703565 199 100M = 703975 510 CATCTTGACATGTCCCTGAGCACACGGCACTGTATATTGTGACATACATGTACTTTGCATACACATATATGTTGTCAATATACAAGGGCACTCTGTACGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:873 NH:i:1
+chr5_703565_704074_0:0:0_0:0:0_36b 1123 chr5 703565 199 100M = 703975 510 CATCTTGACATGTCCCTGAGCACACGGCACTGTATATTGTGACATACATGTACTTTGCATACACATATATGTTGTCAATATACAAGGGCACTCTGTACGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:873 NH:i:1
chr5_703565_704074_0:0:0_0:0:0_36b 147 chr5 703975 199 100M = 703565 -510 CAGCCTCCGGGAGGCAGTGGAGCCGACCTCAGGCCTTTGCTCAGCAGTCGGACCTGCCAGCCAGGGACACGGGGCTCTCTGGTAGCATGTGGCCACCCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:873 NH:i:1
-chr5_784725_785184_0:0:0_0:0:0_36c 99 chr5 784725 199 100M = 785085 460 TGAGGTAGGAGAATGGCGTGAACTTGGGAGGCGGAGCTTGCAGTGAGCTGAGATTGCGCCACTGCGCTCCAGCCTGGGCGACAGAGCGAGATTACGTCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:874 NH:i:1
+chr5_784725_785184_0:0:0_0:0:0_36c 1123 chr5 784725 199 100M = 785085 460 TGAGGTAGGAGAATGGCGTGAACTTGGGAGGCGGAGCTTGCAGTGAGCTGAGATTGCGCCACTGCGCTCCAGCCTGGGCGACAGAGCGAGATTACGTCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:874 NH:i:1
chr5_784725_785184_0:0:0_0:0:0_36c 147 chr5 785085 199 100M = 784725 -460 GGGTCTACAGGCCCCTGTGTGCTCACATCCCAGCCCCCGGGGAGAGGTGGGAGTGGGTGGGCGCACCCAGCTCCCCTGTCCAGGCTCGTCCCCCTTGTTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:874 NH:i:1
chr5_571553_572016_0:0:0_0:0:0_36d 83 chr5 571917 199 100M = 571553 -464 AAATGCGGGGGGTTCCGGGTGGGCAGAGCTTGGCAGCAGGCGTGGTCTCTTCGAGGAGCACCACGGCAGCAGATAGTAGCCCTGGAGGGTTGGCAAGGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:875 NH:i:1
chr5_571553_572016_0:0:0_0:0:0_36d 163 chr5 571553 199 100M = 571917 464 CCCAGCACTTTGGGAGGGCAAGGCAGGCAGATCATTTCAGATCTGGAGTTCGAGACCAGCCTGACCAACATGGCGGAACCCGGTCTCTACTAAAAATACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:875 NH:i:1
@@ -959,7 +959,7 @@ chr5_209799_210233_0:0:0_0:0:0_36e 83 chr5 210134 199 100M = 209799 -435 TGCGGGG
chr5_209799_210233_0:0:0_0:0:0_36e 163 chr5 209799 199 100M = 210134 435 GGGGTCACCTACAGCACCGAGTGTGAGCTGAAGAAGGCCAGGTGTGAGTCACAGCGAGGGCTCTACGTAGCGGCCCAGGGAGCCTGCCGAGGTGAGCCGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:876 NH:i:1
chr5_544321_544843_0:0:0_0:0:0_36f 83 chr5 544744 199 100M = 544321 -523 CAGGAGGCTGAGACAGGAGGATCACTTAAACACAGGAGGCAGAGTGAGCCAAGGCCATGCCACGGTGAGTGATGAAGGAAAAGCTGTATAATGGCTACTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:877 NH:i:1
chr5_544321_544843_0:0:0_0:0:0_36f 163 chr5 544321 199 100M = 544744 523 TGTGCAAGATTAGCAAATGTCTACCCTAATTTTAGACCCACTCCTAAAAACTCACTCTTGGGAGCTATGTTTGGAATGGGGACCCTCTTCCTCTGGTATT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:877 NH:i:1
-chr5_976353_976893_0:0:0_0:0:0_370 99 chr5 976353 199 100M = 976794 541 GGCTCACGCCTGTAATCCCATCACTTTGGGAGGCCGAGGCGGGTGGATCGTGAAGTCAGGAGATCCAGACCATCCTGGCTAACATGGTGAAACCCCATCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:878 NH:i:1
+chr5_976353_976893_0:0:0_0:0:0_370 1123 chr5 976353 199 100M = 976794 541 GGCTCACGCCTGTAATCCCATCACTTTGGGAGGCCGAGGCGGGTGGATCGTGAAGTCAGGAGATCCAGACCATCCTGGCTAACATGGTGAAACCCCATCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:878 NH:i:1
chr5_976353_976893_0:0:0_0:0:0_370 147 chr5 976794 199 100M = 976353 -541 AATGCTGTTCTAAGGCTCAATACACTTAATCCCTCAGGATCCTCGGGGCAGGTTCTGTGATTTCCTGAGCCAGGGCACTGACATCACAAGAGGGCGAGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:878 NH:i:1
chr5_271377_271898_0:0:0_0:0:0_371 83 chr5 271799 199 100M = 271377 -522 AAAAAAACAGAAATAAATGTTGGCAAACGCATGTGGAGAGAATGGAACCCTTGTGCATGCTGCTGAGAATATAAAATGGTGTGAAAAACAGTATGTCAGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:879 NH:i:1
chr5_271377_271898_0:0:0_0:0:0_371 163 chr5 271377 199 100M = 271799 522 TAAGTGTGTTGTAGAATTTTCTAGCATACCTAGAAGTGAAATGCGTGCCGACAAGAGCACGGAATATAACTGGAGTCCGCTCGCTGAGGTTCTGTGAAGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:879 NH:i:1
@@ -969,23 +969,23 @@ chr5_961424_961967_0:0:0_0:0:0_373 83 chr5 961868 199 100M = 961424 -544 CTACAAC
chr5_961424_961967_0:0:0_0:0:0_373 163 chr5 961424 199 100M = 961868 544 CTACACACACGGCTGACCAGATATCCGACACCTGAAGTGCACAGGCATGCAGCCCAGCCAGCGCAGAAAAGCTGAACAGAGGTTTCTGCTGCTGCCACAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:881 NH:i:1
chr5_579015_579478_0:0:0_0:0:0_374 83 chr5 579379 199 100M = 579015 -464 TAGAGACTGGGCTCCATGTTCGGCTAATTGCAAAAACAAAGTTTTAGTTTTTGCTGGAATCTCAGGTACTGACACATTTAGTTCATCATAGAAAGTCTGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:882 NH:i:1
chr5_579015_579478_0:0:0_0:0:0_374 163 chr5 579015 199 100M = 579379 464 TGGGGGCCGTCCAGTCCCGGTGGAGTTCTGGGTAGGCCCAAACAGTCTGCAACTTTGGAAATTTACTGAATGGATTTCTTTCTGTGTAATTGGAACTCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:882 NH:i:1
-chr5_532039_532608_0:0:0_0:0:0_375 99 chr5 532039 199 100M = 532509 570 TTAGTGTGAGAACGAGTGTGAAATGAGTGTAAATGAGTGGGAATTAGTGAATTGGTGTGAAGTGTGGATGAGTGTGGATGGGTGTTAATTGGTGAGTGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:883 NH:i:1
+chr5_532039_532608_0:0:0_0:0:0_375 1123 chr5 532039 199 100M = 532509 570 TTAGTGTGAGAACGAGTGTGAAATGAGTGTAAATGAGTGGGAATTAGTGAATTGGTGTGAAGTGTGGATGAGTGTGGATGGGTGTTAATTGGTGAGTGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:883 NH:i:1
chr5_532039_532608_0:0:0_0:0:0_375 147 chr5 532509 199 100M = 532039 -570 TGGATTGGTGAGTGAATTGGTGAGTTGAATTGGTGTGTGTAGTGTGGATGAGTGTGAATTGGCGAGTGTGGATGAGTGTGAATTGGTGAGTGTGTGAATG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:883 NH:i:1
-chr5_472161_472694_0:0:0_0:0:0_376 99 chr5 472161 199 100M = 472595 534 CATGCACAAACATGCCTGCACACACTGGTACCACACATACATGTGTGTTCAAGGAGACACTGCACAGACACGCATCAACATGTGTACCTAGTGATGGAAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:884 NH:i:1
+chr5_472161_472694_0:0:0_0:0:0_376 1123 chr5 472161 199 100M = 472595 534 CATGCACAAACATGCCTGCACACACTGGTACCACACATACATGTGTGTTCAAGGAGACACTGCACAGACACGCATCAACATGTGTACCTAGTGATGGAAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:884 NH:i:1
chr5_472161_472694_0:0:0_0:0:0_376 147 chr5 472595 199 100M = 472161 -534 TTGTGCACACGTCTGTGAACATGTAGTTCACGCAGGTGTACACGCCTGCACGCCACGAATGATGGAAAACATGCAGACTCGGTGTGCACACAACCCCTAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:884 NH:i:1
-chr5_825491_826021_0:0:0_0:0:0_377 99 chr5 825491 199 100M = 825922 531 GGAGTTCAAGACCAGCCTGGGCAACAAAGCGAGAGCCCAGCTCAACAAAAAAATAGCCAGGCATGGTGGCACGTGCCTGTGGTTCCAGCCACATGGGAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:885 NH:i:1
+chr5_825491_826021_0:0:0_0:0:0_377 1123 chr5 825491 199 100M = 825922 531 GGAGTTCAAGACCAGCCTGGGCAACAAAGCGAGAGCCCAGCTCAACAAAAAAATAGCCAGGCATGGTGGCACGTGCCTGTGGTTCCAGCCACATGGGAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:885 NH:i:1
chr5_825491_826021_0:0:0_0:0:0_377 147 chr5 825922 199 100M = 825491 -531 ACTCAGGACCACTGGCGCCCCATGCTGCCTCTGACAAGCCCTGGAGCTCTGGGTCTCAAAGGCTGGCTGGCAACAGGCTGCACCGGGCATGGGAATCCGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:885 NH:i:1
chr5_767729_768198_0:0:0_0:0:0_378 83 chr5 768099 199 100M = 767729 -470 GCGGTGGCCTTTCACGCGCACACGCTTCCCCACCTGGAGCGCCGTCTCCAATGCTGTTTGTACACACACCAGCATTCTGACAGTCACTAAATGTTTTCTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:886 NH:i:1
chr5_767729_768198_0:0:0_0:0:0_378 163 chr5 767729 199 100M = 768099 470 CCCGGCGTGTCGTGGCTGCAGACCCTGCAGACCCCTATGAAGATGGTCCTGCCTGCCTTGCATCGGGCCTCTAGCTAGGGACTGTGGTTGCAGACGTAGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:886 NH:i:1
-chr5_660858_661377_0:0:0_0:0:0_379 99 chr5 660858 199 100M = 661278 520 CACCTCGGGGCCCTGGCGTGCATTAAGGGTGGCGGGTTCCCATAGCGGCCTCCCTCAGCTCCCTCTCTCTTCACTAGGCCACGGCATATGCCTCCAAGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:887 NH:i:1
+chr5_660858_661377_0:0:0_0:0:0_379 1123 chr5 660858 199 100M = 661278 520 CACCTCGGGGCCCTGGCGTGCATTAAGGGTGGCGGGTTCCCATAGCGGCCTCCCTCAGCTCCCTCTCTCTTCACTAGGCCACGGCATATGCCTCCAAGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:887 NH:i:1
chr5_660858_661377_0:0:0_0:0:0_379 147 chr5 661278 199 100M = 660858 -520 GGGAGGGGCAGGCCTCCTTCCTGCCCCTCGAGACACTCTTGGGAGATGCATTTTCCGTCTGGCTCACAGGGGGAGGGTGAGGCTTTGTACCCCAGCCCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:887 NH:i:1
chr5_438461_439004_0:0:0_0:0:0_37a 83 chr5 438905 199 100M = 438461 -544 GACATGACCCTGACCCGGAGCTCCGGGAGCCCGCCGTAACCGACCTCGGGTTCCGCGCAGGCAGGCTCGGGCCGGGGCCCCGCGGGCGGCGGGCGCGAAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:888 NH:i:1
chr5_438461_439004_0:0:0_0:0:0_37a 163 chr5 438461 199 100M = 438905 544 AAGAGGATGGCAACCGGACTAGAGTTAAATACACACGCACTAAGTGGCAGGCACTGAAAAGGACGCCAGGAGGTGACACCTGGCCAGGGGCTGAAAGGAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:888 NH:i:1
-chr5_156821_157203_0:0:0_0:0:0_37b 99 chr5 156821 199 100M = 157104 383 AGACAGGGTCTCACTATGTTGCTCAGGCTTGTCTTGAACTCTTGGGCTCAAGTGATCCTCCCGCCTTGGCCTCCCAAAGTGCTGGGATCACGTGCAGGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:889 NH:i:1
+chr5_156821_157203_0:0:0_0:0:0_37b 1123 chr5 156821 199 100M = 157104 383 AGACAGGGTCTCACTATGTTGCTCAGGCTTGTCTTGAACTCTTGGGCTCAAGTGATCCTCCCGCCTTGGCCTCCCAAAGTGCTGGGATCACGTGCAGGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:889 NH:i:1
chr5_156821_157203_0:0:0_0:0:0_37b 147 chr5 157104 199 100M = 156821 -383 AGTAGCTGGGATTACAGGCATGCACCACCATGCCTGGCTAATTTTGTATTCTTAGTAGAGACAGGGATCACCATGTTGGTCAGGCTGGTCTCGAACTCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:889 NH:i:1
-chr5_199892_200377_0:0:0_0:0:0_37c 99 chr5 199892 199 100M = 200278 486 GTCCGCCGCCCCAGGGGTCCCAGGATGGGGCGAGCAGCCCTGAAGAGGCCCAAGGGCACCCCGTGAGGAGCCCCCACGCTCTGAGAGTGGGGCGCAGAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:890 NH:i:1
+chr5_199892_200377_0:0:0_0:0:0_37c 1123 chr5 199892 199 100M = 200278 486 GTCCGCCGCCCCAGGGGTCCCAGGATGGGGCGAGCAGCCCTGAAGAGGCCCAAGGGCACCCCGTGAGGAGCCCCCACGCTCTGAGAGTGGGGCGCAGAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:890 NH:i:1
chr5_199892_200377_0:0:0_0:0:0_37c 147 chr5 200278 199 100M = 199892 -486 ACTCTGCGGACCCCTCGGTCCCTGGAGGCCGTCCTTGGGCTCTCAGTGGGCCTGGCCCTCACCCAAAGCTGCAGAAACACTTTCTGCGGGAGCTGGGGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:890 NH:i:1
-chr5_262009_262488_0:0:0_0:0:0_37d 99 chr5 262009 199 100M = 262389 480 TATTTGCAGGAGTTTTCTTGGTAGGTAGGTTCTCCGGTAATTAATTCAATTTCCTGAACAGAAAGAGGACCATGCAGATTTCCATTGCTTTTGACAAGTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:891 NH:i:1
+chr5_262009_262488_0:0:0_0:0:0_37d 1123 chr5 262009 199 100M = 262389 480 TATTTGCAGGAGTTTTCTTGGTAGGTAGGTTCTCCGGTAATTAATTCAATTTCCTGAACAGAAAGAGGACCATGCAGATTTCCATTGCTTTTGACAAGTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:891 NH:i:1
chr5_262009_262488_0:0:0_0:0:0_37d 147 chr5 262389 199 100M = 262009 -480 AGCATGATGTTGCTGGTCTTGAACTCCTGGGCTCAAACAATCCTCCTGCCTCGGTCTCCCAAAGTGCTGGGATTACAGGTGTGAGCCACCAGAGATGGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:891 NH:i:1
chr5_794073_794603_0:0:0_0:0:0_37e 83 chr5 794504 199 100M = 794073 -531 GGGCTGCCAGGTGCCAGGAGACGCCTCCCTCGGGCCTGCCCCGGCGCCCGCCCTCACCGGCGTCTGTCCTGCCGCCCAGCTACAGGCCTCGGGCCTCCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:892 NH:i:1
chr5_794073_794603_0:0:0_0:0:0_37e 163 chr5 794073 199 100M = 794504 531 GGACACAGCCCTGCACGTGGCGCTGCAGCGTCATCAGCTGCTGCCCCTGGTGGCTGATGGGGCCGGGGGGGACCCAGGGCCCTTGCAGCTGCTGTCCAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:892 NH:i:1
@@ -993,9 +993,9 @@ chr5_325596_326126_0:0:0_0:0:0_37f 83 chr5 326027 199 100M = 325596 -531 GAGCAGC
chr5_325596_326126_0:0:0_0:0:0_37f 163 chr5 325596 199 100M = 326027 531 TTGCTCCAAGCGTTTCGTCTGCGGGTGGAGCTTTGGAACTGATGGAGAAGGCAGAGTCCTGACCAGGGGCCGCCATCAGAGCGCAGGGACGGTCGGGGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:893 NH:i:1
chr5_394649_395105_0:0:0_0:0:0_380 83 chr5 395006 199 100M = 394649 -457 TCCATCTCTCTCTCGAGCTGTCAGCACCTCCAGGGGCTAACCTGTCCCCATCGCCCAACACCTGCAGCACAGCTTTCCTGTGGGGCCCGGCTCTGTCCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:894 NH:i:1
chr5_394649_395105_0:0:0_0:0:0_380 163 chr5 394649 199 100M = 395006 457 TTTATTTTTAGAAACAGGGTCTTGCTCTGTCACCCAGGCTGGAGTACAGCGGTGACCACAGCTCACCGTGACCACGAACTCCTGGGCTCAGGTGATGCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:894 NH:i:1
-chr5_716986_717451_0:0:0_0:0:0_381 99 chr5 716986 199 100M = 717352 466 CCAAATGCTACCTGTACACTCAGATTCCCAGAGCCCATCCCCACTGGTCAGGGAGGGGGAGGCCTGACAGCTGCTGCACTGGGCGCTGCTTCCCGAGCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:895 NH:i:1
+chr5_716986_717451_0:0:0_0:0:0_381 1123 chr5 716986 199 100M = 717352 466 CCAAATGCTACCTGTACACTCAGATTCCCAGAGCCCATCCCCACTGGTCAGGGAGGGGGAGGCCTGACAGCTGCTGCACTGGGCGCTGCTTCCCGAGCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:895 NH:i:1
chr5_716986_717451_0:0:0_0:0:0_381 147 chr5 717352 199 100M = 716986 -466 GTGACTCAGATCCTGTCCATAATGCAACAGAGCACAGCAAAGTCAGCCCCTAAACTCATCAGGAACCTTCCAATTCAGGGGATCCCCAAGCCGGCACCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:895 NH:i:1
-chr5_26446_26983_0:0:0_0:0:0_382 99 chr5 26446 199 100M = 26884 538 GGGGGGGTTAATCTTTTAACCTCAGGCCTGATCATCAGGGGCACCAGCTGGTCTTGCCACTGACTTCATTCCTGTTGTTTTTCAACTTTTACTTCCTCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:896 NH:i:1
+chr5_26446_26983_0:0:0_0:0:0_382 1123 chr5 26446 199 100M = 26884 538 GGGGGGGTTAATCTTTTAACCTCAGGCCTGATCATCAGGGGCACCAGCTGGTCTTGCCACTGACTTCATTCCTGTTGTTTTTCAACTTTTACTTCCTCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:896 NH:i:1
chr5_26446_26983_0:0:0_0:0:0_382 147 chr5 26884 199 100M = 26446 -538 TGAGGAGAATTTATTAGAGTAAATGACTGTTTTCACCTGTCATGTCACATTAAAATGTGGAGAGAACTGGTTGTAGGGGCAGTCAAACTGGCTGTGAGAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:896 NH:i:1
chr5_4441_5017_0:0:0_0:0:0_383 83 chr5 4918 199 100M = 4441 -577 CACACCTAGACACACACACCTGGACAAACACACCTGGACACACACACCTAGACACACACACCTGGACACACACACGTAGACACACACACCTAGAGACACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:897 NH:i:1
chr5_4441_5017_0:0:0_0:0:0_383 163 chr5 4441 199 100M = 4918 577 AATGAAAAGTTCACTCAGGGGCCCAGCAGTAAATTGGAACAGTCAGAAGAAAGAACCAGCAGACTTGAAGTTATGTCAATAGAGATTATACAACCTGAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:897 NH:i:1
@@ -1003,15 +1003,15 @@ chr5_942751_943141_0:0:0_0:0:0_384 83 chr5 943042 199 100M = 942751 -391 ACCTTCT
chr5_942751_943141_0:0:0_0:0:0_384 163 chr5 942751 199 100M = 943042 391 GGAGCCTGGGGAAAGGATGACTGGGAAGGACTGAGGCTCCCAGTGAAGGACATCAGTATGGCAGGCTATCTGGTCATCTCATCCAGCCTCCAGCCAGCAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:898 NH:i:1
chr5_477842_478236_0:0:0_0:0:0_385 83 chr5 478137 199 100M = 477842 -395 GTAGCAGTTGACCCCTGGACCCCGGGGGAAGAGAGAGCCTCAGCCCAGGCTGCCCGTGCTGACCCGAGGTGGGCCCCACGCCGCCCGCCCGGCTGCCTAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:899 NH:i:1
chr5_477842_478236_0:0:0_0:0:0_385 163 chr5 477842 199 100M = 478137 395 GAGCGCAGCAGCGGCCCTCCCCCCTCCAGAGACCGTCCTGGCACTCACGCTGTCCTTCATGATCAGGGTGCCGTGCAGGAGCCGAGGCTTCTTGGCCTCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:899 NH:i:1
-chr5_393796_394277_0:0:0_0:0:0_386 99 chr5 393796 199 100M = 394178 482 CTTCCCAGCCCGGCAGGTCCCGACACAGGGCCCGGCGCCCCCCACCGCGCAAAGCCAGGATAGGAACACACAGGCGAGCGCACGCGGACAGCCCCCCAGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:900 NH:i:1
+chr5_393796_394277_0:0:0_0:0:0_386 1123 chr5 393796 199 100M = 394178 482 CTTCCCAGCCCGGCAGGTCCCGACACAGGGCCCGGCGCCCCCCACCGCGCAAAGCCAGGATAGGAACACACAGGCGAGCGCACGCGGACAGCCCCCCAGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:900 NH:i:1
chr5_393796_394277_0:0:0_0:0:0_386 147 chr5 394178 199 100M = 393796 -482 GCAGCACGGAGCCAGGCCAATGAGGGGACCCCACCTGGACGCCATCGCCACCCAGGGCCAGACCATGGGGCGGGCTGCAGGGTGTGGGCCAGGTGCTGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:900 NH:i:1
chr5_972375_972894_0:0:0_0:0:0_387 83 chr5 972795 199 100M = 972375 -520 CCATGTCAGCCAGGCTGGTCTCGAACTCCTGAAGTCAAGCGATCCACCTGACTTGGCCTCCCAAAGTGCTGGGGTTACAGGTGTGAGCCACCGCGTCTAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:901 NH:i:1
chr5_972375_972894_0:0:0_0:0:0_387 163 chr5 972375 199 100M = 972795 520 CCATATTGGGCAGGCTGGTCTCAAACTCCTGACCTCAGGTGATCCACCCGCCTTAGCCTCCCAAAGTGCTGGGATTATAGGCGTGAACCACGGTGCCTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:901 NH:i:1
chr5_351457_351999_0:0:0_0:0:0_388 83 chr5 351900 199 100M = 351457 -543 GGACACAGTTTTATTATTGAGCTGTTATGAATAATGTTGCTATAAATATTCACACAAAAATGTTTGTATGAATATATATTTTCAGTGCTCCTGGGAAAAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:902 NH:i:1
chr5_351457_351999_0:0:0_0:0:0_388 163 chr5 351457 199 100M = 351900 543 GGACCTTCTCTCTCTCTCCTGTCTCCTTTCTCTTACCGTGGTGTTTTCAGGGTTCATCCACGTAGTTCACGTATCCTTTTCTTTTCTTTTTTTTTTTTTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:902 NH:i:1
-chr5_618670_619187_0:0:0_0:0:0_389 99 chr5 618670 199 100M = 619088 518 AGAGACAGGCTCTCACTCTGTCTTCCAGGCTGGAGTGCGGTGGCGCCACCTCGGCTCACTGCAACCTGCGCCTCCCGGGTTCAAGCGATTCTCTTGCCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:903 NH:i:1
+chr5_618670_619187_0:0:0_0:0:0_389 1123 chr5 618670 199 100M = 619088 518 AGAGACAGGCTCTCACTCTGTCTTCCAGGCTGGAGTGCGGTGGCGCCACCTCGGCTCACTGCAACCTGCGCCTCCCGGGTTCAAGCGATTCTCTTGCCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:903 NH:i:1
chr5_618670_619187_0:0:0_0:0:0_389 147 chr5 619088 199 100M = 618670 -518 AAACCATTTTTTTTTTTTTGAGATGGAGTTTCCCTGTTGTTGCCCAGGCTTGAGTGTGACGGCGCAGTCTGGGCTCACTGCAACCTCCGACTCGCGGGTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:903 NH:i:1
-chr5_457582_458132_0:0:0_0:0:0_38a 99 chr5 457582 199 100M = 458033 551 TCGCGTCCACCCCTCTTCCCACCAGGAAACTCAGTCTAGGACAGAGAAGGGTAGAAGGGCGAGACAATGGGGACCAGGGGCATCAGGACCCAGATTTTGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:904 NH:i:1
+chr5_457582_458132_0:0:0_0:0:0_38a 1123 chr5 457582 199 100M = 458033 551 TCGCGTCCACCCCTCTTCCCACCAGGAAACTCAGTCTAGGACAGAGAAGGGTAGAAGGGCGAGACAATGGGGACCAGGGGCATCAGGACCCAGATTTTGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:904 NH:i:1
chr5_457582_458132_0:0:0_0:0:0_38a 147 chr5 458033 199 100M = 457582 -551 CAGGGCAGCATTGCATAGTGACCAGCCGACTGGCCTTAGCCTGACCTGCGCTGAGTGGGGGAGGCCCTGCCTCCCTGCCGGGCACCTTTGGGGAAGGGGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:904 NH:i:1
chr5_640421_640858_0:0:0_0:0:0_38b 83 chr5 640759 199 100M = 640421 -438 TGCTGGCGGGTGAGAGAGGTTTCTCCGGAGTTGACTGCCCCCTTTCCCCGGGTGCCCCCTGCCCTGCCCCTCTGCCTGGTAGCCCCTCTGCAAGCCCGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:905 NH:i:1
chr5_640421_640858_0:0:0_0:0:0_38b 163 chr5 640421 199 100M = 640759 438 TTGAAACTTCTCCCACTTCCAGAAGCTGAATCGTAGTGATCAGCCTGCGGCCCCCTGCAGTGGTACAGGCCTGGTTGAGCAGCCTCAGGCCGCCTCCCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:905 NH:i:1
@@ -1019,37 +1019,37 @@ chr5_26639_27169_0:0:0_0:0:0_38c 83 chr5 27070 199 100M = 26639 -531 ATGTGTGTATG
chr5_26639_27169_0:0:0_0:0:0_38c 163 chr5 26639 199 100M = 27070 531 GGACTTCTCTACTCGCTGGCCTGGCGCTGATGCCCAGAGGCCCCCTGTGCTTGTGGTCGTGGCCTTAGACAGTTCCCTGCCCTTTGTAAGTACTAACTTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:906 NH:i:1
chr5_5958_6426_0:0:0_0:0:0_38d 83 chr5 6327 199 100M = 5958 -469 TCACAGGCAAAGGCGTGCTTTGCTGTGCTGTGTAGGTACATATCAAACGCTCACCAAGAAATTATAAAATATTATTCATAGCAGTTTTTTTCTTCAGTAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:907 NH:i:1
chr5_5958_6426_0:0:0_0:0:0_38d 163 chr5 5958 199 100M = 6327 469 AAAATTAGCTACATAACTTCCCCAGAAATATTCATCCGCAGCAAATATTCATCAGAGACAGAATCCAAACCCAGGTCTGTGTGAATCTAAAGTTTATTTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:907 NH:i:1
-chr5_700642_701051_0:0:0_0:0:0_38e 99 chr5 700642 199 100M = 700952 410 AAAGAAGAGACAGCACCTTCCACTGGACTCTCCCGGCCGGCCACAGCCCCGGACAACCTGGGCCCAGGGCTGCTGTCCAGTCCCGCCGGCCCGGGCGTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:908 NH:i:1
+chr5_700642_701051_0:0:0_0:0:0_38e 1123 chr5 700642 199 100M = 700952 410 AAAGAAGAGACAGCACCTTCCACTGGACTCTCCCGGCCGGCCACAGCCCCGGACAACCTGGGCCCAGGGCTGCTGTCCAGTCCCGCCGGCCCGGGCGTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:908 NH:i:1
chr5_700642_701051_0:0:0_0:0:0_38e 147 chr5 700952 199 100M = 700642 -410 AGCCATCTGCGGGGGGACAGGGGCGGTCAGGCGGCTGGGGCCGGCCAGGGCGGCAGCACTCCCGGGCGGCGGGCACGAGGCACTCACCGTAGCGCCGTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:908 NH:i:1
-chr5_663747_664214_0:0:0_0:0:0_38f 99 chr5 663747 199 100M = 664115 468 CCGTCTCAAATATAAAATAAAAATTAGCGGTGCCTGGTGGCATGTGCCTGTGGTCCCAGCTTGTTGGCTTAGCTAGGAGGATCGCCTGAGCATGGGAGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:909 NH:i:1
+chr5_663747_664214_0:0:0_0:0:0_38f 1123 chr5 663747 199 100M = 664115 468 CCGTCTCAAATATAAAATAAAAATTAGCGGTGCCTGGTGGCATGTGCCTGTGGTCCCAGCTTGTTGGCTTAGCTAGGAGGATCGCCTGAGCATGGGAGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:909 NH:i:1
chr5_663747_664214_0:0:0_0:0:0_38f 147 chr5 664115 199 100M = 663747 -468 CCCAAAGTGTTGGGGTTACAGGCCTGAGGCACCGCGCCCGGCCTGTTTGTTTTTTTCAGACGGAGTCTCACTCCGTCGCCCAGGCTGGAGTGCAGTGGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:909 NH:i:1
-chr5_377601_378129_0:0:0_0:0:0_390 99 chr5 377601 199 100M = 378030 529 AGGAGGTGTTGCTCAGGCCAGAAACCCCCTGGGACCCGGGAGATGCGGTGGGGATAACAGGGTCCACGCTGGCCAGCCACAGGCAGCATCTACCACAGAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:910 NH:i:1
+chr5_377601_378129_0:0:0_0:0:0_390 1123 chr5 377601 199 100M = 378030 529 AGGAGGTGTTGCTCAGGCCAGAAACCCCCTGGGACCCGGGAGATGCGGTGGGGATAACAGGGTCCACGCTGGCCAGCCACAGGCAGCATCTACCACAGAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:910 NH:i:1
chr5_377601_378129_0:0:0_0:0:0_390 147 chr5 378030 199 100M = 377601 -529 CCTCCTAACCACCCCAACCCCCCCCCAGCCCCTCCCAGCCCCTGGCCAGGCCCCTCACTTGGTCCAGGGCTTGCAGGCCTGGTTGTCGCCTGGGGAGAAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:910 NH:i:1
chr5_105103_105688_0:0:0_0:0:0_391 83 chr5 105589 199 100M = 105103 -586 TCAGGCACCAAGAGCCTGAATAATTCACCAAATGTTAATAATGTAAAAATCCTCCTTTTTAATTGCTTTCCCTGCTCTGCCTGGGGCCGCTCTGCTGGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:911 NH:i:1
chr5_105103_105688_0:0:0_0:0:0_391 163 chr5 105103 199 100M = 105589 586 TCACATTTTATATAGAGAGAAATGGAGTCTGGGGTGGACCCAGGTGAGGGTGGGCAGTGGGCATGTCAGCAGCACCCCCCGAGGAGAGCAAGCTCCTGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:911 NH:i:1
-chr5_757025_757544_0:0:0_0:0:0_392 99 chr5 757025 199 100M = 757445 520 CCCCCATGCTACTGGCCCAAACAGTTGTCGCTGTCCCAAGACACAGTGTCCACAGTGACTCGTAGCTACTGTGTCCTCCACAGGATATGATGAAGCCCAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:912 NH:i:1
+chr5_757025_757544_0:0:0_0:0:0_392 1123 chr5 757025 199 100M = 757445 520 CCCCCATGCTACTGGCCCAAACAGTTGTCGCTGTCCCAAGACACAGTGTCCACAGTGACTCGTAGCTACTGTGTCCTCCACAGGATATGATGAAGCCCAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:912 NH:i:1
chr5_757025_757544_0:0:0_0:0:0_392 147 chr5 757445 199 100M = 757025 -520 AAAAACCAGAACTCCTTGGTGTGGGAAACCCCACACTCCTGGGACACAGAAGTGTGGACTGTTGAGTGAGAGAAGAGGAAAACAGCACTTCGTTTTTCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:912 NH:i:1
-chr5_59449_59887_0:0:0_0:0:0_393 99 chr5 59449 199 100M = 59788 439 CTGTAATCGCAGCTACTCAGGAGGCTGAGACAGGAGAATCACTAGAACCTGGGAAGTGGAGGTTGCAGTCAGCCAAGATCACACCACTGCACTCCAGCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:913 NH:i:1
+chr5_59449_59887_0:0:0_0:0:0_393 1123 chr5 59449 199 100M = 59788 439 CTGTAATCGCAGCTACTCAGGAGGCTGAGACAGGAGAATCACTAGAACCTGGGAAGTGGAGGTTGCAGTCAGCCAAGATCACACCACTGCACTCCAGCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:913 NH:i:1
chr5_59449_59887_0:0:0_0:0:0_393 147 chr5 59788 199 100M = 59449 -439 ATTTCTCATTATAAGGGCTCAGCAAAGCAGGCAGAGTTAAAAAGCAGAGACAGGAAGAATTTTTAAAATCGTGGACTTCACTCCTACACTGAATCTCAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:913 NH:i:1
-chr5_328917_329462_0:0:0_0:0:0_394 99 chr5 328917 199 100M = 329363 546 TGCGCAGATGCGGCTGGGTTTCGGGTGTGGAGCCATCTTGGACCCACGGGCTGCGTCTTCCGGGCACGGGCACAGTGTGTGGGCTCCAGGCATGGGGTGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:914 NH:i:1
+chr5_328917_329462_0:0:0_0:0:0_394 1123 chr5 328917 199 100M = 329363 546 TGCGCAGATGCGGCTGGGTTTCGGGTGTGGAGCCATCTTGGACCCACGGGCTGCGTCTTCCGGGCACGGGCACAGTGTGTGGGCTCCAGGCATGGGGTGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:914 NH:i:1
chr5_328917_329462_0:0:0_0:0:0_394 147 chr5 329363 199 100M = 328917 -546 GGTTCCCAGGCCCCGCGCTATGACTGGGGTGGGGGCAACGTCTCTCGTGAGGTTTTTTACTTAAATGTGAAACGGCTCAGTACGGTGGCCGCAGCCGGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:914 NH:i:1
-chr5_337133_337641_0:0:0_0:0:0_395 99 chr5 337133 199 100M = 337542 509 CAGAGGGGCCTCCTGGGTCTGCTCCGCACCAGGGTCCAGCGGGACCATCTCAAAGCTTGAGCACCAGGCGCAAGGTGGGAGTGCGGGGAGGGGACGAGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:915 NH:i:1
+chr5_337133_337641_0:0:0_0:0:0_395 1123 chr5 337133 199 100M = 337542 509 CAGAGGGGCCTCCTGGGTCTGCTCCGCACCAGGGTCCAGCGGGACCATCTCAAAGCTTGAGCACCAGGCGCAAGGTGGGAGTGCGGGGAGGGGACGAGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:915 NH:i:1
chr5_337133_337641_0:0:0_0:0:0_395 147 chr5 337542 199 100M = 337133 -509 AAGCCACAGGGCAGGCAGGGCTGCACCCCTCTGGGGGCTCCAGGAGAGGACCCTGTCTGCCTCCCCAGCCCCTGCCAGCTCAGGCATCAGTGGCTGTGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:915 NH:i:1
chr5_231193_231779_0:0:0_0:0:0_396 83 chr5 231680 199 100M = 231193 -587 TCTTGCCTTCTATTCACTTCTCACAATGTCCCTTCAGCACCTGACCCTATACCTGCCGGTTATTCCTAGGTTATATTATTAATGCAACAGAGTAATATTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:916 NH:i:1
chr5_231193_231779_0:0:0_0:0:0_396 163 chr5 231193 199 100M = 231680 587 GGCAGGATAAGGAGGGTCAGTCTTCTAAGTGATTGACAAGGTGAAGCAAGTCACGTGATCACAGGACAGCGGGCCCTTCCCTCTTAGGTAGCTGAAGCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:916 NH:i:1
-chr5_689492_689981_0:0:0_0:0:0_397 99 chr5 621444 199 100M = 621834 490 TGCCTGGGGAATGGACCCCCCTTAGGCCTTTGCCCACCCTCGTGTAGGCTCAGGGTGCTGGTGTGGGCAGCAGCGCCTCCCATCTTCCAGGCGGGGGACG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:917 NH:i:1
+chr5_689492_689981_0:0:0_0:0:0_397 1123 chr5 621444 199 100M = 621834 490 TGCCTGGGGAATGGACCCCCCTTAGGCCTTTGCCCACCCTCGTGTAGGCTCAGGGTGCTGGTGTGGGCAGCAGCGCCTCCCATCTTCCAGGCGGGGGACG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:917 NH:i:1
chr5_689492_689981_0:0:0_0:0:0_397 147 chr5 621834 199 100M = 621444 -490 GCTGCGGCCCAGCAGGCTGCCTTCTGGGAAGGGGGTCCAGGTGTCTCTTGGGGACCCTGTCTTTCTGCAGCTCTGTCCTTGTGGCCACGCAGGAGGCCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:917 NH:i:1
chr5_316855_317360_0:0:0_0:0:0_398 83 chr5 317261 199 100M = 316855 -506 CGCTCCCCTCTGCCTCTGTCTCCTGCTGCCCCTGGGGCCTCCGTCTCTGTCTCCTGCTACCCCCGGGGCCTCCGCTCACAGCAGCCACTTCTGCCACCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:918 NH:i:1
chr5_316855_317360_0:0:0_0:0:0_398 163 chr5 316855 199 100M = 317261 506 AGGCCCCTTCCGCAGTTCACCTGTCCCGGGCCCCCCACCCAGCAGGAGTGCCGCCTGCCTCCCGCACGGCCGGCCTCCGTGTCCCAGACAGGGTTTGCCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:918 NH:i:1
-chr5_889005_889592_0:0:0_0:0:0_399 99 chr5 823120 199 100M = 823608 588 CGGTATCAACTTACCACCACAGCAGAATCAACAGTGACTCGCTAATTAACAGAACCGTTTGCTAGAAAGCACTAATCTAGTTATATAAATACTGAAATAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:919 NH:i:1
+chr5_889005_889592_0:0:0_0:0:0_399 1123 chr5 823120 199 100M = 823608 588 CGGTATCAACTTACCACCACAGCAGAATCAACAGTGACTCGCTAATTAACAGAACCGTTTGCTAGAAAGCACTAATCTAGTTATATAAATACTGAAATAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:919 NH:i:1
chr5_889005_889592_0:0:0_0:0:0_399 147 chr5 823608 199 100M = 823120 -588 ACAGCAAAGCTGAAGGTTTCTGTGCTCTGAGGGATCCGAGAGTGGATGTCCCACTCCTGTATCCTCAGCCAGACACAGAACTAGCCAGATTCATTAGGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:919 NH:i:1
chr5_948310_948842_0:0:0_0:0:0_39a 83 chr5 948743 199 100M = 948310 -533 TCTAGAAACCGTTAATGACAACTTCAAATGTTCTATGAGAAACACGCACAGTTCTCCTCAGAGAAGGGCATTTGGGCTGCTGCATTACCTACTGGCGTTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:920 NH:i:1
chr5_948310_948842_0:0:0_0:0:0_39a 163 chr5 948310 199 100M = 948743 533 AGACAAAAAAGAAAACAAAGACGATGGCCCCGGAAGGAATGCACAATTTGTTTTAGTTTACAGCACAGAGATCTTTCTCTCAATGGGAATTGTGCTCTTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:920 NH:i:1
chr5_862795_863237_0:0:0_0:0:0_39b 83 chr5 799918 199 100M = 799576 -442 CCTGGAGTTCTCCTACCCCGGCTACCTGGCCCTGGGCGAGGCGCACCTGAGCATCATCGCCAACGCCGTCAATGAGGGCACCTACACCTGCGTGGTGCGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:921 NH:i:1
chr5_862795_863237_0:0:0_0:0:0_39b 163 chr5 799576 199 100M = 799918 442 TGCGTGGGAGCTGGGCCTTGGCCATGGTCGGGGCTGAGGGGGCACTGACGGGGCTCTTTCCCCCACCCGGAGCAGAATTCCCCTTCCCCACGGTGGCCAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:921 NH:i:1
-chr5_359538_360054_0:0:0_0:0:0_39c 99 chr5 359538 199 100M = 359955 517 CAAGGCTGCAGTGAGCCACAATCAAACCACATTACCCCAGCCTAGGTGACAGAACAAGACCCTGTTTCTAAAATAAAAAATGTTTTAAAATAAACAATTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:922 NH:i:1
+chr5_359538_360054_0:0:0_0:0:0_39c 1123 chr5 359538 199 100M = 359955 517 CAAGGCTGCAGTGAGCCACAATCAAACCACATTACCCCAGCCTAGGTGACAGAACAAGACCCTGTTTCTAAAATAAAAAATGTTTTAAAATAAACAATTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:922 NH:i:1
chr5_359538_360054_0:0:0_0:0:0_39c 147 chr5 359955 199 100M = 359538 -517 AAGGACGGTAGGGCGGGTCTACCTTTGTTTATCTGGGAATGTCTTAATCTCCCCTTCATTTTTGAAGCAGCTTTTGCTAGATGTAGAATTTTTGGTCAAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:922 NH:i:1
-chr5_316738_317242_0:0:0_0:0:0_39d 99 chr5 316738 199 100M = 317143 505 CTGCCCGTCTGTTCCGAGGAGCCACCTGGGGGCCCTTCCTGCGCAGCTCAGGATGTCTGAGAAGCCCGGGGTCCTCACCGCTCAACGGCCTTCTCCGGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:923 NH:i:1
+chr5_316738_317242_0:0:0_0:0:0_39d 1123 chr5 316738 199 100M = 317143 505 CTGCCCGTCTGTTCCGAGGAGCCACCTGGGGGCCCTTCCTGCGCAGCTCAGGATGTCTGAGAAGCCCGGGGTCCTCACCGCTCAACGGCCTTCTCCGGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:923 NH:i:1
chr5_316738_317242_0:0:0_0:0:0_39d 147 chr5 317143 199 100M = 316738 -505 CCACCCAGGGGGAGCGTGGAGTGCGAGGAGGGGCAGCCACGGTCCGTTCTGTGGCCTCGGCTCTCCAGGACTCGGAGGCTCCACGCCCCTTCCTGCTCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:923 NH:i:1
chr5_28790_29359_0:0:0_0:0:0_39e 83 chr5 29260 199 100M = 28790 -570 GGTGGGGAGAGAGGAGAGCGTCGAAACTGGGGAAGTGGGGAACCAGAGATGGCAAAGCCTGGGACTTCCAGCTTCACCCACAGGACGCGAGGAGCCTCAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:924 NH:i:1
chr5_28790_29359_0:0:0_0:0:0_39e 163 chr5 28790 199 100M = 29260 570 GCCTCTTTCCTTGGATAAAGAGCAAGCACATTACGAGCCACACTTGGTGCTGGTGCTGTGGCATGCAACACTACCTAATGCGAGAGAAAGATGTGAGCAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:924 NH:i:1
@@ -1059,23 +1059,23 @@ chr5_249303_249841_0:0:0_0:0:0_3a0 83 chr5 249742 199 100M = 249303 -539 GCACCCT
chr5_249303_249841_0:0:0_0:0:0_3a0 163 chr5 249303 199 100M = 249742 539 TGTGGCTGTGCGCCGGTCACTCAGCCTGTGTCTGTGGCCCCGCACATAAAATGGGTCCTAACCGGGCTGACTCTTGGGTCCGCCTGACCCAGCACCACTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:926 NH:i:1
chr5_357750_358337_0:0:0_0:0:0_3a1 83 chr5 358238 199 100M = 357750 -588 TCCAGTTGGTATGAGTGTAGACACTCCAGTTGGTATGAGTGTAGACACTCCAGTTGGTATGAGTGTAGACACTCCAGTTCTACTTTGGTTACCGTGTGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:927 NH:i:1
chr5_357750_358337_0:0:0_0:0:0_3a1 163 chr5 357750 199 100M = 358238 588 CGTTATAAAAATTTTCCTTTAATCTGCAGTAATAATTTTTGTGTAAAGTCTATTTTCTGTGATATGAGTGTAGACACTCCAGTTGGTATAAGTGTAGACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:927 NH:i:1
-chr5_238942_239438_0:0:0_0:0:0_3a2 99 chr5 238942 199 100M = 239339 497 CCTGACCCAATCCCACTTCATGGCAGGGACCCTGGGGGACGGACACTGGGGGATGCTGCTCTGCCCCTGGGCATGGCTCAGGTGGGCATCTCAGCTGACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:928 NH:i:1
+chr5_238942_239438_0:0:0_0:0:0_3a2 1123 chr5 238942 199 100M = 239339 497 CCTGACCCAATCCCACTTCATGGCAGGGACCCTGGGGGACGGACACTGGGGGATGCTGCTCTGCCCCTGGGCATGGCTCAGGTGGGCATCTCAGCTGACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:928 NH:i:1
chr5_238942_239438_0:0:0_0:0:0_3a2 147 chr5 239339 199 100M = 238942 -497 GACTGTGCTCTGCGGCCTGTGTACCCCACAGAACCGGTTCCTTGGCACGAGGCCCCACCCCTCCACGATGGTGCCCCACCCTGAGCCTGTGCAGGTAAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:928 NH:i:1
-chr5_413374_413963_0:0:0_0:0:0_3a3 99 chr5 413374 199 100M = 413864 590 GAGGGGTGGGCAGGTGCCCTGGCCAAGGAGACCCCCCGCCCAGGGCTACCAGACCCCTCCTCACAGGCAGGGCATCCAAATTTTCTCTGTCACCCACTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:929 NH:i:1
+chr5_413374_413963_0:0:0_0:0:0_3a3 1123 chr5 413374 199 100M = 413864 590 GAGGGGTGGGCAGGTGCCCTGGCCAAGGAGACCCCCCGCCCAGGGCTACCAGACCCCTCCTCACAGGCAGGGCATCCAAATTTTCTCTGTCACCCACTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:929 NH:i:1
chr5_413374_413963_0:0:0_0:0:0_3a3 147 chr5 413864 199 100M = 413374 -590 AGGAAAACAGGCAGCCCTGGGAGGCTGGACCGAGGGAGGCTGGGCCTCCCACTCTGCCCTATAGGCCGGGACACAGGCAGCCCTGGGAGGCTAGACCGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:929 NH:i:1
-chr5_927369_927885_0:0:0_0:0:0_3a4 99 chr5 927369 199 100M = 927786 517 AGATCATGCCATTGCACTCCAGCCTGGGTGACTAGAGTGAAACTTCATCTCAAAATTTTTTCTGTAAAATAATATTAACAAAAAAAATTGTTTCAAAAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:930 NH:i:1
+chr5_927369_927885_0:0:0_0:0:0_3a4 1123 chr5 927369 199 100M = 927786 517 AGATCATGCCATTGCACTCCAGCCTGGGTGACTAGAGTGAAACTTCATCTCAAAATTTTTTCTGTAAAATAATATTAACAAAAAAAATTGTTTCAAAAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:930 NH:i:1
chr5_927369_927885_0:0:0_0:0:0_3a4 147 chr5 927786 199 100M = 927369 -517 ATGTCTGTAATCCCAGCTACTTGGGAGGTTGAGGCAGGAGAATCGCTTGAACCCGGGAAGCGGAGGTTGTGGTGAGCTGAGATTGTGCCACTGCACTCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:930 NH:i:1
-chr5_592825_593367_0:0:0_0:0:0_3a5 99 chr5 592825 199 100M = 593268 543 TCACCCTGCCCTGCTCCCCTCTTCTCCCCCTTCCCTCCATCATCCCGCCCGCTCCCCTCTCCACCCCTCCCCTTCCCCTCCATCACCCTGCCCAGCCCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:931 NH:i:1
+chr5_592825_593367_0:0:0_0:0:0_3a5 1123 chr5 592825 199 100M = 593268 543 TCACCCTGCCCTGCTCCCCTCTTCTCCCCCTTCCCTCCATCATCCCGCCCGCTCCCCTCTCCACCCCTCCCCTTCCCCTCCATCACCCTGCCCAGCCCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:931 NH:i:1
chr5_592825_593367_0:0:0_0:0:0_3a5 147 chr5 593268 199 100M = 592825 -543 CTCCGAGCTCGCGCGCGACCCCATCGCGTGGCCCGGCCCGGAAAACTGAGGGTCGCCCCCGCTGCCCCTTCCTGGCTGGGCGCGCAGCGCTCCCGGCTTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:931 NH:i:1
-chr5_668290_668836_0:0:0_0:0:0_3a6 99 chr5 668290 199 100M = 668737 547 TTTTGTTGAAAACCTTGCGAGTTTGGGATTTTCATTATCCTTTGCTATTAATAAGACTTCATTCAGTCTAATATATATATATATAAATGTATAAGCCACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:932 NH:i:1
+chr5_668290_668836_0:0:0_0:0:0_3a6 1123 chr5 668290 199 100M = 668737 547 TTTTGTTGAAAACCTTGCGAGTTTGGGATTTTCATTATCCTTTGCTATTAATAAGACTTCATTCAGTCTAATATATATATATATAAATGTATAAGCCACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:932 NH:i:1
chr5_668290_668836_0:0:0_0:0:0_3a6 147 chr5 668737 199 100M = 668290 -547 GACTTTGTGTTGAATTGGGCAAGAGCCTCCTCCCGTCCACGTGGGATGGCCTTCCTGATGTGGCTCTCCAAGACCATCCCTGGAGGGCATAAAACCTCAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:932 NH:i:1
-chr5_676074_676589_0:0:0_0:0:0_3a7 99 chr5 676074 199 100M = 676490 516 AGTAGCTGGGATTACAGGCGCACGCCACCATGCCCAGGTTAATTTTTGTATTTTTGACAGGGGTTTCACCATGTTGGCCCGGCTGGTCTCAAACTCCTGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:933 NH:i:1
+chr5_676074_676589_0:0:0_0:0:0_3a7 1123 chr5 676074 199 100M = 676490 516 AGTAGCTGGGATTACAGGCGCACGCCACCATGCCCAGGTTAATTTTTGTATTTTTGACAGGGGTTTCACCATGTTGGCCCGGCTGGTCTCAAACTCCTGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:933 NH:i:1
chr5_676074_676589_0:0:0_0:0:0_3a7 147 chr5 676490 199 100M = 676074 -516 GGGTAGCCCCTACTGCTGTGTGGTTCCCCTATTGGCTAGGGTTAGAATGCACAGTCTAAACTAGTGCCCATTGGCTATTTTAAAGGGAGCAGGAGTATGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:933 NH:i:1
-chr5_513227_513635_0:0:0_0:0:0_3a8 99 chr5 513227 199 100M = 513536 409 GGAGGGCCGCTGAGCCGAGTGGGGAGCGCCCAGCCTTCAAAGGCTGCCAAGGACGCACCCTGCCCTCTCTGCTTTGCCTGGTCGTGGGGGTGGGGAAGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:934 NH:i:1
+chr5_513227_513635_0:0:0_0:0:0_3a8 1123 chr5 513227 199 100M = 513536 409 GGAGGGCCGCTGAGCCGAGTGGGGAGCGCCCAGCCTTCAAAGGCTGCCAAGGACGCACCCTGCCCTCTCTGCTTTGCCTGGTCGTGGGGGTGGGGAAGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:934 NH:i:1
chr5_513227_513635_0:0:0_0:0:0_3a8 147 chr5 513536 199 100M = 513227 -409 ACTGTTGAGCTGGGACTAGGGACTCAGGTTACTGGTGGCTGGGCCGGGCCAGGCCAGGCCTCAAGCAGACCGCCAGGCTCCTGCTCCCTGTCCCAGAGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:934 NH:i:1
chr5_197889_198434_0:0:0_0:0:0_3a9 83 chr5 198335 199 100M = 197889 -546 TGGCATGGACTCTAGGGGATGGTGCACACACCGGACCGGACGGGCCCCTCCCTTACCCCCGGATCCCCCGGCTGGGCAGCGGCCAGGGAGAGGGGCGACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:935 NH:i:1
chr5_197889_198434_0:0:0_0:0:0_3a9 163 chr5 197889 199 100M = 198335 546 ACCTGGAAGGAAGGAGGCGTTGGGGAGAGTCCAGATGGAGGCCATGGGGTTGAGGGGCCCAGACACCCGGCTGGGGGACTCCAGGAAGGCAGCAGGTGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:935 NH:i:1
-chr5_199161_199709_0:0:0_0:0:0_3aa 99 chr5 199161 199 100M = 199610 549 CCCAGTCCCAACACCTTCAGGCCCACCCTCGGCCCCAGCCTCAGCCACCCCAGCCCCAGTCCCGGCCCAGCCCCAGCCCCAGCGCTCCCGGCCCCGGGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:936 NH:i:1
+chr5_199161_199709_0:0:0_0:0:0_3aa 1123 chr5 199161 199 100M = 199610 549 CCCAGTCCCAACACCTTCAGGCCCACCCTCGGCCCCAGCCTCAGCCACCCCAGCCCCAGTCCCGGCCCAGCCCCAGCCCCAGCGCTCCCGGCCCCGGGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:936 NH:i:1
chr5_199161_199709_0:0:0_0:0:0_3aa 147 chr5 199610 199 100M = 199161 -549 CGGCGCCCGGCCCCCGCGCACCTGCCGCGCGCACCGCCTCTCCGGCCTGCGCGGCTCCGGGGGCTCCGGGAAGACCGAGCGCTGGCGGCCAGCCCGGGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:936 NH:i:1
chr5_804333_804827_0:0:0_0:0:0_3ab 83 chr5 804728 199 100M = 804333 -495 GTTCCAGCTACTCAGGAGGCTGAAATGGGAGGATTGTTTGAGCCTGGGAGGTGGAGGCTCCAGTGAGCTATGATTAAGCCACTCCACTCCGGCCTGGGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:937 NH:i:1
chr5_804333_804827_0:0:0_0:0:0_3ab 163 chr5 804333 199 100M = 804728 495 GACCAGCCCGGCCAACATAGCAACACTCTGTTTTCTATTTTTTTTTTTTTTTTTTTTGAGACGGAGTCTCGCTCTGTCACCCAGGCTGGAGTGCAATGGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:937 NH:i:1
@@ -1087,57 +1087,57 @@ chr5_235152_235658_0:0:0_0:0:0_3ae 83 chr5 235559 199 100M = 235152 -507 CTCCCCA
chr5_235152_235658_0:0:0_0:0:0_3ae 163 chr5 235152 199 100M = 235559 507 GTGGCCGGAGCTGCGGGCCGGGACTGTGTCCAGGACAGAGCCACAAGCTTGTCCCCAGCTCAGGGAGGTCCAGGGGCGGCAGAGGGAGCGACAGGCTGCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:940 NH:i:1
chr5_457193_457661_0:0:0_0:0:0_3af 83 chr5 457562 199 100M = 457193 -469 CCCAGCCTAGCCTCGCCGGTTCGCGTCCACCCCTCTTCCCACCAGGAAACTCAGTCTAGGACAGAGAAGGGTAGAAGGGCGAGACAATGGGGACCAGGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:941 NH:i:1
chr5_457193_457661_0:0:0_0:0:0_3af 163 chr5 457193 199 100M = 457562 469 CACCGGGCATGTCGGCGCCTCTGGTCAAACCACCTACACTGCCTGGGGTGGGTCTCAAGGAGGCCCGGGGCGGAGGGGGGTTCCCGCGTGCACACGAGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:941 NH:i:1
-chr5_82706_83147_0:0:0_0:0:0_3b0 99 chr5 82706 199 100M = 83048 442 TCGATCCAGGAACCGTCCCAGGGTTGCCCCAGGCCTCCTGGCTCTCCCGCCTCCATCCCGTGGGCTTCCCGGGAGCCCCAGGCTGGTCTCCCACCTGCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:942 NH:i:1
+chr5_82706_83147_0:0:0_0:0:0_3b0 1123 chr5 82706 199 100M = 83048 442 TCGATCCAGGAACCGTCCCAGGGTTGCCCCAGGCCTCCTGGCTCTCCCGCCTCCATCCCGTGGGCTTCCCGGGAGCCCCAGGCTGGTCTCCCACCTGCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:942 NH:i:1
chr5_82706_83147_0:0:0_0:0:0_3b0 147 chr5 83048 199 100M = 82706 -442 TCTTGGTCCCAGTGCTGCAGGTGTGGGCTGGGGAGTGCCTGAGGGAGGGGCCTCTGCTTGGGACCCTTCCTGCCTGGGCGAGGGGCTGAGTCCCTCCTGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:942 NH:i:1
-chr5_301948_302373_0:0:0_0:0:0_3b1 99 chr5 301948 199 100M = 302274 426 GCAGAGGAGCTAGGCCAGATGCTGTGTTTGTGTGGGGTGCTGAGTGGCCAGCCTAAAACCTTCTGAGAAAGTAGGATCCAGCACAGGGAGCTTCAGCTTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:943 NH:i:1
+chr5_301948_302373_0:0:0_0:0:0_3b1 1123 chr5 301948 199 100M = 302274 426 GCAGAGGAGCTAGGCCAGATGCTGTGTTTGTGTGGGGTGCTGAGTGGCCAGCCTAAAACCTTCTGAGAAAGTAGGATCCAGCACAGGGAGCTTCAGCTTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:943 NH:i:1
chr5_301948_302373_0:0:0_0:0:0_3b1 147 chr5 302274 199 100M = 301948 -426 TGGGGGGAGGCAGCAGGAGGGGCGGCCGGGTGGTCAGTTTGAGGACCTGGCCCCACGTGCCTTTGTGGGATGGTGGGGACCCCACACCCCCGCTGGCTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:943 NH:i:1
-chr5_939918_940388_0:0:0_0:0:0_3b2 99 chr5 939918 199 100M = 940289 471 CCATGGCCGCCCGGACCCCGGCGCCGGCGCCGCCGAGCAGCAATGCGCCGCGCCCGCCCACTGCGCAGGCGCACCCGCCACGCATGCGCGCTGCCGCGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:944 NH:i:1
+chr5_939918_940388_0:0:0_0:0:0_3b2 1123 chr5 939918 199 100M = 940289 471 CCATGGCCGCCCGGACCCCGGCGCCGGCGCCGCCGAGCAGCAATGCGCCGCGCCCGCCCACTGCGCAGGCGCACCCGCCACGCATGCGCGCTGCCGCGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:944 NH:i:1
chr5_939918_940388_0:0:0_0:0:0_3b2 147 chr5 940289 199 100M = 939918 -471 GCCACGTCTACGCCTAGGCGCCCCCGACATTGTGATCCCAGCCCGGCGTCCCGCCCCCAACTCACCGCCACCGCGGGCGGGACCTCCCGGACCACGTGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:944 NH:i:1
chr5_205536_206069_0:0:0_0:0:0_3b3 83 chr5 205970 199 100M = 205536 -534 ACGCGTGTCCGTGTCCGTGGTGGACCCCCGATGCGGCGCGGGGGCGGGTGAATGCGCGGGCTGCGAGCACGGCAAGGTCTCTCAGGCTTGTGGACGTGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:945 NH:i:1
chr5_205536_206069_0:0:0_0:0:0_3b3 163 chr5 205536 199 100M = 205970 534 CGGTGGGGCTCTCAGGCCCCTGAGGGCCAGACGCGGGACCTGGGTGCGACCCGGGCGGGAGGCGGCAGGGCTCAGCTGTGCTGGACGCTGCAGCAGCGCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:945 NH:i:1
-chr5_926583_927119_0:0:0_0:0:0_3b4 99 chr5 926583 199 100M = 927020 537 TTCAAGGGCACAGCTTCACCTGGGCAAGGACCCAGCCTCACCTTCCGGATGCATCGACGCAGACTACTCAGGAGAATTCTTCATAATCGTTTTAAGAAAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:946 NH:i:1
+chr5_926583_927119_0:0:0_0:0:0_3b4 1123 chr5 926583 199 100M = 927020 537 TTCAAGGGCACAGCTTCACCTGGGCAAGGACCCAGCCTCACCTTCCGGATGCATCGACGCAGACTACTCAGGAGAATTCTTCATAATCGTTTTAAGAAAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:946 NH:i:1
chr5_926583_927119_0:0:0_0:0:0_3b4 147 chr5 927020 199 100M = 926583 -537 ACACATACATATGTTCCATTTCATCAAGGGGAAAAAATGGCTGAAGTCCAATTTACCAAAGTACTTTGAAAACAGACTGTTTCCATTGCAAAGATATTTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:946 NH:i:1
-chr5_579999_580480_0:0:0_0:0:0_3b5 99 chr5 579999 199 100M = 580381 482 TCCTGGGCTGCATACCTTGGATAGAATAGCATCATACAAACAAGTTTCTTTTAGAATCCTGGTACACTTACAATAACCATAAAATAATAGGACTGTAGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:947 NH:i:1
+chr5_579999_580480_0:0:0_0:0:0_3b5 1123 chr5 579999 199 100M = 580381 482 TCCTGGGCTGCATACCTTGGATAGAATAGCATCATACAAACAAGTTTCTTTTAGAATCCTGGTACACTTACAATAACCATAAAATAATAGGACTGTAGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:947 NH:i:1
chr5_579999_580480_0:0:0_0:0:0_3b5 147 chr5 580381 199 100M = 579999 -482 GGCCCCCTAGAATAAACTGAGTCCAACACTTCTACACAGTTAAGTTTCACTGAGCTCTCTGATACTGGGAGCAAGGTGGCAGGGTTTAGGGTTTTGCAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:947 NH:i:1
-chr5_60533_60972_0:0:0_0:0:0_3b6 99 chr5 60533 199 100M = 60873 440 AAGCTGGTCTCGAACTCCCAACCTCAGGTGATCCGCCTGCCTAAGCCTCCCAAAGTGTTGGGTTTACAGGCGTGAGCCACCACGCCCGGCCCGCTTGTTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:948 NH:i:1
+chr5_60533_60972_0:0:0_0:0:0_3b6 1123 chr5 60533 199 100M = 60873 440 AAGCTGGTCTCGAACTCCCAACCTCAGGTGATCCGCCTGCCTAAGCCTCCCAAAGTGTTGGGTTTACAGGCGTGAGCCACCACGCCCGGCCCGCTTGTTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:948 NH:i:1
chr5_60533_60972_0:0:0_0:0:0_3b6 147 chr5 60873 199 100M = 60533 -440 GATGGTCTCGATCTCCTGACCTTGTGATCCGCCCACCTCGGCCTCCCAGAGTGCTGGGATTACAGGCGTGAGCCACGCGCCCAGCCAGAAGCTCTGTAAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:948 NH:i:1
chr5_16352_16827_0:0:0_0:0:0_3b7 83 chr5 16728 199 100M = 16352 -476 CACTTGTAGCCATCTGCTTCTCTTTTTGGTTCTGGAACATTCTAGAACCTTGGAAGGTGGCTGGGGCTGAGTCTCAGCCAGCAGGGTTGCCCAGTGCCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:949 NH:i:1
chr5_16352_16827_0:0:0_0:0:0_3b7 163 chr5 16352 199 100M = 16728 476 TTTTTTGTTCTTGCAATAGTTTACTGAGAATGATGATTTCCAATTTCATCCATGTCCCTACAAAGGACATGAACTCATCATTTTTTATGGCTGCATAGTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:949 NH:i:1
-chr5_845264_845823_0:0:0_0:0:0_3b8 99 chr5 845264 199 100M = 845724 560 CAGGTGTGATCAGCATAAGCTCCGGGGTGATCTGAACCACTCCTGTCACTCAGGAAATTCCAAGGCTTCGAGGAGCTTCTGTGCCAGGAACCAGGAACAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:950 NH:i:1
+chr5_845264_845823_0:0:0_0:0:0_3b8 1123 chr5 845264 199 100M = 845724 560 CAGGTGTGATCAGCATAAGCTCCGGGGTGATCTGAACCACTCCTGTCACTCAGGAAATTCCAAGGCTTCGAGGAGCTTCTGTGCCAGGAACCAGGAACAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:950 NH:i:1
chr5_845264_845823_0:0:0_0:0:0_3b8 147 chr5 845724 199 100M = 845264 -560 ACCTCCGCGTGCTGACACCCACATCAGTGACATTTGGGCTAAGCCGCCTAATGCAGAGTGGCTATAAGCGCGCTGAGCCGCTCGTGAAATTCAGACAACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:950 NH:i:1
-chr5_967017_967469_0:0:0_0:0:0_3b9 99 chr5 967017 199 100M = 967370 453 ATCCTGTCAAATGGCTATGAACTCATCTGAAAACAAAAAATTTTTTTTTCGAGATGGAGTTTCGCTCTTGTTGCCCAGGTTTGAGTGCAGTGGCGTGATC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:951 NH:i:1
+chr5_967017_967469_0:0:0_0:0:0_3b9 1123 chr5 967017 199 100M = 967370 453 ATCCTGTCAAATGGCTATGAACTCATCTGAAAACAAAAAATTTTTTTTTCGAGATGGAGTTTCGCTCTTGTTGCCCAGGTTTGAGTGCAGTGGCGTGATC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:951 NH:i:1
chr5_967017_967469_0:0:0_0:0:0_3b9 147 chr5 967370 199 100M = 967017 -453 GGAATAACCAAAATGGACATTCCTACAATCTACCCTGCCACATCTCTCCCACTGTTTTAGATCTGCTGTAATTAATGCAATCATTTATACACATTTAGTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:951 NH:i:1
chr5_602056_602482_0:0:0_0:0:0_3ba 83 chr5 602383 199 100M = 602056 -427 CAGCATCAGCCCCCCGAGGGGACCTGATGTTCCTGCTGGACAGCTCAGCCAGCGTCTCTCACTACGAGTTCTCCCGGGTTCGGGAGTTTGTGGGGCAGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:952 NH:i:1
chr5_602056_602482_0:0:0_0:0:0_3ba 163 chr5 602056 199 100M = 602383 427 GCCTAGGCCAGGACTCTTGGCTCCACCTGCTTGCAGGGTCTGTGAGGAATGGGAAGGTGTGGGGGCTTGGAGCGGAGAGTCGCCCTCTCACAGATGGGAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:952 NH:i:1
chr5_657623_658114_0:0:0_0:0:0_3bb 83 chr5 658015 199 100M = 657623 -492 GTGTTGGATCACTTGAGGTCAGGAGTTCGAGGCCAGCCTGGCCAGTGTGCCGAGACCCCACCTCTACTAAAAATACAAAAATTAACTGGACGGGGCCGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:953 NH:i:1
chr5_657623_658114_0:0:0_0:0:0_3bb 163 chr5 657623 199 100M = 658015 492 AAGACCCTGTCTTAAAAACTGAGAATAATTTGGAACAAGCCCGGTGGCTCACTCCTGTAATCCCAGCATGTTGGGAGGCCAAGGAGAGAAGATCACTTGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:953 NH:i:1
-chr5_924203_924670_0:0:0_0:0:0_3bc 99 chr5 924203 199 100M = 924571 468 TTGCAGTGAGCCGAGATCGAGCCACTGTACTCCAGCCTGGCTGAAAAAAGTGAAACTCTGTCTCAAAATGAATGAATGAATGAATGAATGAATGAATGAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:12 SD:i:0 SN:i:954 NH:i:1
+chr5_924203_924670_0:0:0_0:0:0_3bc 1123 chr5 924203 199 100M = 924571 468 TTGCAGTGAGCCGAGATCGAGCCACTGTACTCCAGCCTGGCTGAAAAAAGTGAAACTCTGTCTCAAAATGAATGAATGAATGAATGAATGAATGAATGAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:12 SD:i:0 SN:i:954 NH:i:1
chr5_924203_924670_0:0:0_0:0:0_3bc 147 chr5 924571 199 100M = 924203 -468 AACGGCCTTCCGCTGCCTCGCAGGGCCAGACAACCCCAGGAGAGCCGTGGTGCCCTGAGGGCTGCTCCACAGGTGACACAGGCGTGGCCATCAGTGGTCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:954 NH:i:1
-chr5_850507_850981_0:0:0_0:0:0_3bd 99 chr5 850507 199 100M = 850882 475 CGTCTCTACCAAAAATACAAAAAACTTAGCTGGACGTGGTGGTGGGTGCCAGTAGTCCCAACTACACAGGAGGCTGAGGCACGAGGATCACTTGAACCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:955 NH:i:1
+chr5_850507_850981_0:0:0_0:0:0_3bd 1123 chr5 850507 199 100M = 850882 475 CGTCTCTACCAAAAATACAAAAAACTTAGCTGGACGTGGTGGTGGGTGCCAGTAGTCCCAACTACACAGGAGGCTGAGGCACGAGGATCACTTGAACCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:955 NH:i:1
chr5_850507_850981_0:0:0_0:0:0_3bd 147 chr5 850882 199 100M = 850507 -475 GGTCCGAAGCACTCTCTGCACAGCGGTGAATCCCCACAATAGCCCTCTGGGGAAGGTGCTGTTATCACCCACGTGAGACACACGAAGGAAAGACACGGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:955 NH:i:1
chr5_760828_761309_0:0:0_0:0:0_3be 83 chr5 761210 199 100M = 760828 -482 AATGGGGAGAGACACAGAGACAGAGACAGAGACAGAGAGAGGCAGACAGAGACAGAGAGAGAGACAGACAGACACAGAGCAGAACAGGGAGAGACAGAGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:956 NH:i:1
chr5_760828_761309_0:0:0_0:0:0_3be 163 chr5 760828 199 100M = 761210 482 GAGAGATAGAGAGAGGCAGACAGAGACAGAGAGACAGACAGACACAGAGCAGAACAGGGAGAGACAGAGAGAGAGAGACAGAGAGAGGCAGACAGAGAGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:956 NH:i:1
-chr5_86983_87506_0:0:0_0:0:0_3bf 99 chr5 86983 199 100M = 87407 524 GCGAGCAGACACAAGACATGGGACACAGGTGCGCCAGTGAACACGGGACTGGTGTAAGCCAGCGGGCATGAGACACGGGTGTGAGCGAGTGGACACGGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:957 NH:i:1
+chr5_86983_87506_0:0:0_0:0:0_3bf 1123 chr5 86983 199 100M = 87407 524 GCGAGCAGACACAAGACATGGGACACAGGTGCGCCAGTGAACACGGGACTGGTGTAAGCCAGCGGGCATGAGACACGGGTGTGAGCGAGTGGACACGGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:957 NH:i:1
chr5_86983_87506_0:0:0_0:0:0_3bf 147 chr5 87407 199 100M = 86983 -524 GAGGGGTGCCTCCAGCTTTGGACCCAGTCAGGGTCTTTTCTGGAAGACACCATGGGCCCCGGGTAGGCCCCCACACCCAGCCAGAAACCCAGGGCCAGCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:957 NH:i:1
chr5_157471_157958_0:0:0_0:0:0_3c0 83 chr5 157859 199 100M = 157471 -488 ATCCACCTGCCTTGGCCTCCCAAAGTGCTGGGATTACAGGCATAAGCCACCGCGCCCAGCCTCTCTGTTTTTTAAAATCTGTTTTATTTTAAATAGTTTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:958 NH:i:1
chr5_157471_157958_0:0:0_0:0:0_3c0 163 chr5 157471 199 100M = 157859 488 GAAAAACTCCAATCATTATTTCTTCAGCTATTTTACCTTCTTCCCCTCTTCTGGGACCAACTCCAATTAAACGTATGTTAGGCCACTCAACGTTGTCTCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:958 NH:i:1
-chr5_814915_815483_0:0:0_0:0:0_3c1 99 chr5 814915 199 100M = 815384 569 CAACCTTTGCCTCCCAGGTTCAAGCAATTCTCCTGCCTCAGCCTCCTGAGTAGCTGTGACTACAGGTGTACACCACCATGCCAGGCTAATTTTTGTATTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:959 NH:i:1
+chr5_814915_815483_0:0:0_0:0:0_3c1 1123 chr5 814915 199 100M = 815384 569 CAACCTTTGCCTCCCAGGTTCAAGCAATTCTCCTGCCTCAGCCTCCTGAGTAGCTGTGACTACAGGTGTACACCACCATGCCAGGCTAATTTTTGTATTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:959 NH:i:1
chr5_814915_815483_0:0:0_0:0:0_3c1 147 chr5 815384 199 100M = 814915 -569 ACAACCGAGCGTGGTGGCGCATGCCTGTAATCCTAGCTACTCAGGAGTCTGAGGCAGGAGACTCACTTGAACCCAGGAGCTGGAGGTGGCAGTGAGCCGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:959 NH:i:1
-chr5_736451_736922_0:0:0_0:0:0_3c2 99 chr5 736451 199 100M = 736823 472 CTTTGGGAGGCCGAGGCGGGCGGATCACGAGGTCAGGAGATCGAGGCCATCTGGCCAACACGGTGAAACCCTGACTCTACTAAAAGCACCAAAAATTAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:960 NH:i:1
+chr5_736451_736922_0:0:0_0:0:0_3c2 1123 chr5 736451 199 100M = 736823 472 CTTTGGGAGGCCGAGGCGGGCGGATCACGAGGTCAGGAGATCGAGGCCATCTGGCCAACACGGTGAAACCCTGACTCTACTAAAAGCACCAAAAATTAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:960 NH:i:1
chr5_736451_736922_0:0:0_0:0:0_3c2 147 chr5 736823 199 100M = 736451 -472 GTGAGCCAAGATCGTGTCATTGCACTCCAGCCTGGGCGACAGAGGGAGACTCCGACTTCAACAAAAAAAAAAAAAAAGGAAATTGAAGGTCTATGTCTAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:960 NH:i:1
-chr5_497050_497591_0:0:0_0:0:0_3c3 99 chr5 497050 199 100M = 497492 542 GGGCCGAGGTGGCCATCTGCGGTTCTGTGTGGCCCCAGGTTCTCCTCAAACGGCCTGCTCTGGGCACTGGCCATGAAAATGGCCGTGGAGGAGATCAACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:961 NH:i:1
+chr5_497050_497591_0:0:0_0:0:0_3c3 1123 chr5 497050 199 100M = 497492 542 GGGCCGAGGTGGCCATCTGCGGTTCTGTGTGGCCCCAGGTTCTCCTCAAACGGCCTGCTCTGGGCACTGGCCATGAAAATGGCCGTGGAGGAGATCAACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:961 NH:i:1
chr5_497050_497591_0:0:0_0:0:0_3c3 147 chr5 497492 199 100M = 497050 -542 ATGGAGCTGCTGAGCGCCCGGGAGACCTTCCCCTCCTTCTTCCGCACCGTGCCCAGCGACCGTGTGCAGCTGACGGCCGCCGCGGAGCTGCTGCAGGAGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:961 NH:i:1
-chr5_488005_488574_0:0:0_0:0:0_3c4 99 chr5 488005 199 100M = 488475 570 GTGGGGGAGGGGGAGGGACGGGGTGGGGAAGGGATGGTCCAGGGGAGGAGAAGAAGCGGGGCGGGCGGAGGGAGGACAGGCAGCAGGGAGGGACAGGCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:962 NH:i:1
+chr5_488005_488574_0:0:0_0:0:0_3c4 1123 chr5 488005 199 100M = 488475 570 GTGGGGGAGGGGGAGGGACGGGGTGGGGAAGGGATGGTCCAGGGGAGGAGAAGAAGCGGGGCGGGCGGAGGGAGGACAGGCAGCAGGGAGGGACAGGCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:962 NH:i:1
chr5_488005_488574_0:0:0_0:0:0_3c4 147 chr5 488475 199 100M = 488005 -570 AGGCTGGGAGGACCCCACGGAGGACCCAGAGAGCAGGGGAGGGCAGTGGGGCCCATGCAAACAGCTGCAAAATGCCATGGGGTGTGCACAGGCCAGGGGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:962 NH:i:1
-chr5_262334_262810_0:0:0_0:0:0_3c5 99 chr5 262334 199 100M = 262711 477 TGGCTAATTTTTTGATTTTTTGTAGAGACAGGGTCTCGCTATGAAACAAAACTGAAGCATGATGTTGCTGGTCTTGAACTCCTGGGCTCAAACAATCCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:963 NH:i:1
+chr5_262334_262810_0:0:0_0:0:0_3c5 1123 chr5 262334 199 100M = 262711 477 TGGCTAATTTTTTGATTTTTTGTAGAGACAGGGTCTCGCTATGAAACAAAACTGAAGCATGATGTTGCTGGTCTTGAACTCCTGGGCTCAAACAATCCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:963 NH:i:1
chr5_262334_262810_0:0:0_0:0:0_3c5 147 chr5 262711 199 100M = 262334 -477 CATGCCTGGCTAATTTTTGTATTTTTAGTAGAGACAGGGTTTCACCATGTTGGCCAGCCTGGTCTCACTTCTGACCTCAAGTGATCCGCCCACCTTGGCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:8 SD:i:0 SN:i:963 NH:i:1
-chr5_177010_177467_0:0:0_0:0:0_3c6 99 chr5 177010 199 100M = 177368 458 TGAAGCAATCACCCCCCACTCCCCCTCCCTGGCCCCCGGTAACGGGGTCGGCTTCCTGTGCTGCGGCTCTACCAGTTCCGGTCGTTTCCCCTGCGGTGAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:964 NH:i:1
+chr5_177010_177467_0:0:0_0:0:0_3c6 1123 chr5 177010 199 100M = 177368 458 TGAAGCAATCACCCCCCACTCCCCCTCCCTGGCCCCCGGTAACGGGGTCGGCTTCCTGTGCTGCGGCTCTACCAGTTCCGGTCGTTTCCCCTGCGGTGAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:964 NH:i:1
chr5_177010_177467_0:0:0_0:0:0_3c6 147 chr5 177368 199 100M = 177010 -458 CATGCCTAGAAGTGGAACTGCTGGGTTCCCAATAATTCTGTTGAACGTTTTGAGCATCGCGGCGGCCGCACTGTTTTACATTCTCAACAGCAATGCATGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:964 NH:i:1
chr5_177822_178311_0:0:0_0:0:0_3c7 83 chr5 178212 199 100M = 177822 -490 CACGCCTGTAATCCCAGCAGTTTCAGAGGCCGAGGCAGGTGGATCACTTGAGCCCAGGAGTTCGAGATCAGCCTGGGCGACATGGCGAGACCCCCATCTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:965 NH:i:1
chr5_177822_178311_0:0:0_0:0:0_3c7 163 chr5 177822 199 100M = 178212 490 GGCCTGGCCTTTATTTTTATTATTACAGTCATACCAGCAGGAAATAGCATCTCACTGGGGTTTTGATTTGCATTTCCCCAATTAATAATGATGTTGAACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:965 NH:i:1
chr5_366788_367297_0:0:0_0:0:0_3c8 83 chr5 367198 199 100M = 366788 -510 CCCGGGGCTCACACGGGAGGAGCGCGCCGAGCCAGTACCCAGCCCCGAGCCCAGTACCCAGCCTCCAGCCCAGTACCCAGCCCCGAGCCCAGTACCCAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:966 NH:i:1
chr5_366788_367297_0:0:0_0:0:0_3c8 163 chr5 366788 199 100M = 367198 510 ACCCGCCCGCCTCCCCCGGGCTCCGCGACGCACGCTGGGGCCCCCGCCAGGCCCGGAGGGTCGCGCTCCAGGTAAAGCGCGCGGGGCGGGCCCGGGAGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:966 NH:i:1
-chr5_112301_112816_0:0:0_0:0:0_3c9 99 chr5 112301 199 100M = 112717 516 AGTTCAAGACCAGCCTGGCCAACGTGGCAAAACCCCGTCTCTACTAAAATACCAAAATTAGCTGGGTGTGGTGGTGGGCGCCTGTAAACCCAGCTCCTTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:967 NH:i:1
+chr5_112301_112816_0:0:0_0:0:0_3c9 1123 chr5 112301 199 100M = 112717 516 AGTTCAAGACCAGCCTGGCCAACGTGGCAAAACCCCGTCTCTACTAAAATACCAAAATTAGCTGGGTGTGGTGGTGGGCGCCTGTAAACCCAGCTCCTTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:967 NH:i:1
chr5_112301_112816_0:0:0_0:0:0_3c9 147 chr5 112717 199 100M = 112301 -516 TATCACCACCCAAGAGGACATGGGAGGAACAGAGGCTGTGGCCCCTGCTGTGAGTGCCCCCCAGAAAGGGGGTCCCGGCTCTGTGCATGTGACATGTGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:967 NH:i:1
chr5_825607_826094_0:0:0_0:0:0_3ca 83 chr5 825995 199 100M = 825607 -488 CAGGCTGCACCGGGCATGGGAATCCGCCAGCTGCGAGATTGGGGGTAAAGAGCTCAGACATGGTCAGAAGCCTCTGCCTAACACACGGCTCCAGTAGCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:968 NH:i:1
chr5_825607_826094_0:0:0_0:0:0_3ca 163 chr5 825607 199 100M = 825995 488 ATTTGAGCCCAGGAAGTCGAGGCTGCAGTGAGCCAAGATCATGCCACTGTACTCCAGCCTGGGTGACAGACAGAGCAAGACTGTCTCAAAAAAATAAAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:968 NH:i:1
@@ -1145,27 +1145,27 @@ chr5_402355_402861_0:0:0_0:0:0_3cb 83 chr5 402762 199 100M = 402355 -507 TGCAGCC
chr5_402355_402861_0:0:0_0:0:0_3cb 163 chr5 402355 199 100M = 402762 507 GAGGTCTCCTCCCAGCCCCCACCCAGAGGACCTTCCCACAGCCTTGGGAGCTGAAACCCAGGCCACCCCATCAAGTTGGCCTCTGTGGGTGTACACACTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:969 NH:i:1
chr5_951675_952261_0:0:0_0:0:0_3cc 83 chr5 952162 199 100M = 951675 -587 CATGTGACAGACAATCTGTCCTTCAAACCACCCAGGGCCACAGTGAGCCTCTGCACTGTTACTTTAAAAACGTAAATTGTTTAAAGACAAATTTAAATGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:970 NH:i:1
chr5_951675_952261_0:0:0_0:0:0_3cc 163 chr5 951675 199 100M = 952162 587 GTAGAGGTGGGAACGGGGACTGGCATACAACACCCTGTGAGTATCTGTGAGACAAGTGGTCAACACAGAGAAGTTTCCCATCGGGAGTTTTCTGTATCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:970 NH:i:1
-chr5_439064_439470_0:0:0_0:0:0_3cd 99 chr5 439064 199 100M = 439371 407 AGGGGGGACGAGCGGGCCCGAGGACCCGGCGCAGGCCTGCGAGCGCGCTCACCTCTCCCTGTCGCTGCGCGTGGGCCGCCGCCGCGCTCAGGCTCGCCGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:971 NH:i:1
+chr5_439064_439470_0:0:0_0:0:0_3cd 1123 chr5 439064 199 100M = 439371 407 AGGGGGGACGAGCGGGCCCGAGGACCCGGCGCAGGCCTGCGAGCGCGCTCACCTCTCCCTGTCGCTGCGCGTGGGCCGCCGCCGCGCTCAGGCTCGCCGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:971 NH:i:1
chr5_439064_439470_0:0:0_0:0:0_3cd 147 chr5 439371 199 100M = 439064 -407 TAGGCAGACTGGCGGACCGCGCCCACGACTCTCTGTCGCCGTCGACCCCGGGGTGGGGGATGGGGCGGGGGGCGGGGCGGCGCTTGCGCACTGGCGGGCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:971 NH:i:1
-chr5_795059_795619_0:0:0_0:0:0_3ce 99 chr5 795059 199 100M = 795520 561 CCGCAGCCAACCGCGCTCTCCTCTTCGCAGAGTGCGCGCGCAGGATGAAGAAGTGCATCAGGTGCCAGGTGGTCGTCAGCAAGAAACTGCGCCCAGGTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:972 NH:i:1
+chr5_795059_795619_0:0:0_0:0:0_3ce 1123 chr5 795059 199 100M = 795520 561 CCGCAGCCAACCGCGCTCTCCTCTTCGCAGAGTGCGCGCGCAGGATGAAGAAGTGCATCAGGTGCCAGGTGGTCGTCAGCAAGAAACTGCGCCCAGGTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:972 NH:i:1
chr5_795059_795619_0:0:0_0:0:0_3ce 147 chr5 795520 199 100M = 795059 -561 ACTCCAGCCCGCACACCACCTTATGCCTGATTTCCACGGCTCACCTCCTGCCCGCACCCGGGCCCCACCTCTGCCTCCAAATCACCCACCCCGCCAGCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:972 NH:i:1
-chr5_968538_969044_0:0:0_0:0:0_3cf 99 chr5 968538 199 100M = 968945 507 CATGAACAAAAACACTGTCACAGACCACAGTTGAGAGTGGAGATGCCTGGCCCCAACCGAATGCAGGCCACATCACCCCTGGCACCCATTCCCTTTGTCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:973 NH:i:1
+chr5_968538_969044_0:0:0_0:0:0_3cf 1123 chr5 968538 199 100M = 968945 507 CATGAACAAAAACACTGTCACAGACCACAGTTGAGAGTGGAGATGCCTGGCCCCAACCGAATGCAGGCCACATCACCCCTGGCACCCATTCCCTTTGTCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:973 NH:i:1
chr5_968538_969044_0:0:0_0:0:0_3cf 147 chr5 968945 199 100M = 968538 -507 CCTGGGCAACAGAGCGAGATCCTGACTGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAACAGAAAGAACAAACCACTTTTAGACATTCCAAATTCAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:8 SD:i:0 SN:i:973 NH:i:1
chr5_142453_142979_0:0:0_0:0:0_3d0 83 chr5 142564 199 100M = 142453 -211 TCCCGCTCCCCCCACCAACCCCGGGAACCGCCTCCCGCTCCCCCCACCAACCCCGGGAACCGCCTCCCGCTCCCCCCACCAACCCCGGGAACCGCCTCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:974 NH:i:1
chr5_142453_142979_0:0:0_0:0:0_3d0 163 chr5 142453 199 100M = 142564 211 AACCCCGGGAACCGCCTCCCGCTCCCCCCACCAACCCCGGGAACCGCCTCCCGCTCCCCCCACCAACCCCGGGAACCGCCTCCCGCTCCCCCCGCAACCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:974 NH:i:1
-chr5_79881_80393_0:0:0_0:0:0_3d1 99 chr5 79881 199 100M = 80294 513 GCCAGGGTCACCAGGTGAGCAAGGCTGGGGAACAATGGAGGCGGAGCCCCGCGACTGCCCAGCTCACTGCCCCCAGTGTCCAGGCTGTGGTGCAGGCGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:975 NH:i:1
+chr5_79881_80393_0:0:0_0:0:0_3d1 1123 chr5 79881 199 100M = 80294 513 GCCAGGGTCACCAGGTGAGCAAGGCTGGGGAACAATGGAGGCGGAGCCCCGCGACTGCCCAGCTCACTGCCCCCAGTGTCCAGGCTGTGGTGCAGGCGTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:975 NH:i:1
chr5_79881_80393_0:0:0_0:0:0_3d1 147 chr5 80294 199 100M = 79881 -513 AGGCTGAGACAGGAGAATCACTTGAACCCGGGAGGCGGAGGTTATGGAGAGCCAAGATCGCACCACTGCACCCCAGCCTGGGCAACAAGAGTAAAACTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:975 NH:i:1
chr5_147711_148244_0:0:0_0:0:0_3d2 83 chr5 148145 199 100M = 147711 -534 CTGAGAAAGAAGTAGGGAGAGGCGTGGCTGGAGGGGCCGGAGACCTCTCTAAGGAGGAGGCTTGGACCGAGAAGCTCCTGAGAGTTCAGGGCACGATACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:976 NH:i:1
chr5_147711_148244_0:0:0_0:0:0_3d2 163 chr5 147711 199 100M = 148145 534 ACGACTTCCTGGCACGGGGCCGGGCTGAGGTGGGGTCAGACACGGGCCGGAGGACGCTGACCAGCAGGCAGCACCGCTCAGTTCCCGTGCCTCCGCCACC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:976 NH:i:1
-chr5_905484_905938_0:0:0_0:0:0_3d3 99 chr5 905484 199 100M = 905839 455 GCTCCAGCCTGGGTGACACAGCAAGACTCCGTCTCAAAAAGAAAAGAAAAAAAAAAAACAAAACAACATATTTCACAGAGAAGAATTTATGTTTTTGGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:977 NH:i:1
+chr5_905484_905938_0:0:0_0:0:0_3d3 1123 chr5 905484 199 100M = 905839 455 GCTCCAGCCTGGGTGACACAGCAAGACTCCGTCTCAAAAAGAAAAGAAAAAAAAAAAACAAAACAACATATTTCACAGAGAAGAATTTATGTTTTTGGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:977 NH:i:1
chr5_905484_905938_0:0:0_0:0:0_3d3 147 chr5 905839 199 100M = 905484 -455 CTTAAGGTCCCTCCTGAAGCTGCAGTCACACCATGGACCAGGGCTGTGACCTCATCCGAAGGCTCAACTGGGGCTGAGGCCCACCTCTGAGCTCACTCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:977 NH:i:1
chr5_154518_154974_0:0:0_0:0:0_3d4 83 chr5 154875 199 100M = 154518 -457 TTTTTGATAATGCCATTTGAAACATAAGTTTTAAATTTTGATGATGTCTAATTTTTTTCTTTGGTCACTTATGCTTCTGGTATCTGTTTTAGTCTATTTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:978 NH:i:1
chr5_154518_154974_0:0:0_0:0:0_3d4 163 chr5 154518 199 100M = 154875 457 GTTGATTGTTTCTGGAGTTCGTGGTTGATTGTTTCTGGAGTTCTGGGTTGACTGTTTCTGGAGTTCAGGGTTGATTGTTTCTGGAATTTGGGGTTGATTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:978 NH:i:1
-chr5_740421_740925_0:0:0_0:0:0_3d5 99 chr5 740421 199 100M = 740826 505 GAGCGATGCGAAACGCCGCGCTTGCCGGAGATAGCATTGAGGGCCCTTCCGCGGCCCGGCGTTTCGTCTCTTGGGACGATTTTGTGCGTTCCCTCGGTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:979 NH:i:1
+chr5_740421_740925_0:0:0_0:0:0_3d5 1123 chr5 740421 199 100M = 740826 505 GAGCGATGCGAAACGCCGCGCTTGCCGGAGATAGCATTGAGGGCCCTTCCGCGGCCCGGCGTTTCGTCTCTTGGGACGATTTTGTGCGTTCCCTCGGTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:979 NH:i:1
chr5_740421_740925_0:0:0_0:0:0_3d5 147 chr5 740826 199 100M = 740421 -505 GAGATTTGGGTCCAGTTACAGTAAATCCAGTCAACGCGAGTGTGTCATTCCCCAGCTATGCCTGTCTGCCTGGATGCAGCCTCGCGGTGGACAGTGGGCT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:979 NH:i:1
-chr5_189138_189655_0:0:0_0:0:0_3d6 99 chr5 189138 199 100M = 189556 518 GTGACTTAGTAGCTGAGGACTTGCTCTAGGGTCAGAGGGCACATGGCAGGAGAGTCACCCCGGCTGAACGAGGCTCTGCATCTTACCGAAGCCGCTCTAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:980 NH:i:1
+chr5_189138_189655_0:0:0_0:0:0_3d6 1123 chr5 189138 199 100M = 189556 518 GTGACTTAGTAGCTGAGGACTTGCTCTAGGGTCAGAGGGCACATGGCAGGAGAGTCACCCCGGCTGAACGAGGCTCTGCATCTTACCGAAGCCGCTCTAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:980 NH:i:1
chr5_189138_189655_0:0:0_0:0:0_3d6 147 chr5 189556 199 100M = 189138 -518 TGGGGGAGCCCAGGAGCTTCCGGTGGGACATGTGGAGCTCTGCTGTGGAAGTAGACCAGGGATATCTCGGGTGGCTGGGGTCCCTGGCTGCCGGGACAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:980 NH:i:1
-chr5_176013_176536_0:0:0_0:0:0_3d7 99 chr5 176013 199 100M = 176437 524 ATGTGCTGTGTGTAACAACAACAATTTGGGAACAGCCTAAGTATTCAGTGATGTGGGATGGGTTTTAACTAAATTAACTCTGTACTGTAACAGGCTCATT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:981 NH:i:1
+chr5_176013_176536_0:0:0_0:0:0_3d7 1123 chr5 176013 199 100M = 176437 524 ATGTGCTGTGTGTAACAACAACAATTTGGGAACAGCCTAAGTATTCAGTGATGTGGGATGGGTTTTAACTAAATTAACTCTGTACTGTAACAGGCTCATT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:981 NH:i:1
chr5_176013_176536_0:0:0_0:0:0_3d7 147 chr5 176437 199 100M = 176013 -524 ATCTACCCGCCTCGGTCTCTCAAAGTGCTGGGATTACAGGTGAGCCACTGCGCCCGGCCCATGCCCAGCTAATTTTTCGCATTTTTAGTAGGGACGGAGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:981 NH:i:1
chr5_306158_306676_0:0:0_0:0:0_3d8 83 chr5 306577 199 100M = 306158 -519 CCCATCCCCACCCCCGTCCCCGCCCCCAGCCCCATCCCTGCCCCCATCCCCAACGCCTTCCTCACACCATCCCCCGATCCCCATCCCTGTCCCCACCCCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:982 NH:i:1
chr5_306158_306676_0:0:0_0:0:0_3d8 163 chr5 306158 199 100M = 306577 519 GTTTCAATGTGTAAAGCAGTGAAATGGGTCCCAGAGAGTCCACAGGTGGAGAACGGCCTTCACCAGGCAGCCAGGGAACGGTGTGGGGTGCTCACCAACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:982 NH:i:1
@@ -1175,17 +1175,17 @@ chr5_818372_818819_0:0:0_0:0:0_3da 83 chr5 818720 199 100M = 818372 -448 TGTCAAC
chr5_818372_818819_0:0:0_0:0:0_3da 163 chr5 818372 199 100M = 818720 448 ATAATAAAGTGAAGTGCAATACAACAAGGTATGCCTGTACTCACTAACATCCCAAATGATGCTGCTAAAGATCTTAGCAAAAGAGGTACTTTGCCAATGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:984 NH:i:1
chr5_114240_114729_0:0:0_0:0:0_3db 83 chr5 114630 199 100M = 114240 -490 TCAGCCTGGCCTCTCAGTCTTGTGACCCCTCCCCAACCACTAGGAGCCCTCAGGCTGTGAACCAGAGAGATCCAGGGTACATGCTGGGGCACCAGAGGAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:985 NH:i:1
chr5_114240_114729_0:0:0_0:0:0_3db 163 chr5 114240 199 100M = 114630 490 CCAGCCCCATGGCCCCACACAGCCTGGCGTGTCCCCGAGTGGGGCTCTGATCAGCAGGGAAGGATCAGGAACACGGGCCCTCTCCCAAAATCCTGGGAAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:985 NH:i:1
-chr5_486788_487231_0:0:0_0:0:0_3dc 99 chr5 486788 199 100M = 487132 444 GGCCTCTTAAGGTGGCCACGGCCTGCTGGCTAAGTAGTGGGCACTGTGCTGTGGCCCTCAGCCTCCAACAGAGACAACGGCTGGTCTGTGGGGCCAACAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:986 NH:i:1
+chr5_486788_487231_0:0:0_0:0:0_3dc 1123 chr5 486788 199 100M = 487132 444 GGCCTCTTAAGGTGGCCACGGCCTGCTGGCTAAGTAGTGGGCACTGTGCTGTGGCCCTCAGCCTCCAACAGAGACAACGGCTGGTCTGTGGGGCCAACAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:986 NH:i:1
chr5_486788_487231_0:0:0_0:0:0_3dc 147 chr5 487132 199 100M = 486788 -444 ACCATGCAGCCCCCAACACAGACCCTCATCTGCAGCCCTCCCACACTCCTGACAGCCAAGCAGCCCTGCCCAGCTGCTCCAGCCAGAGGCCTTGGCCAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:986 NH:i:1
chr5_135803_136328_0:0:0_0:0:0_3dd 83 chr5 136229 199 100M = 135803 -526 GCCCCCAGAGGCACTCCTGACCCAGGACTTGGAGAGGGGCCTGCCCTGTGGCTGCGGAGCACGTGTGTGTATGTGTGTGCCCTCTCTGCCCTGCCCGCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:987 NH:i:1
chr5_135803_136328_0:0:0_0:0:0_3dd 163 chr5 135803 199 100M = 136229 526 GGGTGCCTGTGAGGAACCTGGGAAAAGTTGTGCATTACGCCAAGGTCCAGCTGCGGTTCCAGCACAGCCAGGTGGGGGCCGGGCTGGGTGGAGCACGCTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:987 NH:i:1
-chr5_838548_839015_0:0:0_0:0:0_3de 99 chr5 838548 199 100M = 838916 468 CCAACATGACGAAACCCCGTCTCTACTAAAAATATAAAAACTAGCAGGGCATGGTGGCCGGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGAAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:988 NH:i:1
+chr5_838548_839015_0:0:0_0:0:0_3de 1123 chr5 838548 199 100M = 838916 468 CCAACATGACGAAACCCCGTCTCTACTAAAAATATAAAAACTAGCAGGGCATGGTGGCCGGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGAAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:988 NH:i:1
chr5_838548_839015_0:0:0_0:0:0_3de 147 chr5 838916 199 100M = 838548 -468 CACCTTCTCGATGGGCTTTGTGGCTGCCGCTGTCGGGTGCCCAGGACTAATTCCATGCTACTTTCTTTTTCCTTTTTTTTTTTTTTTTTGAGACGGAGTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:988 NH:i:1
-chr5_97297_97802_0:0:0_0:0:0_3df 99 chr5 97297 199 100M = 97703 506 CGAGTTCCTGGCTCCAGGGGGAAGCGAGTGGTAAGTCTGTGAACAGAGCCCAGCTGTGGATTCTGTCAATGGGGTCAGGTCTCACCCTGTGGCTTCCAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:989 NH:i:1
+chr5_97297_97802_0:0:0_0:0:0_3df 1123 chr5 97297 199 100M = 97703 506 CGAGTTCCTGGCTCCAGGGGGAAGCGAGTGGTAAGTCTGTGAACAGAGCCCAGCTGTGGATTCTGTCAATGGGGTCAGGTCTCACCCTGTGGCTTCCAGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:989 NH:i:1
chr5_97297_97802_0:0:0_0:0:0_3df 147 chr5 97703 199 100M = 97297 -506 GCCGCGGTCCCCCCGACCCCGCTCCAGTAACGGCTCCTCCTGCCTGCAGCCCCCTCCTCCGTCTGCCTGGCCTCGGGAATGCAGCGTCCCTCGGCAGCAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:989 NH:i:1
-chr5_327966_328457_0:0:0_0:0:0_3e0 99 chr5 327966 199 100M = 328358 492 CCTGGGAAGGAGCCCTGCGGAGCTGGGGGGTGGGGAGGCAGAGGTGGAGAGGCGAGAGTTGCCCGCGGCTGCCCCTACCCCTGCCCATGTCGCCCACACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:990 NH:i:1
+chr5_327966_328457_0:0:0_0:0:0_3e0 1123 chr5 327966 199 100M = 328358 492 CCTGGGAAGGAGCCCTGCGGAGCTGGGGGGTGGGGAGGCAGAGGTGGAGAGGCGAGAGTTGCCCGCGGCTGCCCCTACCCCTGCCCATGTCGCCCACACT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:990 NH:i:1
chr5_327966_328457_0:0:0_0:0:0_3e0 147 chr5 328358 199 100M = 327966 -492 CCTCCCATTGTCCCGTCGCCCTCGAAACTCTCCCAGAGACGGGCGAGTTGTCCACCTGTGCAGGTCTGAACTGACTTCCTGTGCCAGGGCCTCGCCTGAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:990 NH:i:1
-chr5_883361_883866_0:0:0_0:0:0_3e1 99 chr5 883361 199 100M = 883767 506 AAATATAAAGAATTTTTGGCCAGGTGCAGTGGCTCACGCTTGTTAATCCCAGCACTTTGGGAGGCCGAGGCGGGTGGATCACGAGGTCAGGACATTAAGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:991 NH:i:1
+chr5_883361_883866_0:0:0_0:0:0_3e1 1123 chr5 883361 199 100M = 883767 506 AAATATAAAGAATTTTTGGCCAGGTGCAGTGGCTCACGCTTGTTAATCCCAGCACTTTGGGAGGCCGAGGCGGGTGGATCACGAGGTCAGGACATTAAGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:991 NH:i:1
chr5_883361_883866_0:0:0_0:0:0_3e1 147 chr5 883767 199 100M = 883361 -506 AAATGATGCTACAAATCTTAGCAAAAGAGGTACTTTGCCAATGCCTCTTACCAAATTACTAAAAAGGTTTCCTGAGTACATTACCATGCAAACCAAGAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:991 NH:i:1
chr5_461591_462132_0:0:0_0:0:0_3e2 83 chr5 462033 199 100M = 461591 -542 GGTGGCGGAGTCGATGCTGCTCGTGGACGGGGATGCTGTGCGGTCCAGCCTCTGGAGGATGGGGTGGAGCTGCTCGGTCCCGCAGGGGCTCCAGCATGGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:992 NH:i:1
chr5_461591_462132_0:0:0_0:0:0_3e2 163 chr5 461591 199 100M = 462033 542 GCGAACCTGCAGACCCCAGCTCCCGCCCCCACAAACGGGGTCCCGTGGCCTCCAGGAGCCCCACACGCACCTTTAGCAGCTCAGGCTCCCACGAGTCCAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:992 NH:i:1
@@ -1195,11 +1195,11 @@ chr5_85026_85485_0:0:0_0:0:0_3e4 83 chr5 85386 199 100M = 85026 -460 AGAACTCTATT
chr5_85026_85485_0:0:0_0:0:0_3e4 163 chr5 85026 199 100M = 85386 460 ACGGCGGCACCGTGGCCGTGGCAGAGGCACCATCCCTCCACCCTCACTCATGTAGGGATGTGGGGCACACGGACTGGAGCCCAAGGCCTTCGTGCTGGGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:994 NH:i:1
chr5_263242_263676_0:0:0_0:0:0_3e5 83 chr5 263577 199 100M = 263242 -435 GGTGCGATCTTGGCTCACTGCCAGCTCCACCTCTAGGGTTTTTAAGCAATTCTCTGCCCCAGCCGCCCGAGTAGCTGGGATTACAGGTGTGCACCACCAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:995 NH:i:1
chr5_263242_263676_0:0:0_0:0:0_3e5 163 chr5 263242 199 100M = 263577 435 AAAAAATCAGCTTTTGATATTGTTAAGTTTCTCCATTTTTCTCATGCTTTCTATGCCACTGATTTCTGTATTTGTCTTTATTTCCCTTTTTTGGCTTTGT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:995 NH:i:1
-chr5_629408_629885_0:0:0_0:0:0_3e6 99 chr5 629408 199 100M = 629786 478 ATGCTGGGAGGCCAAGGAGAGAGAATCACTTGAGGCCAGGAGTTCGAGACCAGCCTGGCCAACATGTCGAACTCCACCTCTACTAAAAATACAAAAATTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:996 NH:i:1
+chr5_629408_629885_0:0:0_0:0:0_3e6 1123 chr5 629408 199 100M = 629786 478 ATGCTGGGAGGCCAAGGAGAGAGAATCACTTGAGGCCAGGAGTTCGAGACCAGCCTGGCCAACATGTCGAACTCCACCTCTACTAAAAATACAAAAATTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:996 NH:i:1
chr5_629408_629885_0:0:0_0:0:0_3e6 147 chr5 629786 199 100M = 629408 -478 GTGTTCTTTGCTGGGTCCTCATGTGGCAGAGAGAGCAACTCAGCTCTCTCTGGTGTCCCTTATAAGGACACTCATCCTGCACTGCTCACAGCGGTGGTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:996 NH:i:1
chr5_549451_549920_0:0:0_0:0:0_3e7 83 chr5 549821 199 100M = 549451 -470 GCCTCCTGAGTTGCTGAGACTACAGGCATGTGCCACCACACCCGGCTAATTTTTGTATTTTAAGTAGAGACGGGGTTTCACCGCATTGGCCAGGATGGTC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:997 NH:i:1
chr5_549451_549920_0:0:0_0:0:0_3e7 163 chr5 549451 199 100M = 549821 470 GCCGTGAGGGAACAGGCCCTTGGACTTGGGACCTGGCCGGAGCACAGATCTCACAGGAGGTGGCGGTGCTGTGGCCGTCGCCCACTGCTCGGCTCCACCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:997 NH:i:1
-chr5_833008_833478_0:0:0_0:0:0_3e8 99 chr5 833008 199 100M = 833379 471 TGTGACTCACCAGTCCATGATGTTGGTGGACAGTGCGGCCGAGAACCCCAGGACATTGAAGCTGATCTCAGTGGCCGTGCACAGCGCCAGCCCGCCCATG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:998 NH:i:1
+chr5_833008_833478_0:0:0_0:0:0_3e8 1123 chr5 833008 199 100M = 833379 471 TGTGACTCACCAGTCCATGATGTTGGTGGACAGTGCGGCCGAGAACCCCAGGACATTGAAGCTGATCTCAGTGGCCGTGCACAGCGCCAGCCCGCCCATG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:998 NH:i:1
chr5_833008_833478_0:0:0_0:0:0_3e8 147 chr5 833379 199 100M = 833008 -471 ACTGAGCCCCAGCCATGGGCAGGTGAGCCACAGAAACCTTCCTGATTCTGTGAACGCTCCTGTGAAACGGTGAGTGTGAAGGAGATGGTCCCCAGGGAAC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:998 NH:i:1
-chr5_752977_753345_0:0:0_0:0:0_3e9 99 chr5 752977 199 100M = 753246 369 ACTACAGGCGCCCGCCACCACGCCCGGCTAATTTTTTGTATTTTTGGTAAAGATGGGGTTACACTGTGTTAGCCAGGATGGTCTCAGTCTCCTGACCTCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:999 NH:i:1
+chr5_752977_753345_0:0:0_0:0:0_3e9 1123 chr5 752977 199 100M = 753246 369 ACTACAGGCGCCCGCCACCACGCCCGGCTAATTTTTTGTATTTTTGGTAAAGATGGGGTTACACTGTGTTAGCCAGGATGGTCTCAGTCTCCTGACCTCG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:999 NH:i:1
chr5_752977_753345_0:0:0_0:0:0_3e9 147 chr5 753246 199 100M = 752977 -369 GATTACAGGCGTGGGCCACCGCGCCCAGCCAAAGACAAAGGTTTTTAAAGGAAAGATGAGAAGGATGACTTCATTGTTTTGAAATAATTATCCTTGGCTG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:999 NH:i:1
diff --git a/test/featureCounts/data/test-minimum-dup.ora b/test/featureCounts/data/test-minimum-dup.ora
new file mode 100644
index 0000000..3fa8570
--- /dev/null
+++ b/test/featureCounts/data/test-minimum-dup.ora
@@ -0,0 +1,9 @@
+# Program:featureCounts v1.4.5; Command:"../../../bin/featureCounts" "-o" "test-minimum-dup.ora" "-p" "-a" "test-minimum.GTF" "--ignoreDup" "test-chrname.sam"
+Geneid test-chrname.sam
+simu_gene1 8
+simu_gene2 5
+simu_gene3 6
+simu_gene4 3
+simu_gene5 21
+simu_gene6 14
+simu_gene7 82
diff --git a/test/featureCounts/data/test-minimum-5reduce.ora.summary b/test/featureCounts/data/test-minimum-dup.ora.summary
similarity index 61%
rename from test/featureCounts/data/test-minimum-5reduce.ora.summary
rename to test/featureCounts/data/test-minimum-dup.ora.summary
index 72c60d8..6f8f328 100644
--- a/test/featureCounts/data/test-minimum-5reduce.ora.summary
+++ b/test/featureCounts/data/test-minimum-dup.ora.summary
@@ -1,11 +1,12 @@
-Status corner-reduction.sam
-Assigned 5
-Unassigned_Ambiguity 0
+Status test-chrname.sam
+Assigned 139
+Unassigned_Ambiguity 1
Unassigned_MultiMapping 0
-Unassigned_NoFeatures 12
+Unassigned_NoFeatures 158
Unassigned_Unmapped 0
Unassigned_MappingQuality 0
Unassigned_FragementLength 0
Unassigned_Chimera 0
Unassigned_Secondary 0
Unassigned_Nonjunction 0
+Unassigned_Duplicate 302
diff --git a/test/featureCounts/featureCounts-test.sh b/test/featureCounts/featureCounts-test.sh
index f399f57..4e8cb64 100644
--- a/test/featureCounts/featureCounts-test.sh
+++ b/test/featureCounts/featureCounts-test.sh
@@ -1,4 +1,4 @@
-mkdr -p result
+mkdir -p result
sh test_minimal_example.sh
sh test_chr_aliases.sh
sh test_chr_inference.sh
diff --git a/test/featureCounts/result/test-minimum.FC b/test/featureCounts/result/test-minimum.FC
deleted file mode 100644
index 18888d0..0000000
--- a/test/featureCounts/result/test-minimum.FC
+++ /dev/null
@@ -1,9 +0,0 @@
-# Program:featureCounts v1.4.6-p4; Command:"../../bin/featureCounts" "-a" "data/test-minimum.GTF" "-o" "result/test-minimum.FC" "data/test-minimum.sam"
-Geneid Chr Start End Strand Length data/test-minimum.sam
-simu_gene1 chr3;chr3;chr3 100;20000;40000 10000;30000;89000 +;+;+ 68903 15
-simu_gene2 chr3;chr3 100010;102000 101000;131000 +;+ 29992 4
-simu_gene3 chr3;chr3;chr3;chr3 500010;502000;504000;600000 501000;503000;529000;669000 -;-;-;- 95994 10
-simu_gene4 chr3;chr3;chr3 602000;672000;702000 631000;699000;719000 +;+;+ 73003 2
-simu_gene5 chr4;chr4;chr4;chr4 20000;120000;200000;220000 100000;190000;210000;300000 -;-;-;- 240004 74
-simu_gene6 chr4;chr4 420000;500000 490000;560000 -;- 130002 30
-simu_gene7 chr5;chr5;chr5 120000;500000;970000 490000;960000;1000000 -;-;- 860003 254
diff --git a/test/featureCounts/result/test-minimum.FC.summary b/test/featureCounts/result/test-minimum.FC.summary
deleted file mode 100644
index 4df639a..0000000
--- a/test/featureCounts/result/test-minimum.FC.summary
+++ /dev/null
@@ -1,12 +0,0 @@
-Status data/test-minimum.sam
-Assigned 389
-Unassigned_Ambiguity 2
-Unassigned_MultiMapping 191
-Unassigned_NoFeatures 416
-Unassigned_Unmapped 0
-Unassigned_MappingQuality 0
-Unassigned_FragmentLength 0
-Unassigned_Chimera 0
-Unassigned_Secondary 0
-Unassigned_Nonjunction 0
-Unassigned_Duplicate 0
diff --git a/test/featureCounts/test_all.sh b/test/featureCounts/test_all.sh
deleted file mode 100644
index 510276f..0000000
--- a/test/featureCounts/test_all.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-
-sh test_minimal_example.sh
-sh test_chr_aliases.sh
-sh test_chr_inference.sh
-sh test_corner_cases.sh
-
diff --git a/test/featureCounts/test_corner_cases.sh b/test/featureCounts/test_corner_cases.sh
index 8326059..0f734f9 100644
--- a/test/featureCounts/test_corner_cases.sh
+++ b/test/featureCounts/test_corner_cases.sh
@@ -31,6 +31,8 @@ $SH_CMD data/compare.sh data/test-minimum.sam data/test-minimum.ora data/test-mi
$SH_CMD data/compare.sh data/test-minimum.sam data/test-minimum-STR.ora data/test-minimum.GTF "-p -s 1 " "stranded read summarization"
$SH_CMD data/compare.sh data/test-minimum.sam data/test-minimum-UNSTR.ora data/test-minimum.GTF "-p -s 2 " "reversely stranded read summarization"
+# test ignoring duplicate fragments (--ignoreDup)
+$SH_CMD data/compare.sh data/test-chrname.sam data/test-minimum-dup.ora data/test-minimum.GTF " -p --ignoreDup " "Ignoring duplicate fragments"
$SH_CMD data/compare.sh data/corner-JUNC.sam data/corner-JUNC-ONLY.ora data/test-minimum.GTF "--countSplitAlignmentsOnly -O -f " "Junction reads only" FL
echo
diff --git a/test/subjunc/subjunc-test.sh b/test/subjunc/subjunc-test.sh
index 82257d4..c3ed6f1 100644
--- a/test/subjunc/subjunc-test.sh
+++ b/test/subjunc/subjunc-test.sh
@@ -2,6 +2,6 @@ mkdir result
../../bin/subread-buildindex -o ../small1 ../chr901.fa
-../../bin/subjunc -i ../small1 -o result/junctions.sam -r data/junction-reads-A.fq -R data/junction-reads-B.fq
+../../bin/subjunc --SAMoutput -i ../small1 -o result/junctions.sam -r data/junction-reads-A.fq -R data/junction-reads-B.fq
-../../bin/subjunc -i ../small1 -o result/junctionsNfusions.bam --BAMoutput -r data/junction-reads-A.fq -R data/junction-reads-B.fq --allJunctions
+../../bin/subjunc -i ../small1 -o result/junctionsNfusions.bam -r data/junction-reads-A.fq -R data/junction-reads-B.fq --allJunctions
diff --git a/test/subread-align/subread-align-test.sh b/test/subread-align/subread-align-test.sh
index bc50a10..f611a85 100644
--- a/test/subread-align/subread-align-test.sh
+++ b/test/subread-align/subread-align-test.sh
@@ -4,7 +4,7 @@ PYTHON_EXEC=python
rm test-tmp.log
mkdir -p result
-$SUBREAD_HOME/subread-buildindex -o ../small1 -M100 ../chr901.fa
+$SUBREAD_HOME/subread-buildindex -B -F -o ../small1 -M100 ../chr901.fa
md5sum ../small1.00.b.array >> test-tmp.log
md5sum ../small1.00.b.tab >> test-tmp.log
@@ -13,7 +13,7 @@ echo "*** SINGLE-END READS NO ERROR ******" >> test-tmp.log
echo "*************************************************" >> test-tmp.log
echo >>test-tmp.log
-$SUBREAD_HOME/subread-align -i ../small1 -r data/test-noerror-r1.fq -o result/test-tmp.sam -H -J
+$SUBREAD_HOME/subread-align --SAMoutput -t0 -i ../small1 -r data/test-noerror-r1.fq -o result/test-tmp.sam -H -J
cat result/test-tmp.sam | $PYTHON_EXEC readname_ora_match.py >>test-tmp.log
@@ -22,7 +22,7 @@ echo "*** SINGLE-END READS NO ERROR NO DUP ******" >> test-tmp.log
echo "*************************************************" >> test-tmp.log
echo >>test-tmp.log
-$SUBREAD_HOME/subread-align -u -i ../small1 -r data/test-noerror-r1.fq -o result/test-tmp.sam -H -J
+$SUBREAD_HOME/subread-align --SAMoutput -t0 -u -i ../small1 -r data/test-noerror-r1.fq -o result/test-tmp.sam -H -J
cat result/test-tmp.sam | $PYTHON_EXEC readname_ora_match.py >>test-tmp.log
@@ -33,7 +33,7 @@ echo "*** READS WITH NO ERROR ******" >> test-tmp.log
echo "*************************************************" >> test-tmp.log
echo >>test-tmp.log
-$SUBREAD_HOME/subread-align -i ../small1 -r data/test-noerror-r1.fq -R data/test-noerror-r2.fq -o result/test-tmp.sam -H -J
+$SUBREAD_HOME/subread-align --SAMoutput -t0 -i ../small1 -r data/test-noerror-r1.fq -R data/test-noerror-r2.fq -o result/test-tmp.sam -H -J
cat result/test-tmp.sam | $PYTHON_EXEC readname_ora_match.py >> test-tmp.log
echo >>test-tmp.log
@@ -42,7 +42,7 @@ echo "*** READS NO ERROR, NO DUPLICATED REPORT ******" >> test-tmp.log
echo "*************************************************" >> test-tmp.log
echo >>test-tmp.log
-$SUBREAD_HOME/subread-align -u -i ../small1 -r data/test-noerror-r1.fq -R data/test-noerror-r2.fq -o result/test-tmp.sam -Q -J
+$SUBREAD_HOME/subread-align --SAMoutput -t0 -u -i ../small1 -r data/test-noerror-r1.fq -R data/test-noerror-r2.fq -o result/test-tmp.sam -Q -J
cat result/test-tmp.sam | $PYTHON_EXEC readname_ora_match.py >>test-tmp.log
@@ -53,7 +53,7 @@ echo "*** READS WITH ONLY SEQUENCING ERROR ******" >> test-tmp.log
echo "*************************************************" >> test-tmp.log
echo >>test-tmp.log
-$SUBREAD_HOME/subread-align -i ../small1 -r data/test-error-r1.fq -R data/test-error-r2.fq -o result/test-tmp.sam -H -J
+$SUBREAD_HOME/subread-align --SAMoutput -t0 -i ../small1 -r data/test-error-r1.fq -R data/test-error-r2.fq -o result/test-tmp.sam -H -J
cat result/test-tmp.sam | $PYTHON_EXEC readname_ora_match.py >>test-tmp.log
echo >>test-tmp.log
@@ -63,7 +63,7 @@ echo "*** SUBREAD IS RUN WITH LONG INDEL DETECTION ***" >> test-tmp.log
echo "*************************************************" >> test-tmp.log
echo >>test-tmp.log
-$SUBREAD_HOME/subread-align -i ../small1 --gzFASTQinput -r data/test-err-mut-r1.fq.gz -R data/test-err-mut-r2.fq.gz -o result/test-tmp.sam -H -J --rg-id MyTestGroup --rg SM:sample1 --rg TP:1 --rg XX:YY
+$SUBREAD_HOME/subread-align --SAMoutput -t0 -i ../small1 --gzFASTQinput -r data/test-err-mut-r1.fq.gz -R data/test-err-mut-r2.fq.gz -o result/test-tmp.sam -H -J --rg-id MyTestGroup --rg SM:sample1 --rg TP:1 --rg XX:YY
cat result/test-tmp.sam | $PYTHON_EXEC readname_ora_match.py >>test-tmp.log
cat test-tmp.log
diff --git a/test/subread-align/test-tmp.log b/test/subread-align/test-tmp.log
index 268b047..48da4e0 100644
--- a/test/subread-align/test-tmp.log
+++ b/test/subread-align/test-tmp.log
@@ -1,49 +1,49 @@
-a87c3f05f25477325ea8f2742f05231b ../small1.00.b.array
-b5bf8ea4c82873ab98cc52e334c5e53b ../small1.00.b.tab
+eacaf0816806c7b7c8294674ce706610 ../small1.00.b.array
+aba1f9dc3531e06248aea9b8ee98f8a7 ../small1.00.b.tab
*************************************************
*** SINGLE-END READS NO ERROR ******
*************************************************
-unmatched= 549 ; matched= 19418 ; unmapped= 31 ; reads= 19998 ;NN= 0
-accuracy= 0.972504632644 ; sensitivity= 0.998449844984
+unmatched= 533 ; matched= 19401 ; unmapped= 64 ; reads= 19998 ;NN= 0
+accuracy= 0.973261763821 ; sensitivity= 0.996799679968
paired_match= 0 ; paired= 0.0
*************************************************
*** SINGLE-END READS NO ERROR NO DUP ******
*************************************************
-unmatched= 43 ; matched= 18774 ; unmapped= 1181 ; reads= 19998 ;NN= 0
-accuracy= 0.997714832332 ; sensitivity= 0.940944094409
+unmatched= 2 ; matched= 18733 ; unmapped= 1263 ; reads= 19998 ;NN= 0
+accuracy= 0.999893247932 ; sensitivity= 0.936843684368
paired_match= 0 ; paired= 0.0
*************************************************
*** READS WITH NO ERROR ******
*************************************************
-unmatched= 603 ; matched= 39342 ; unmapped= 51 ; reads= 39996 ;NN= 0
-accuracy= 0.984904243335 ; sensitivity= 0.998724872487
-paired_match= 39260 ; paired= 0.997915713487
+unmatched= 549 ; matched= 39361 ; unmapped= 86 ; reads= 39996 ;NN= 0
+accuracy= 0.98624404911 ; sensitivity= 0.997849784978
+paired_match= 39306 ; paired= 0.998602677777
*************************************************
*** READS NO ERROR, NO DUPLICATED REPORT ******
*************************************************
-unmatched= 165 ; matched= 38786 ; unmapped= 1045 ; reads= 39996 ;NN= 0
-accuracy= 0.9957639085 ; sensitivity= 0.973872387239
-paired_match= 38690 ; paired= 0.997524880111
+unmatched= 0 ; matched= 38470 ; unmapped= 1526 ; reads= 39996 ;NN= 0
+accuracy= 1.0 ; sensitivity= 0.961846184618
+paired_match= 38432 ; paired= 0.999012217312
*************************************************
*** READS WITH ONLY SEQUENCING ERROR ******
*************************************************
-unmatched= 655 ; matched= 33542 ; unmapped= 5803 ; reads= 40000 ;NN= 0
-accuracy= 0.980846273065 ; sensitivity= 0.854925
-paired_match= 28692 ; paired= 0.855405163675
+unmatched= 458 ; matched= 33789 ; unmapped= 5753 ; reads= 40000 ;NN= 0
+accuracy= 0.986626565831 ; sensitivity= 0.856175
+paired_match= 29038 ; paired= 0.859392109858
*************************************************
*** READS WITH SEQUENCING ERROR AND MUTATION ***
*** SUBREAD IS RUN WITH LONG INDEL DETECTION ***
*************************************************
-unmatched= 628 ; matched= 33205 ; unmapped= 6167 ; reads= 40002 ;NN= 0
-accuracy= 0.981438240771 ; sensitivity= 0.845782710864
-paired_match= 28042 ; paired= 0.84451136877
+unmatched= 458 ; matched= 33789 ; unmapped= 5753 ; reads= 40000 ;NN= 0
+accuracy= 0.986626565831 ; sensitivity= 0.856175
+paired_match= 29038 ; paired= 0.859392109858
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/subread.git
More information about the debian-med-commit
mailing list