[med-svn] [Git][med-team/gmap][master] 4 commits: routine-update: New upstream version
Nilesh Patra
gitlab at salsa.debian.org
Wed Oct 21 06:56:20 BST 2020
Nilesh Patra pushed to branch master at Debian Med / gmap
Commits:
0c0c4b2c by Nilesh Patra at 2020-10-21T11:16:26+05:30
routine-update: New upstream version
- - - - -
91beb530 by Nilesh Patra at 2020-10-21T11:16:27+05:30
New upstream version 2020-10-14+ds
- - - - -
c571e84a by Nilesh Patra at 2020-10-21T11:16:39+05:30
Update upstream source from tag 'upstream/2020-10-14+ds'
Update to upstream version '2020-10-14+ds'
with Debian dir 21698ca0308be32dda7b7cc3e3ea65fae0b34395
- - - - -
c42803c6 by Nilesh Patra at 2020-10-21T11:17:19+05:30
routine-update: Ready to upload to unstable
- - - - -
11 changed files:
- ChangeLog
- VERSION
- debian/changelog
- src/distant-rna.c
- src/genome128_hr.c
- src/gmap.c
- src/kmer-search.c
- src/pair.c
- src/pair.h
- src/stage3hr.c
- src/substring.c
Changes:
=====================================
ChangeLog
=====================================
@@ -1,3 +1,31 @@
+2020-10-15 twu
+
+ * VERSION, index.html: Updated version number
+
+ * gmap.c: Added option --nomargin. Removed -N abbreviation for --nolengths
+
+ * pair.c, pair.h: Fixed printing of ruler for long wrap lengths
+
+ * kmer-search.c: Fixed parameters to Genome_count_mismatches_substring and
+ Substring_new in single_hits_gminus. Simplified indentation in
+ search_transcriptome_ends
+
+2020-10-14 twu
+
+ * stage3hr.c: Changed remap_transcriptome_p into a macro
+
+ * genome128_hr.c: Added assertions that pos5 < pos3
+
+ * distant-rna.c: Ensuring that pos5 < pos3 when checking for mismatches
+
+ * substring.c: Checking for pos3 == pos5 and not computing mismatches for
+ that case
+
+2020-10-13 twu
+
+ * kmer-search.c: Initializing abortp in the correct place for
+ transcriptome-guided genomic alignment
+
2020-10-12 twu
* fa_coords.pl.in: Checking for circular and altscaffold arguments to be
=====================================
VERSION
=====================================
@@ -1 +1 @@
-2020-10-12
\ No newline at end of file
+2020-10-14
\ No newline at end of file
=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+gmap (2020-10-14+ds-1) unstable; urgency=medium
+
+ * Team upload.
+ * New upstream version
+
+ -- Nilesh Patra <npatra974 at gmail.com> Wed, 21 Oct 2020 11:17:19 +0530
+
gmap (2020-10-12+ds-1) unstable; urgency=medium
* Team upload.
=====================================
src/distant-rna.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: distant-rna.c 223081 2020-09-13 14:21:03Z twu $";
+static char rcsid[] = "$Id: distant-rna.c 223174 2020-10-15 01:50:42Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -35,7 +35,7 @@ static char rcsid[] = "$Id: distant-rna.c 223081 2020-09-13 14:21:03Z twu $";
/* For transcript splicing, need to increase MAXCHIMERAPATHS */
/* #define MAXCHIMERAPATHS 100 */
#define MAXCHIMERAPATHS 10000
-#define MIN_SPLICE_EXON_LENGTH 0
+#define MIN_SPLICE_EXON_LENGTH 1 /* Needs to be greater than 0 */
#define MIN_FRAGLENGTH 25
static Univ_IIT_T chromosome_iit;
=====================================
src/genome128_hr.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: genome128_hr.c 222927 2020-06-29 16:07:54Z twu $";
+static char rcsid[] = "$Id: genome128_hr.c 223175 2020-10-15 01:50:58Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -478,10 +478,8 @@ print_blocks_alt (Genomecomp_T *blocks, Genomecomp_T *alt_blocks, Univcoord_T st
Genomecomp_T *ref_ptr, *snp_ptr;
Univcoord_T startblocki, endblocki;
Genomecomp_T high, low, flags, snpmask;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
int startcolumni, endcolumni;
int startdiscard32, enddiscard32;
-#endif
/* sequence = (char *) CALLOC(length+1,sizeof(char)); */
@@ -492,6 +490,9 @@ print_blocks_alt (Genomecomp_T *blocks, Genomecomp_T *alt_blocks, Univcoord_T st
endcolumni = (endpos % 128) / 32;
endblocki = endpos/128U*12 + endcolumni;
#else
+ startcolumni = (startpos % 128) / 32;
+ endcolumni = (endpos % 128) / 32;
+
startblocki = startpos/128U*12;
endblocki = endpos/128U*12;
#endif
@@ -5006,6 +5007,8 @@ int
Genome_count_mismatches_substring (int *ref_mismatches, Genome_T ome, Genome_T ome_alt, Compress_T query_compress,
Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand) {
+ assert(pos5 < pos3);
+
if (alt_blocks == NULL) {
*ref_mismatches = Genome_count_mismatches_substring_ref(ome,query_compress,left,pos5,pos3,plusp,genestrand);
return *ref_mismatches;
@@ -5695,6 +5698,8 @@ Genome_mismatches_left (int *mismatch_positions, int max_mismatches, Genome_T om
int i;
#endif
+ assert(pos5 < pos3);
+
if (ome_alt == NULL) {
nmismatches = mismatches_left(&(*mismatch_positions),max_mismatches,ome,query_compress,
left,pos5,pos3,plusp,genestrand,query_unk_mismatch_p);
@@ -5727,6 +5732,8 @@ Genome_mismatches_left_trim (int *mismatch_positions, int max_mismatches, Genome
int i;
#endif
+ assert(pos5 < pos3);
+
if (ome_alt == NULL) {
nmismatches = mismatches_left(&(*mismatch_positions),max_mismatches,ome,query_compress,
left,pos5,pos3,plusp,genestrand,/*query_unk_mismatch_p*/false);
@@ -6282,6 +6289,8 @@ Genome_mismatches_right (int *mismatch_positions, int max_mismatches, Genome_T o
int i;
#endif
+ assert(pos5 < pos3);
+
if (ome_alt == NULL) {
nmismatches = mismatches_right(&(*mismatch_positions),max_mismatches,ome,query_compress,
left,pos5,pos3,plusp,genestrand,query_unk_mismatch_p);
@@ -6312,6 +6321,8 @@ Genome_mismatches_right_trim (int *mismatch_positions, int max_mismatches, Genom
int i;
#endif
+ assert(pos5 < pos3);
+
if (ome_alt == NULL) {
nmismatches = mismatches_right(&(*mismatch_positions),max_mismatches,ome,query_compress,
left,pos5,pos3,plusp,genestrand,/*query_unk_mismatch_p*/false);
@@ -6360,6 +6371,7 @@ Genome_first_kmer_left (int *nmismatches5, Genome_T ome, Compress_T query_compre
printf("\n");
);
+ assert(pos5 < pos3);
*nmismatches5 = -1;
@@ -6694,6 +6706,8 @@ Genome_first_kmer_right (int *nmismatches3, Genome_T ome, Compress_T query_compr
printf("\n");
);
+ assert(pos5 < pos3);
+
*nmismatches3 = -1;
startblocki = (left+pos5)/128U*12;
@@ -7024,6 +7038,7 @@ Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_com
printf("\n");
);
+ assert(pos5 < pos3);
startblocki = (left+pos5)/128U*12;
startcolumni = ((left+pos5) % 128) / 32;
@@ -7618,6 +7633,8 @@ Genome_mark_mismatches (char *genomic, int querylength, Compress_T query_compres
Univcoord_T left, int pos5, int pos3,
bool plusp, int genestrand) {
+ assert(pos5 < pos3);
+
if (alt_blocks == NULL) {
return Genome_mark_mismatches_ref(&(*genomic),querylength,query_compress,
left,pos5,pos3,plusp,genestrand);
=====================================
src/gmap.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gmap.c 223116 2020-10-06 18:07:46Z twu $";
+static char rcsid[] = "$Id: gmap.c 223199 2020-10-15 22:26:59Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -461,6 +461,7 @@ static bool strictp = true;
/* static int proteinmode = 1; */
static bool uncompressedp = false;
static bool nointronlenp = false;
+static int print_margin_p = true;
static int invertmode = 0;
static int ngap = 3;
static int wraplength = 50;
@@ -654,7 +655,8 @@ static struct option long_options[] = {
{"direction", required_argument, 0, 'z'}, /* sense_try, sense_filter */
#endif
{"tolerant", no_argument, 0, 'Y'}, /* strictp */
- {"nolengths", no_argument, 0, 'N'}, /* nointronlenp */
+ {"nolengths", no_argument, 0, 0}, /* nointronlenp */
+ {"nomargin", no_argument, 0, 0}, /* print_margin_p */
{"invertmode", required_argument, 0, 'I'}, /* invertmode */
{"introngap", required_argument, 0, 'i'}, /* ngap */
{"wraplength", required_argument, 0, 'l'}, /* wraplength */
@@ -5701,9 +5703,9 @@ parse_command_line (int argc, char *argv[], int optind) {
while ((opt = getopt_long(argc,argv,
#ifdef PMAP
- "q:D:a:d:k:g:2B:K:w:L:x:1t:s:c:SA03468:9n:f:ZO5o:V:v:M:m:ebu:E:PQYNI:i:l:",
+ "q:D:a:d:k:g:2B:K:w:L:x:1t:s:c:SA03468:9n:f:ZO5o:V:v:M:m:ebu:E:PQYI:i:l:",
#else
- "q:D:d:k:g:2B:K:w:L:x:1t:s:c:p:SA03468:9n:f:ZO5o:V:v:M:m:ebu:E:PQFa:Tz:j:YNI:i:l:",
+ "q:D:d:k:g:2B:K:w:L:x:1t:s:c:p:SA03468:9n:f:ZO5o:V:v:M:m:ebu:E:PQFa:Tz:j:YI:i:l:",
#endif
long_options, &long_option_index)) != -1) {
switch (opt) {
@@ -5878,6 +5880,10 @@ parse_command_line (int argc, char *argv[], int optind) {
output_buffer_size = atoi(check_valid_int(optarg));
} else if (!strcmp(long_name,"print-comment")) {
print_comment_p = true;
+ } else if (!strcmp(long_name,"nolengths")) {
+ nointronlenp = true;
+ } else if (!strcmp(long_name,"nomargin")) {
+ print_margin_p = false;
} else if (!strcmp(long_name,"failsonly")) {
if (nofailsp == true) {
fprintf(stderr,"Cannot specify both --nofails and --failsonly\n");
@@ -6330,7 +6336,6 @@ parse_command_line (int argc, char *argv[], int optind) {
#endif
case 'Y': strictp = false; break;
- case 'N': nointronlenp = true; break;
case 'I': invertmode = atoi(check_valid_int(optarg)); break;
case 'i': user_ngap = atoi(check_valid_int(optarg)); break;
case 'l': wraplength = atoi(check_valid_int(optarg)); break;
@@ -7030,7 +7035,7 @@ main (int argc, char *argv[]) {
donor_typeint,acceptor_typeint);
Dynprog_end_setup(splicesites,splicetypes,splicedists,nsplicesites,
trieoffsets_obs,triecontents_obs,trieoffsets_max,triecontents_max);
- Pair_setup(novelsplicingp,splicing_iit,trim_indel_score,
+ Pair_setup(novelsplicingp,splicing_iit,trim_indel_score,print_margin_p,
gff3_separators_p,sam_insert_0M_p,force_xs_direction_p,
md_lowercase_variant_p,/*snps_p*/genomecomp_alt ? true : false,
gff3_phase_swap_p,cdstype,sam_cigar_extended_p,cigar_action);
@@ -7739,7 +7744,8 @@ External map file options\n\
fprintf(stdout,"\
Alignment output options\n\
- -N, --nolengths No intron lengths in alignment\n\
+ --nolengths No intron lengths in alignment\n\
+ --nomargin No left margin in GMAP standard output (with the -A flag)\n\
-I, --invertmode=INT Mode for alignments to genomic (-) strand:\n\
0=Don't invert the cDNA (default)\n\
1=Invert cDNA and print genomic (-) strand\n\
=====================================
src/kmer-search.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: kmer-search.c 222813 2020-06-03 22:02:45Z twu $";
+static char rcsid[] = "$Id: kmer-search.c 223197 2020-10-15 22:23:47Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1368,17 +1368,21 @@ single_hits_gminus (int *found_score_overall, int *found_score_within_trims,
pos3 = (univdiagonal + queryend <= genomelength + querylength) ? queryend : (int) (genomelength - left);
debug2(printf("tr gminus query %d..%d, align %u..%u [%u..%u], left %u = %u - (%d - %d)\n",
- querylength - queryend,querylength - querystart,alignstart,alignend,alignstart - path->chroffset,alignend - path->chroffset,
+ querystart,queryend,alignstart,alignend,alignstart - path->chroffset,alignend - path->chroffset,
left - path->chroffset,alignend - path->chroffset,querylength,queryend));
+
+ /* Verified that (querylength - pos3) and (querylength - pos5)
+ are correct here, possibly because of how querystart and
+ queryend are defined above*/
nmismatches =
Genome_count_mismatches_substring(&ref_nmismatches,genomebits,genomebits_alt,query_compress_rev,left,
- pos5,pos3,/*plusp*/false,/*genestrand*/0);
+ querylength - pos3,querylength - pos5,/*plusp*/false,/*genestrand*/0);
nmismatches_whole += nmismatches;
nmismatches_refdiff += ref_nmismatches;
debug2(printf("mismatches rev from %d to %d: %d\n",pos5,pos3,nmismatches));
- if ((substring = Substring_new(nmismatches,ref_nmismatches,left,
- querylength - pos3,querylength - pos5,querylength,
+ /* Verified that pos5 and pos3 are correct */
+ if ((substring = Substring_new(nmismatches,ref_nmismatches,left,pos5,pos3,querylength,
/*plusp*/false,/*genestrand*/0,query_compress_rev,
path->chrnum,path->chroffset,path->chrhigh,path->chrlength,
/*splice_querystart_p*/false,/*splicetype_querystart*/NO_SPLICE,/*ambig_prob_querystart*/0.0,
@@ -1735,7 +1739,7 @@ search_transcriptome_complete (Path_T **tplus_gplus_paths, int *n_tplus_gplus,
total_nmismatches = Genome_count_mismatches_substring(&ref_nmismatches,transcriptomebits,NULL,query_compress_fwd,
tr_left,/*pos5*/trim5,/*pos3*/queryend,/*plusp*/true,/*genestrand*/0);
- total_nmatches = querylength - total_nmismatches;
+ total_nmatches = (querylength - trim5 - trim3) - total_nmismatches;
debug2(printf("tplus diagonal %u => trimmed %d..%d, nmismatches: %d total, %d at 5', and %d at 3'. total_nmatches: %d\n",
tr_univdiagonal,trim5,querylength - trim3,total_nmismatches,nmismatches5,nmismatches3,
total_nmatches));
@@ -1935,14 +1939,13 @@ search_transcriptome_complete (Path_T **tplus_gplus_paths, int *n_tplus_gplus,
endpoints = (Uintlist_T) NULL;
gaps = (Intlist_T) NULL;
+ abortp = false;
for (q = tr_lefts, r = querystarts, s = queryends, t = adjustments; q != NULL;
q = Uintlist_next(q), r = Intlist_next(r), s = Intlist_next(s)) {
transcript_diag = Uintlist_head(q) - troffset;
/* Find initial exonpos */
- abortp = false;
-
querystart = Intlist_head(r); /* tplus: compute from querystart to queryend */
queryend = Intlist_head(s);
debug2(printf("\nAnalyzing query %d..%d\n",querystart,queryend));
@@ -2057,7 +2060,7 @@ search_transcriptome_complete (Path_T **tplus_gplus_paths, int *n_tplus_gplus,
if (abortp == true) {
/* Skip */
} else if (/* len <= nindels ||*/exon_residual <= nindels) {
- debug2(printf("ABORTING BECAUSE INDEL OVERLAPS SPLICE SITE\n"));
+ debug2(printf("ABORTING BECAUSE INDEL OVERLAPS SPLICE SITE (1)\n"));
abortp = true;
} else {
gaps = Intlist_push(gaps,(int) adj);
@@ -2091,14 +2094,13 @@ search_transcriptome_complete (Path_T **tplus_gplus_paths, int *n_tplus_gplus,
endpoints = (Uintlist_T) NULL;
gaps = (Intlist_T) NULL;
+ abortp = false;
for (q = tr_lefts, r = querystarts, s = queryends, t = adjustments; q != NULL;
q = Uintlist_next(q), r = Intlist_next(r), s = Intlist_next(s)) {
transcript_diag = Uintlist_head(q) - troffset;
/* Find initial exonpos */
- abortp = false;
-
querystart = Intlist_head(r); /* tplus: compute from querystart to queryend */
queryend = Intlist_head(s);
debug2(printf("\nAnalyzing query %d..%d\n",querystart,queryend));
@@ -2212,7 +2214,7 @@ search_transcriptome_complete (Path_T **tplus_gplus_paths, int *n_tplus_gplus,
if (abortp == true) {
/* Skip */
} else if (/*len <= nindels ||*/ exon_residual <= nindels) {
- debug2(printf("ABORTING BECAUSE INDEL OVERLAPS SPLICE SITE\n"));
+ debug2(printf("ABORTING BECAUSE INDEL OVERLAPS SPLICE SITE (2)\n"));
abortp = true;
} else {
gaps = Intlist_push(gaps,(int) adj);
@@ -2283,10 +2285,11 @@ search_transcriptome_complete (Path_T **tplus_gplus_paths, int *n_tplus_gplus,
/*plusp*/false,/*genestrand*/0,/*query_unk_mismatch_local_p*/false,/*kmer*/index1part_tr);
trim3 = querylength - queryend;
+ /* Verified parameters to Genome_count_mismatches_substring */
total_nmismatches = Genome_count_mismatches_substring(&ref_nmismatches,transcriptomebits,NULL,query_compress_rev,
tr_left,/*pos5*/trim5,/*pos3*/queryend,/*plusp*/false,/*genestrand*/0);
- total_nmatches = querylength - total_nmismatches;
+ total_nmatches = (querylength - trim3 - trim5) - total_nmismatches;
debug2(printf("tminus diagonal %u => trimmed %d..%d, nmismatches: %d total, %d at 5', and %d at 3', total_nmatches: %d\n",
tr_univdiagonal,trim5,querylength - trim3,total_nmismatches,nmismatches5,nmismatches3,
total_nmatches));
@@ -2490,13 +2493,13 @@ search_transcriptome_complete (Path_T **tplus_gplus_paths, int *n_tplus_gplus,
endpoints = (Uintlist_T) NULL;
gaps = (Intlist_T) NULL;
+ abortp = false;
for (q = tr_lefts, r = querystarts, s = queryends, t = adjustments; q != NULL;
q = Uintlist_next(q), r = Intlist_next(r), s = Intlist_next(s)) {
transcript_diag = Uintlist_head(q) - troffset;
/* Find initial exonpos */
- abortp = false;
queryend = Intlist_head(s); /* tminus: compute from queryend to querystart */
querystart = Intlist_head(r);
debug2(printf("\nAnalyzing query %d..%d\n",querystart,queryend));
@@ -2610,7 +2613,7 @@ search_transcriptome_complete (Path_T **tplus_gplus_paths, int *n_tplus_gplus,
if (abortp == true) {
/* Skip */
} else if (/*len <= nindels ||*/exon_residual <= nindels) {
- debug2(printf("ABORTING BECAUSE INDEL OVERLAPS SPLICE SITE\n"));
+ debug2(printf("ABORTING BECAUSE INDEL OVERLAPS SPLICE SITE (3)\n"));
abortp = true;
} else {
gaps = Intlist_push(gaps,(int) adj);
@@ -2644,13 +2647,13 @@ search_transcriptome_complete (Path_T **tplus_gplus_paths, int *n_tplus_gplus,
endpoints = (Uintlist_T) NULL;
gaps = (Intlist_T) NULL;
+ abortp = false;
for (q = tr_lefts, r = querystarts, s = queryends, t = adjustments; q != NULL;
q = Uintlist_next(q), r = Intlist_next(r), s = Intlist_next(s)) {
transcript_diag = Uintlist_head(q) - troffset;
/* Find initial exonpos */
- abortp = false;
queryend = Intlist_head(s); /* tminus: compute from queryend to querystart */
querystart = Intlist_head(r);
debug2(printf("\nAnalyzing query %d..%d\n",querystart,queryend));
@@ -2764,7 +2767,7 @@ search_transcriptome_complete (Path_T **tplus_gplus_paths, int *n_tplus_gplus,
if (abortp == true) {
/* Skip */
} else if (/*len <= nindels ||*/exon_residual <= nindels) {
- debug2(printf("ABORTING BECAUSE INDEL OVERLAPS SPLICE SITE\n"));
+ debug2(printf("ABORTING BECAUSE INDEL OVERLAPS SPLICE SITE (4)\n"));
abortp = true;
} else {
gaps = Intlist_push(gaps,(int) adj);
@@ -2873,6 +2876,7 @@ search_transcriptome_ends (Path_T **tplus_gplus_paths, int *n_tplus_gplus,
#ifdef DEBUG2
int trend;
bool allocp;
+ Univcoord_T left;
#endif
Chrpos_T *exonstarts, genomicpos, chrlength;
Univcoord_T alignstart, alignend, chroffset, chrhigh;
@@ -2954,7 +2958,7 @@ search_transcriptome_ends (Path_T **tplus_gplus_paths, int *n_tplus_gplus,
pos5,pos3,/*plusp*/true,/*genestrand*/0);
if (nmismatches <= nmismatches_allowed) {
debug2a(printf(" => nmismatches %d\n",nmismatches));
- total_nmatches = querylength - nmismatches;
+ total_nmatches = (pos3 - pos5) - nmismatches;
/* Should replace with a rank/select structure */
abortp = false;
@@ -2976,9 +2980,9 @@ search_transcriptome_ends (Path_T **tplus_gplus_paths, int *n_tplus_gplus,
endpoints = (Uintlist_T) NULL;
gaps = (Intlist_T) NULL;
+ abortp = false;
/* Find initial exonpos */
- abortp = false;
querystart = 0; /* tplus: compute from querystart to queryend */
queryend = querylength;
debug2(printf("\nAnalyzing query %d..%d\n",querystart,queryend));
@@ -3102,9 +3106,9 @@ search_transcriptome_ends (Path_T **tplus_gplus_paths, int *n_tplus_gplus,
endpoints = (Uintlist_T) NULL;
gaps = (Intlist_T) NULL;
+ abortp = false;
/* Find initial exonpos */
- abortp = false;
querystart = 0; /* tplus: compute from querystart to queryend */
queryend = querylength;
debug2(printf("\nAnalyzing query %d..%d\n",querystart,queryend));
@@ -3258,11 +3262,13 @@ search_transcriptome_ends (Path_T **tplus_gplus_paths, int *n_tplus_gplus,
pos3 = (tr_univdiagonal <= transcriptomelength) ? querylength : (int) (transcriptomelength - tr_left);
debug2a(printf("ULTRAFAST MINUS DIAGONAL %u\n",tminus_diagpairs[k]));
+ /* Verified parameters to Genome_count_mismatches_substring */
nmismatches = Genome_count_mismatches_substring(&ref_nmismatches,transcriptomebits,NULL,query_compress_rev,tr_left,
pos5,pos3,/*plusp*/false,/*genestrand*/0);
+
if (nmismatches <= nmismatches_allowed) {
debug2a(printf(" => nmismatches %d\n",nmismatches));
- total_nmatches = querylength - nmismatches;
+ total_nmatches = (pos3 - pos5) - nmismatches;
/* Should replace with a rank/select structure */
abortp = false;
@@ -3284,9 +3290,9 @@ search_transcriptome_ends (Path_T **tplus_gplus_paths, int *n_tplus_gplus,
endpoints = (Uintlist_T) NULL;
gaps = (Intlist_T) NULL;
+ abortp = false;
/* Find initial exonpos */
- abortp = false;
queryend = querylength; /* tminus: compute from queryend to querystart */
querystart = 0;
debug2(printf("\nAnalyzing query %d..%d\n",querystart,queryend));
@@ -3410,9 +3416,9 @@ search_transcriptome_ends (Path_T **tplus_gplus_paths, int *n_tplus_gplus,
endpoints = (Uintlist_T) NULL;
gaps = (Intlist_T) NULL;
+ abortp = false;
/* Find initial exonpos */
- abortp = false;
queryend = querylength; /* tminus: compute from queryend to querystart */
querystart = 0;
debug2(printf("\nAnalyzing query %d..%d\n",querystart,queryend));
@@ -3589,475 +3595,480 @@ search_transcriptome_ends (Path_T **tplus_gplus_paths, int *n_tplus_gplus,
adj = (int) (tr_univdiagonala - tr_univdiagonalb);
debug2a(printf("Setting b first, a second, adj %d\n",adj));
} else {
+ /* Read does not extend to the ends, so this method is not applicable */
adj = 0;
}
- abortp = false;
- trnum = Univ_IIT_get_trnum(&troffset,&trhigh,&trlength,transcript_iit,/*low*/low0,/*high*/high0);
- debug2(printf("trnum is %d: %s\n",trnum,Univ_IIT_label(transcript_iit,trnum,&allocp)));
- if ((chrnum = Transcriptome_chrnum(&transcript_genestrand,transcriptome,trnum)) <= 0) {
- abortp = true;
- } else {
- if (transcript_genestrand > 0) {
- want_lowest_coordinate_p = true;
- } else {
- want_lowest_coordinate_p = false;
- }
- debug2(printf("trnum %d, transcript_genestrand %d\n",trnum,transcript_genestrand));
-
- if (adj > 0 &&
- (indel_pos = Indel_resolve_middle_deletion(&best_nmismatches_i,&best_nmismatches_j,
- &best_ref_nmismatches_i,&best_ref_nmismatches_j,
- /*left*/tr_left0,/*indels*/-adj,
- /*mismatch_positions_left*/NULL,/*nmismatches_left*/0,
- /*mismatch_positions_right*/NULL,/*nmismatches_right*/0,
- /*ome*/transcriptomebits,/*ome_alt*/NULL,query_compress_fwd,
- /*querystart*/0,/*queryend*/querylength,querylength,
- nmismatches_allowed,/*plusp*/true,/*genestrand*/0,
- want_lowest_coordinate_p)) > 0) {
- ninserts = 0;
- nindels = adj;
- total_nmatches = querylength - best_nmismatches_i - best_nmismatches_j;
- } else if (adj < 0 &&
- (indel_pos = Indel_resolve_middle_insertion(&best_nmismatches_i,&best_nmismatches_j,
- &best_ref_nmismatches_i,&best_ref_nmismatches_j,
- /*left*/tr_left0,/*indels*/-adj,
- /*mismatch_positions_left*/NULL,/*nmismatches_left*/0,
- /*mismatch_positions_right*/NULL,/*nmismatches_right*/0,
- /*ome*/transcriptomebits,/*ome_alt*/NULL,query_compress_fwd,
- /*querystart*/0,/*queryend*/querylength,querylength,
- nmismatches_allowed,/*plusp*/true,/*genestrand*/0,
- want_lowest_coordinate_p)) > 0) {
- ninserts = nindels = -adj;
- total_nmatches = querylength - best_nmismatches_i - best_nmismatches_j - ninserts;
- } else {
+ if (adj != 0) {
+ trnum = Univ_IIT_get_trnum(&troffset,&trhigh,&trlength,transcript_iit,/*low*/low0,/*high*/high0);
+ debug2(printf("trnum is %d: %s\n",trnum,Univ_IIT_label(transcript_iit,trnum,&allocp)));
+ if ((chrnum = Transcriptome_chrnum(&transcript_genestrand,transcriptome,trnum)) <= 0) {
adj = 0;
- }
+ } else {
+ if (transcript_genestrand > 0) {
+ want_lowest_coordinate_p = true;
+ } else {
+ want_lowest_coordinate_p = false;
+ }
+ debug2(printf("trnum %d, transcript_genestrand %d\n",trnum,transcript_genestrand));
- if (adj != 0) {
- nexons = Transcriptome_exons(&exonbounds,&exonstarts,transcriptome,trnum);
- Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
+ if (adj > 0 &&
+ (indel_pos = Indel_resolve_middle_deletion(&best_nmismatches_i,&best_nmismatches_j,
+ &best_ref_nmismatches_i,&best_ref_nmismatches_j,
+ /*left*/tr_left0,/*indels*/-adj,
+ /*mismatch_positions_left*/NULL,/*nmismatches_left*/0,
+ /*mismatch_positions_right*/NULL,/*nmismatches_right*/0,
+ /*ome*/transcriptomebits,/*ome_alt*/NULL,query_compress_fwd,
+ /*querystart*/0,/*queryend*/querylength,querylength,
+ nmismatches_allowed,/*plusp*/true,/*genestrand*/0,
+ want_lowest_coordinate_p)) > 0) {
+ ninserts = 0;
+ nindels = adj;
+ total_nmatches = querylength - best_nmismatches_i - best_nmismatches_j;
+ } else if (adj < 0 &&
+ (indel_pos = Indel_resolve_middle_insertion(&best_nmismatches_i,&best_nmismatches_j,
+ &best_ref_nmismatches_i,&best_ref_nmismatches_j,
+ /*left*/tr_left0,/*indels*/-adj,
+ /*mismatch_positions_left*/NULL,/*nmismatches_left*/0,
+ /*mismatch_positions_right*/NULL,/*nmismatches_right*/0,
+ /*ome*/transcriptomebits,/*ome_alt*/NULL,query_compress_fwd,
+ /*querystart*/0,/*queryend*/querylength,querylength,
+ nmismatches_allowed,/*plusp*/true,/*genestrand*/0,
+ want_lowest_coordinate_p)) > 0) {
+ ninserts = nindels = -adj;
+ total_nmatches = querylength - best_nmismatches_i - best_nmismatches_j - ninserts;
+ } else {
+ adj = 0;
+ }
+ }
+ }
- if (transcript_genestrand > 0) {
- /* Case 1 ultrafast gap: tplus, gplus. transcript_plusp == true && transcript_genestrand > 0 */
- debug2(printf("Case 1 ultrafast gap: tplus, gplus\n"));
- /* sensedir = SENSE_FORWARD; */
- /* plusp = true; */
+ if (adj == 0) {
+ /* Skip finding an indel */
- endpoints = (Uintlist_T) NULL;
- gaps = (Intlist_T) NULL;
-
- transcript_diag0 = tr_left0 - troffset;
- transcript_diag1 = tr_left1 - troffset;
+ } else if (transcript_genestrand > 0) {
+ /* Case 1 ultrafast gap: tplus, gplus. transcript_plusp == true && transcript_genestrand > 0 */
+ debug2(printf("Case 1 ultrafast gap: tplus, gplus\n"));
- abortp = false;
+ /* sensedir = SENSE_FORWARD; */
+ /* plusp = true; */
- /* first part */
- tr_left = tr_left0;
- querystart = 0;
- queryend = indel_pos;
- debug2(printf("\nAnalyzing query %d..%d\n",querystart,queryend));
+ nexons = Transcriptome_exons(&exonbounds,&exonstarts,transcriptome,trnum);
+ Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
- overall_trstart = trstart = transcript_diag0 + querystart; /* tplus */
+ endpoints = (Uintlist_T) NULL;
+ gaps = (Intlist_T) NULL;
+ abortp = false;
+
+ transcript_diag0 = tr_left0 - troffset;
+ transcript_diag1 = tr_left1 - troffset;
+
+ /* first part */
+ tr_left = tr_left0;
+ querystart = 0;
+ queryend = indel_pos;
+ debug2(printf("\nAnalyzing query %d..%d\n",querystart,queryend));
+
+ overall_trstart = trstart = transcript_diag0 + querystart; /* tplus */
#ifdef DEBUG2
- trend = transcript_diag0 + queryend; /* tplus */
- printf("transcript_diag %d, trstart %d, trend %d, tr_left %u, adj %d\n",
- transcript_diag0,trstart,trend, tr_left,adj);
+ trend = transcript_diag0 + queryend; /* tplus */
+ printf("transcript_diag %d, trstart %d, trend %d, tr_left %u, adj %d\n",
+ transcript_diag0,trstart,trend, tr_left,adj);
#endif
-
- debug2(printf("Searching for exoni for trstart %d\n",trstart));
- exoni = 0;
+
+ debug2(printf("Searching for exoni for trstart %d\n",trstart));
+ exoni = 0;
#if 0 && defined(HAVE_AVX2)
- _trstart = _mm256_set1_epi32(trstart);
- ptr = exonbounds;
- _exonbounds = _mm256_loadu_si256((__m256i *) ptr);
- _diff = _mm256_cmpgt_epi32(_exonbounds,_trstart);
- while (all_zero_p(_diff)) {
- exoni += 8;
- ptr += 8;
- _exonbounds = _mm256_loadu_si256((__m256i *) ptr);
- _diff = _mm256_cmpgt_epi32(_exonbounds,_trstart);
- }
- matchbits = _mm256_movemask_ps(_mm256_castsi256_ps(_diff));
- exoni += count_trailing_zeroes_32(matchbits);
+ _trstart = _mm256_set1_epi32(trstart);
+ ptr = exonbounds;
+ _exonbounds = _mm256_loadu_si256((__m256i *) ptr);
+ _diff = _mm256_cmpgt_epi32(_exonbounds,_trstart);
+ while (all_zero_p(_diff)) {
+ exoni += 8;
+ ptr += 8;
+ _exonbounds = _mm256_loadu_si256((__m256i *) ptr);
+ _diff = _mm256_cmpgt_epi32(_exonbounds,_trstart);
+ }
+ matchbits = _mm256_movemask_ps(_mm256_castsi256_ps(_diff));
+ exoni += count_trailing_zeroes_32(matchbits);
#elif 0 && defined(HAVE_SSE2)
- _trstart = _mm_set1_epi32(trstart);
- ptr = exonbounds;
- _exonbounds = _mm_loadu_si128((__m128i *) ptr);
- _diff = _mm_cmpgt_epi32(_exonbounds,_trstart);
- while (all_zero_p(_diff)) {
- exoni += 4;
- ptr += 4;
- _exonbounds = _mm_loadu_si128((__m128i *) ptr);
- _diff = _mm_cmpgt_epi32(_exonbounds,_trstart);
- }
- matchbits = _mm_movemask_ps(_mm_castsi128_ps(_diff));
- exoni += count_trailing_zeroes_32(matchbits);
+ _trstart = _mm_set1_epi32(trstart);
+ ptr = exonbounds;
+ _exonbounds = _mm_loadu_si128((__m128i *) ptr);
+ _diff = _mm_cmpgt_epi32(_exonbounds,_trstart);
+ while (all_zero_p(_diff)) {
+ exoni += 4;
+ ptr += 4;
+ _exonbounds = _mm_loadu_si128((__m128i *) ptr);
+ _diff = _mm_cmpgt_epi32(_exonbounds,_trstart);
+ }
+ matchbits = _mm_movemask_ps(_mm_castsi128_ps(_diff));
+ exoni += count_trailing_zeroes_32(matchbits);
#else
- while (exoni < nexons && exonbounds[exoni] <= trstart) {
- exoni++;
- }
+ while (exoni < nexons && exonbounds[exoni] <= trstart) {
+ exoni++;
+ }
#endif
-
- if (exoni >= nexons) {
- debug2(printf("ABORTING BECAUSE OF EXONI %d >= NEXONS %d\n",exoni,nexons));
+
+ if (exoni >= nexons) {
+ debug2(printf("ABORTING BECAUSE OF EXONI %d >= NEXONS %d\n",exoni,nexons));
+ abortp = true;
+ } else {
+ debug2(printf("(9) exoni %d out of nexons %d\n",exoni,nexons));
+ if (exoni == 0) {
+ exonpos = trstart;
+ } else {
+ exonpos = trstart - exonbounds[exoni - 1];
+ }
+ exon_residual = exonbounds[exoni] - trstart;
+ debug2(printf("exoni %d => exon_residual %d, exonpos %d\n",exoni,exon_residual,exonpos));
+
+ /* Generate one substring per exon */
+ genomicpos = exonstarts[exoni] + exonpos - 1; /* gplus */
+ debug2(printf("genomicpos %u = %u + %u - 1\n",genomicpos,exonstarts[exoni],exonpos));
+ align_residual = queryend - querystart;
+ while (align_residual > 0) {
+ debug2(printf("abortp %d, align_residual %d, exon_residual %d\n",abortp,align_residual,exon_residual));
+ len = (align_residual <= exon_residual) ? align_residual : exon_residual;
+ queryend = querystart + len; /* tplus */
+
+ alignstart = chroffset + genomicpos;
+ alignend = alignstart + len; /* gplus */
+ debug2(left = alignstart - querystart); /* tplus == gminus */
+ debug2(printf("tr ultrafast gap case 1 1st part query %d..%d, align %u..%u [%u..%u], left %u = %u - %d\n",
+ querystart,queryend,alignstart,alignend,alignstart - chroffset,alignend - chroffset,
+ left - chroffset,alignstart - chroffset,querystart));
+
+ /* tplus: push alignstart first */
+ endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignstart - chroffset));
+ endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignend - chroffset));
+
+ if (align_residual <= exon_residual) {
+ exon_residual -= align_residual; /* Need to know this if we have a following deletion */
+ align_residual = 0;
+ } else if (++exoni >= nexons) {
+ /* Aligning past the end of the transcript */
+ debug2(printf("2 ABORTING BECAUSE ALIGNING PAST END OF TRANSCRIPT\n"));
+ align_residual = 0;
abortp = true;
} else {
- debug2(printf("(9) exoni %d out of nexons %d\n",exoni,nexons));
- if (exoni == 0) {
- exonpos = trstart;
- } else {
- exonpos = trstart - exonbounds[exoni - 1];
- }
- exon_residual = exonbounds[exoni] - trstart;
- debug2(printf("exoni %d => exon_residual %d, exonpos %d\n",exoni,exon_residual,exonpos));
-
- /* Generate one substring per exon */
- genomicpos = exonstarts[exoni] + exonpos - 1; /* gplus */
- debug2(printf("genomicpos %u = %u + %u - 1\n",genomicpos,exonstarts[exoni],exonpos));
- align_residual = queryend - querystart;
- while (align_residual > 0) {
- debug2(printf("abortp %d, align_residual %d, exon_residual %d\n",abortp,align_residual,exon_residual));
- len = (align_residual <= exon_residual) ? align_residual : exon_residual;
- queryend = querystart + len; /* tplus */
-
- alignstart = chroffset + genomicpos;
- alignend = alignstart + len; /* gplus */
- debug2(left = alignstart - querystart); /* tplus == gminus */
- debug2(printf("tr ultrafast gap case 1 1st part query %d..%d, align %u..%u [%u..%u], left %u = %u - %d\n",
- querystart,queryend,alignstart,alignend,alignstart - chroffset,alignend - chroffset,
- left - chroffset,alignstart - chroffset,querystart));
-
- /* tplus: push alignstart first */
- endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignstart - chroffset));
- endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignend - chroffset));
-
- if (align_residual <= exon_residual) {
- exon_residual -= align_residual; /* Need to know this if we have a following deletion */
- align_residual = 0;
- } else if (++exoni >= nexons) {
- /* Aligning past the end of the transcript */
- debug2(printf("2 ABORTING BECAUSE ALIGNING PAST END OF TRANSCRIPT\n"));
- align_residual = 0;
- abortp = true;
- } else {
- /* nintrons += 1; */
- align_residual -= len;
- querystart += len; /* tplus */
- genomicpos = exonstarts[exoni] - 1; /* gplus */
- debug2(printf("genomicpos %u = %u - 1\n",genomicpos,exonstarts[exoni]));
- exon_residual = exonbounds[exoni] - exonbounds[exoni - 1];
- debug2(printf("INTRON\n"));
- gaps = Intlist_push(gaps,0);
- }
- }
+ /* nintrons += 1; */
+ align_residual -= len;
+ querystart += len; /* tplus */
+ genomicpos = exonstarts[exoni] - 1; /* gplus */
+ debug2(printf("genomicpos %u = %u - 1\n",genomicpos,exonstarts[exoni]));
+ exon_residual = exonbounds[exoni] - exonbounds[exoni - 1];
+ debug2(printf("INTRON\n"));
+ gaps = Intlist_push(gaps,0);
}
+ }
+ }
+
+ /* gap */
+ debug2(printf("INDEL: last piece %d | nindels %d | next piece %d\n",len,nindels,exon_residual));
+ debug2(printf("exon residual %d vs nindels %d\n",exon_residual,nindels));
+ if (abortp == true) {
+ /* Skip */
+ } else if (/*len <= nindels ||*/exon_residual <= nindels) {
+ debug2(printf("ABORTING BECAUSE INDEL OVERLAPS SPLICE SITE (5)\n"));
+ abortp = true;
+ } else {
+ gaps = Intlist_push(gaps,(int) adj);
+ }
+
+ /* second part */
+ tr_left = tr_left1;
+ querystart = indel_pos + ninserts;
+ queryend = querylength;
+ debug2(printf("\nAnalyzing query %d..%d\n",querystart,queryend));
+
+ trstart = transcript_diag1 + querystart; /* tplus */
+ overall_trend = transcript_diag1 + queryend; /* tplus */
+ debug2(printf("transcript_diag %d, trstart %d, trend %d, tr_left %u\n",
+ transcript_diag1,trstart,overall_trend,tr_left));
+
+ debug2(printf("Searching for exoni for trstart %d\n",trstart));
+ while (exoni < nexons && exonbounds[exoni] <= trstart) {
+ exoni++;
+ }
+
+ if (exoni >= nexons) {
+ debug2(printf("ABORTING BECAUSE OF EXONI %d >= NEXONS %d\n",exoni,nexons));
+ abortp = true;
+ } else {
+ debug2(printf("(10) exoni %d out of nexons %d\n",exoni,nexons));
+ if (exoni == 0) {
+ exonpos = trstart;
+ } else {
+ exonpos = trstart - exonbounds[exoni - 1];
+ }
+ exon_residual = exonbounds[exoni] - trstart;
+ debug2(printf("exoni %d => exon_residual %d, exonpos %d\n",exoni,exon_residual,exonpos));
- /* gap */
- debug2(printf("INDEL: last piece %d | nindels %d | next piece %d\n",len,nindels,exon_residual));
- debug2(printf("exon residual %d vs nindels %d\n",exon_residual,nindels));
- if (abortp == true) {
- /* Skip */
- } else if (/*len <= nindels ||*/exon_residual <= nindels) {
- debug2(printf("ABORTING BECAUSE INDEL OVERLAPS SPLICE SITE\n"));
+ /* Generate one substring per exon */
+ genomicpos = exonstarts[exoni] + exonpos - 1; /* gplus */
+ debug2(printf("genomicpos %u = %u + %u - 1\n",genomicpos,exonstarts[exoni],exonpos));
+ align_residual = queryend - querystart;
+ while (align_residual > 0) {
+ debug2(printf("abortp %d, align_residual %d, exon_residual %d\n",abortp,align_residual,exon_residual));
+ len = (align_residual <= exon_residual) ? align_residual : exon_residual;
+ queryend = querystart + len; /* tplus */
+
+ alignstart = chroffset + genomicpos;
+ alignend = alignstart + len; /* gplus */
+ debug2(left = alignstart - querystart); /* tplus == gminus */
+ debug2(printf("tr ultrafast gap case 1 2nd part query %d..%d, align %u..%u [%u..%u], left %u = %u - %d\n",
+ querystart,queryend,alignstart,alignend,alignstart - chroffset,alignend - chroffset,
+ left - chroffset,alignstart - chroffset,querystart));
+
+ /* tplus: push alignstart first */
+ endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignstart - chroffset));
+ endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignend - chroffset));
+
+ if (align_residual <= exon_residual) {
+ exon_residual -= align_residual; /* Need to know this if we have a following deletion */
+ align_residual = 0;
+ } else if (++exoni >= nexons) {
+ /* Aligning past the end of the transcript */
+ debug2(printf("2 ABORTING BECAUSE ALIGNING PAST END OF TRANSCRIPT\n"));
+ align_residual = 0;
abortp = true;
} else {
- gaps = Intlist_push(gaps,(int) adj);
- }
-
- /* second part */
- tr_left = tr_left1;
- querystart = indel_pos + ninserts;
- queryend = querylength;
- debug2(printf("\nAnalyzing query %d..%d\n",querystart,queryend));
-
- trstart = transcript_diag1 + querystart; /* tplus */
- overall_trend = transcript_diag1 + queryend; /* tplus */
- debug2(printf("transcript_diag %d, trstart %d, trend %d, tr_left %u\n",
- transcript_diag1,trstart,overall_trend,tr_left));
-
- debug2(printf("Searching for exoni for trstart %d\n",trstart));
- while (exoni < nexons && exonbounds[exoni] <= trstart) {
- exoni++;
+ /* nintrons += 1; */
+ align_residual -= len;
+ querystart += len; /* tplus */
+ genomicpos = exonstarts[exoni] - 1; /* gplus */
+ debug2(printf("genomicpos %u = %u - 1\n",genomicpos,exonstarts[exoni]));
+ exon_residual = exonbounds[exoni] - exonbounds[exoni - 1];
+ debug2(printf("INTRON\n"));
+ gaps = Intlist_push(gaps,0);
}
+ }
+ }
+
+ if (abortp == true) {
+ debug2(printf("ABORTING PATH\n"));
+ Uintlist_free(&endpoints);
+ Intlist_free(&gaps);
+ } else {
+ endpoints = Uintlist_reverse(endpoints); /* gplus */
+ gaps = Intlist_reverse(gaps); /* gplus */
+ debug2(printf("PLUS PATH %d. ENDPOINTS: %s. GAPS: %s\n\n",
+ ngplus,Uintlist_to_string(endpoints),Intlist_to_string(gaps)));
+ gplus_paths[ngplus++] = Path_new(total_nmatches,trnum,overall_trstart,overall_trend,
+ /*trim_low*/trim5,/*trim_high*/trim3,
+ chrnum,chroffset,chrhigh,chrlength,endpoints,gaps,concordantp);
+ }
+
+ } else {
+ /* Case 2 ultrafast gap: tplus, gminus. transcript_plusp == true && transcript_genestrand < 0 */
+ debug2(printf("Case 2 ultrafast gap: tplus, gminus\n"));
+ /* sensedir = SENSE_FORWARD; */
+ /* plusp = false; */
+
+ nexons = Transcriptome_exons(&exonbounds,&exonstarts,transcriptome,trnum);
+ Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
- if (exoni >= nexons) {
- debug2(printf("ABORTING BECAUSE OF EXONI %d >= NEXONS %d\n",exoni,nexons));
+ endpoints = (Uintlist_T) NULL;
+ gaps = (Intlist_T) NULL;
+ abortp = false;
+
+ transcript_diag0 = tr_left0 - troffset;
+ transcript_diag1 = tr_left1 - troffset;
+
+ /* first part */
+ tr_left = tr_left0;
+ querystart = 0; /* tplus: compute from querystart to queryend */
+ queryend = indel_pos;
+ debug2(printf("\nAnalyzing query %d..%d\n",querystart,queryend));
+
+ overall_trstart = trstart = transcript_diag0 + querystart; /* tplus */
+#ifdef DEBUG2
+ trend = transcript_diag0 + queryend; /* tplus */
+ printf("transcript_diag %d, trstart %d, trend %d, tr_left %u, adj %d\n",
+ transcript_diag0,trstart,trend,tr_left,adj);
+#endif
+
+ exoni = 0;
+#if 0 && defined(HAVE_AVX2)
+ _trstart = _mm256_set1_epi32(trstart);
+ ptr = exonbounds;
+ _exonbounds = _mm256_loadu_si256((__m256i *) ptr);
+ _diff = _mm256_cmpgt_epi32(_exonbounds,_trstart);
+ while (all_zero_p(_diff)) {
+ exoni += 8;
+ ptr += 8;
+ _exonbounds = _mm256_loadu_si256((__m256i *) ptr);
+ _diff = _mm256_cmpgt_epi32(_exonbounds,_trstart);
+ }
+ matchbits = _mm256_movemask_ps(_mm256_castsi256_ps(_diff));
+ exoni += count_trailing_zeroes_32(matchbits);
+#elif 0 && defined(HAVE_SSE2)
+ _trstart = _mm_set1_epi32(trstart);
+ ptr = exonbounds;
+ _exonbounds = _mm_loadu_si128((__m128i *) ptr);
+ _diff = _mm_cmpgt_epi32(_exonbounds,_trstart);
+ while (all_zero_p(_diff)) {
+ exoni += 4;
+ ptr += 4;
+ _exonbounds = _mm_loadu_si128((__m128i *) ptr);
+ _diff = _mm_cmpgt_epi32(_exonbounds,_trstart);
+ }
+ matchbits = _mm_movemask_ps(_mm_castsi128_ps(_diff));
+ exoni += count_trailing_zeroes_32(matchbits);
+#else
+ while (exoni < nexons && exonbounds[exoni] <= trstart) {
+ exoni++;
+ }
+#endif
+
+ if (exoni >= nexons) {
+ debug2(printf("ABORTING BECAUSE OF EXONI %d >= NEXONS %d\n",exoni,nexons));
+ abortp = true;
+ } else {
+ debug2(printf("(11) exoni %d out of nexons %d\n",exoni,nexons));
+ if (exoni == 0) {
+ exonpos = trstart;
+ } else {
+ exonpos = trstart - exonbounds[exoni - 1];
+ }
+ exon_residual = exonbounds[exoni] - trstart;
+ debug2(printf("exoni %d => exon_residual %d, exonpos %d\n",exoni,exon_residual,exonpos));
+
+ /* Generate one substring per exon */
+ genomicpos = exonstarts[exoni] - exonpos; /* gminus */
+ debug2(printf("genomicpos %u = %u - %u\n",genomicpos,exonstarts[exoni],exonpos));
+ align_residual = queryend - querystart;
+ while (align_residual > 0) {
+ debug2(printf("abortp %d, align_residual %d, exon_residual %d\n",abortp,align_residual,exon_residual));
+ len = (align_residual <= exon_residual) ? align_residual : exon_residual;
+ queryend = querystart + len; /* tplus */
+
+ alignstart = chroffset + genomicpos;
+ alignend = alignstart - len; /* gminus */
+ debug2(left = alignend - (querylength - queryend)); /* tplus != gminus */
+ debug2(printf("tr ultrafast gap case 2 1st part query %d..%d, align %u..%u [%u..%u], left %u = %u - (%d - %d)\n",
+ querystart,queryend,alignstart,alignend,alignstart - chroffset,alignend - chroffset,
+ left - chroffset,alignend - chroffset,querylength,queryend));
+
+ /* tplus: push alignstart first */
+ endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignstart - chroffset));
+ endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignend - chroffset));
+
+ if (align_residual <= exon_residual) {
+ exon_residual -= align_residual; /* Need to know this if we have a following deletion */
+ align_residual = 0;
+ } else if (++exoni >= nexons) {
+ /* Aligning past the end of the transcript */
+ debug2(printf("4 ABORTING BECAUSE ALIGNING PAST END OF TRANSCRIPT\n"));
+ align_residual = 0;
abortp = true;
} else {
- debug2(printf("(10) exoni %d out of nexons %d\n",exoni,nexons));
- if (exoni == 0) {
- exonpos = trstart;
- } else {
- exonpos = trstart - exonbounds[exoni - 1];
- }
- exon_residual = exonbounds[exoni] - trstart;
- debug2(printf("exoni %d => exon_residual %d, exonpos %d\n",exoni,exon_residual,exonpos));
-
- /* Generate one substring per exon */
- genomicpos = exonstarts[exoni] + exonpos - 1; /* gplus */
- debug2(printf("genomicpos %u = %u + %u - 1\n",genomicpos,exonstarts[exoni],exonpos));
- align_residual = queryend - querystart;
- while (align_residual > 0) {
- debug2(printf("abortp %d, align_residual %d, exon_residual %d\n",abortp,align_residual,exon_residual));
- len = (align_residual <= exon_residual) ? align_residual : exon_residual;
- queryend = querystart + len; /* tplus */
-
- alignstart = chroffset + genomicpos;
- alignend = alignstart + len; /* gplus */
- debug2(left = alignstart - querystart); /* tplus == gminus */
- debug2(printf("tr ultrafast gap case 1 2nd part query %d..%d, align %u..%u [%u..%u], left %u = %u - %d\n",
- querystart,queryend,alignstart,alignend,alignstart - chroffset,alignend - chroffset,
- left - chroffset,alignstart - chroffset,querystart));
-
- /* tplus: push alignstart first */
- endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignstart - chroffset));
- endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignend - chroffset));
-
- if (align_residual <= exon_residual) {
- exon_residual -= align_residual; /* Need to know this if we have a following deletion */
- align_residual = 0;
- } else if (++exoni >= nexons) {
- /* Aligning past the end of the transcript */
- debug2(printf("2 ABORTING BECAUSE ALIGNING PAST END OF TRANSCRIPT\n"));
- align_residual = 0;
- abortp = true;
- } else {
- /* nintrons += 1; */
- align_residual -= len;
- querystart += len; /* tplus */
- genomicpos = exonstarts[exoni] - 1; /* gplus */
- debug2(printf("genomicpos %u = %u - 1\n",genomicpos,exonstarts[exoni]));
- exon_residual = exonbounds[exoni] - exonbounds[exoni - 1];
- debug2(printf("INTRON\n"));
- gaps = Intlist_push(gaps,0);
- }
- }
- }
-
- if (abortp == true) {
- debug2(printf("ABORTING PATH\n"));
- Uintlist_free(&endpoints);
- Intlist_free(&gaps);
- } else {
- endpoints = Uintlist_reverse(endpoints); /* gplus */
- gaps = Intlist_reverse(gaps); /* gplus */
- debug2(printf("PLUS PATH %d. ENDPOINTS: %s. GAPS: %s\n\n",
- ngplus,Uintlist_to_string(endpoints),Intlist_to_string(gaps)));
- gplus_paths[ngplus++] = Path_new(total_nmatches,trnum,overall_trstart,overall_trend,
- /*trim_low*/trim5,/*trim_high*/trim3,
- chrnum,chroffset,chrhigh,chrlength,endpoints,gaps,concordantp);
+ /* nintrons += 1; */
+ align_residual -= len;
+ querystart += len; /* tplus */
+ genomicpos = exonstarts[exoni]; /* gminus */
+ debug2(printf("genomicpos %u = %u\n",genomicpos,exonstarts[exoni]));
+ exon_residual = exonbounds[exoni] - exonbounds[exoni - 1];
+ debug2(printf("INTRON\n"));
+ gaps = Intlist_push(gaps,0);
}
-
- } else {
- /* Case 2 ultrafast gap: tplus, gminus. transcript_plusp == true && transcript_genestrand < 0 */
- debug2(printf("Case 2 ultrafast gap: tplus, gminus\n"));
- /* sensedir = SENSE_FORWARD; */
- /* plusp = false; */
-
- endpoints = (Uintlist_T) NULL;
- gaps = (Intlist_T) NULL;
+ }
+ }
- transcript_diag0 = tr_left0 - troffset;
- transcript_diag1 = tr_left1 - troffset;
-
- abortp = false;
-
- /* first part */
- tr_left = tr_left0;
- querystart = 0; /* tplus: compute from querystart to queryend */
- queryend = indel_pos;
- debug2(printf("\nAnalyzing query %d..%d\n",querystart,queryend));
-
- overall_trstart = trstart = transcript_diag0 + querystart; /* tplus */
-#ifdef DEBUG2
- trend = transcript_diag0 + queryend; /* tplus */
- printf("transcript_diag %d, trstart %d, trend %d, tr_left %u, adj %d\n",
- transcript_diag0,trstart,trend,tr_left,adj);
-#endif
-
- exoni = 0;
-#if 0 && defined(HAVE_AVX2)
- _trstart = _mm256_set1_epi32(trstart);
- ptr = exonbounds;
- _exonbounds = _mm256_loadu_si256((__m256i *) ptr);
- _diff = _mm256_cmpgt_epi32(_exonbounds,_trstart);
- while (all_zero_p(_diff)) {
- exoni += 8;
- ptr += 8;
- _exonbounds = _mm256_loadu_si256((__m256i *) ptr);
- _diff = _mm256_cmpgt_epi32(_exonbounds,_trstart);
- }
- matchbits = _mm256_movemask_ps(_mm256_castsi256_ps(_diff));
- exoni += count_trailing_zeroes_32(matchbits);
-#elif 0 && defined(HAVE_SSE2)
- _trstart = _mm_set1_epi32(trstart);
- ptr = exonbounds;
- _exonbounds = _mm_loadu_si128((__m128i *) ptr);
- _diff = _mm_cmpgt_epi32(_exonbounds,_trstart);
- while (all_zero_p(_diff)) {
- exoni += 4;
- ptr += 4;
- _exonbounds = _mm_loadu_si128((__m128i *) ptr);
- _diff = _mm_cmpgt_epi32(_exonbounds,_trstart);
- }
- matchbits = _mm_movemask_ps(_mm_castsi128_ps(_diff));
- exoni += count_trailing_zeroes_32(matchbits);
-#else
- while (exoni < nexons && exonbounds[exoni] <= trstart) {
- exoni++;
- }
-#endif
-
- if (exoni >= nexons) {
- debug2(printf("ABORTING BECAUSE OF EXONI %d >= NEXONS %d\n",exoni,nexons));
- abortp = true;
- } else {
- debug2(printf("(11) exoni %d out of nexons %d\n",exoni,nexons));
- if (exoni == 0) {
- exonpos = trstart;
- } else {
- exonpos = trstart - exonbounds[exoni - 1];
- }
- exon_residual = exonbounds[exoni] - trstart;
- debug2(printf("exoni %d => exon_residual %d, exonpos %d\n",exoni,exon_residual,exonpos));
-
- /* Generate one substring per exon */
- genomicpos = exonstarts[exoni] - exonpos; /* gminus */
- debug2(printf("genomicpos %u = %u - %u\n",genomicpos,exonstarts[exoni],exonpos));
- align_residual = queryend - querystart;
- while (align_residual > 0) {
- debug2(printf("abortp %d, align_residual %d, exon_residual %d\n",abortp,align_residual,exon_residual));
- len = (align_residual <= exon_residual) ? align_residual : exon_residual;
- queryend = querystart + len; /* tplus */
-
- alignstart = chroffset + genomicpos;
- alignend = alignstart - len; /* gminus */
- debug2(left = alignend - (querylength - queryend)); /* tplus != gminus */
- debug2(printf("tr ultrafast gap case 2 1st part query %d..%d, align %u..%u [%u..%u], left %u = %u - (%d - %d)\n",
- querystart,queryend,alignstart,alignend,alignstart - chroffset,alignend - chroffset,
- left - chroffset,alignend - chroffset,querylength,queryend));
-
- /* tplus: push alignstart first */
- endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignstart - chroffset));
- endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignend - chroffset));
-
- if (align_residual <= exon_residual) {
- exon_residual -= align_residual; /* Need to know this if we have a following deletion */
- align_residual = 0;
- } else if (++exoni >= nexons) {
- /* Aligning past the end of the transcript */
- debug2(printf("4 ABORTING BECAUSE ALIGNING PAST END OF TRANSCRIPT\n"));
- align_residual = 0;
- abortp = true;
- } else {
- /* nintrons += 1; */
- align_residual -= len;
- querystart += len; /* tplus */
- genomicpos = exonstarts[exoni]; /* gminus */
- debug2(printf("genomicpos %u = %u\n",genomicpos,exonstarts[exoni]));
- exon_residual = exonbounds[exoni] - exonbounds[exoni - 1];
- debug2(printf("INTRON\n"));
- gaps = Intlist_push(gaps,0);
- }
- }
- }
-
- /* gap */
- debug2(printf("INDEL: last piece %d | nindels %d | next piece %d\n",len,nindels,exon_residual));
- debug2(printf("exon residual %d vs nindels %d\n",exon_residual,nindels));
- if (abortp == true) {
- /* Skip */
- } else if (/*len <= nindels ||*/exon_residual <= nindels) {
- debug2(printf("ABORTING BECAUSE INDEL OVERLAPS SPLICE SITE\n"));
- abortp = true;
- } else {
- gaps = Intlist_push(gaps,(int) adj);
- }
-
- /* second part */
- tr_left = tr_left1;
- querystart = indel_pos + ninserts;
- queryend = querylength;
- debug2(printf("\nAnalyzing query %d..%d\n",querystart,queryend));
-
- trstart = transcript_diag1 + querystart; /* tplus */
- overall_trend = transcript_diag1 + queryend; /* tplus */
- debug2(printf("transcript_diag %d, trstart %d, trend %d, tr_left %u\n",
- transcript_diag1,trstart,overall_trend,tr_left));
+ /* gap */
+ debug2(printf("INDEL: last piece %d | nindels %d | next piece %d\n",len,nindels,exon_residual));
+ debug2(printf("exon residual %d vs nindels %d\n",exon_residual,nindels));
+ if (abortp == true) {
+ /* Skip */
+ } else if (/*len <= nindels ||*/exon_residual <= nindels) {
+ debug2(printf("ABORTING BECAUSE INDEL OVERLAPS SPLICE SITE (6)\n"));
+ abortp = true;
+ } else {
+ gaps = Intlist_push(gaps,(int) adj);
+ }
+
+ /* second part */
+ tr_left = tr_left1;
+ querystart = indel_pos + ninserts;
+ queryend = querylength;
+ debug2(printf("\nAnalyzing query %d..%d\n",querystart,queryend));
+
+ trstart = transcript_diag1 + querystart; /* tplus */
+ overall_trend = transcript_diag1 + queryend; /* tplus */
+ debug2(printf("transcript_diag %d, trstart %d, trend %d, tr_left %u\n",
+ transcript_diag1,trstart,overall_trend,tr_left));
+
+ while (exoni < nexons && exonbounds[exoni] <= trstart) {
+ exoni++;
+ }
+
+ if (exoni >= nexons) {
+ debug2(printf("ABORTING BECAUSE OF EXONI %d >= NEXONS %d\n",exoni,nexons));
+ abortp = true;
+ } else {
+ debug2(printf("(12) exoni %d out of nexons %d\n",exoni,nexons));
+ if (exoni == 0) {
+ exonpos = trstart;
+ } else {
+ exonpos = trstart - exonbounds[exoni - 1];
+ }
+ exon_residual = exonbounds[exoni] - trstart;
+ debug2(printf("exoni %d => exon_residual %d, exonpos %d\n",exoni,exon_residual,exonpos));
+
+ /* Generate one substring per exon */
+ genomicpos = exonstarts[exoni] - exonpos; /* gminus */
+ debug2(printf("genomicpos %u = %u - %u\n",genomicpos,exonstarts[exoni],exonpos));
+ align_residual = queryend - querystart;
+ while (align_residual > 0) {
+ debug2(printf("abortp %d, align_residual %d, exon_residual %d\n",abortp,align_residual,exon_residual));
+ len = (align_residual <= exon_residual) ? align_residual : exon_residual;
+ queryend = querystart + len; /* tplus */
- while (exoni < nexons && exonbounds[exoni] <= trstart) {
- exoni++;
- }
-
- if (exoni >= nexons) {
- debug2(printf("ABORTING BECAUSE OF EXONI %d >= NEXONS %d\n",exoni,nexons));
- abortp = true;
- } else {
- debug2(printf("(12) exoni %d out of nexons %d\n",exoni,nexons));
- if (exoni == 0) {
- exonpos = trstart;
- } else {
- exonpos = trstart - exonbounds[exoni - 1];
- }
- exon_residual = exonbounds[exoni] - trstart;
- debug2(printf("exoni %d => exon_residual %d, exonpos %d\n",exoni,exon_residual,exonpos));
-
- /* Generate one substring per exon */
- genomicpos = exonstarts[exoni] - exonpos; /* gminus */
- debug2(printf("genomicpos %u = %u - %u\n",genomicpos,exonstarts[exoni],exonpos));
- align_residual = queryend - querystart;
- while (align_residual > 0) {
- debug2(printf("abortp %d, align_residual %d, exon_residual %d\n",abortp,align_residual,exon_residual));
- len = (align_residual <= exon_residual) ? align_residual : exon_residual;
- queryend = querystart + len; /* tplus */
-
- alignstart = chroffset + genomicpos;
- alignend = alignstart - len; /* gminus */
- debug2(left = alignend - (querylength - queryend)); /* tplus != gminus */
- debug2(printf("tr ultrfast gap case 2 2nd part query %d..%d, align %u..%u [%u..%u], left %u = %u - (%d - %d)\n",
- querystart,queryend,alignstart,alignend,alignstart - chroffset,alignend - chroffset,
- left - chroffset,alignend - chroffset,querylength,queryend));
-
- /* tplus: push alignstart first */
- endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignstart - chroffset));
- endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignend - chroffset));
-
- if (align_residual <= exon_residual) {
- exon_residual -= align_residual; /* Need to know this if we have a following deletion */
- align_residual = 0;
- } else if (++exoni >= nexons) {
- /* Aligning past the end of the transcript */
- debug2(printf("4 ABORTING BECAUSE ALIGNING PAST END OF TRANSCRIPT\n"));
- align_residual = 0;
- abortp = true;
- } else {
- /* nintrons += 1; */
- align_residual -= len;
- querystart += len; /* tplus */
- genomicpos = exonstarts[exoni]; /* gminus */
- debug2(printf("genomicpos %u = %u\n",genomicpos,exonstarts[exoni]));
- exon_residual = exonbounds[exoni] - exonbounds[exoni - 1];
- debug2(printf("INTRON\n"));
- gaps = Intlist_push(gaps,0);
- }
- }
- }
+ alignstart = chroffset + genomicpos;
+ alignend = alignstart - len; /* gminus */
+ debug2(left = alignend - (querylength - queryend)); /* tplus != gminus */
+ debug2(printf("tr ultrfast gap case 2 2nd part query %d..%d, align %u..%u [%u..%u], left %u = %u - (%d - %d)\n",
+ querystart,queryend,alignstart,alignend,alignstart - chroffset,alignend - chroffset,
+ left - chroffset,alignend - chroffset,querylength,queryend));
- if (abortp == true) {
- debug2(printf("ABORTING PATH\n"));
- Uintlist_free(&endpoints);
- Intlist_free(&gaps);
+ /* tplus: push alignstart first */
+ endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignstart - chroffset));
+ endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignend - chroffset));
+
+ if (align_residual <= exon_residual) {
+ exon_residual -= align_residual; /* Need to know this if we have a following deletion */
+ align_residual = 0;
+ } else if (++exoni >= nexons) {
+ /* Aligning past the end of the transcript */
+ debug2(printf("4 ABORTING BECAUSE ALIGNING PAST END OF TRANSCRIPT\n"));
+ align_residual = 0;
+ abortp = true;
} else {
- /* gminus: Do not reverse endpoints or gaps */
- debug2(printf("MINUS PATH %d. ENDPOINTS: %s. GAPS: %s\n\n",
- ngminus,Uintlist_to_string(endpoints),Intlist_to_string(gaps)));
- gminus_paths[ngminus++] = Path_new(total_nmatches,trnum,overall_trstart,overall_trend,
- /*trim_low*/trim3,/*trim_high*/trim5,
- chrnum,chroffset,chrhigh,chrlength,endpoints,gaps,concordantp);
+ /* nintrons += 1; */
+ align_residual -= len;
+ querystart += len; /* tplus */
+ genomicpos = exonstarts[exoni]; /* gminus */
+ debug2(printf("genomicpos %u = %u\n",genomicpos,exonstarts[exoni]));
+ exon_residual = exonbounds[exoni] - exonbounds[exoni - 1];
+ debug2(printf("INTRON\n"));
+ gaps = Intlist_push(gaps,0);
}
}
}
+
+ if (abortp == true) {
+ debug2(printf("ABORTING PATH\n"));
+ Uintlist_free(&endpoints);
+ Intlist_free(&gaps);
+ } else {
+ /* gminus: Do not reverse endpoints or gaps */
+ debug2(printf("MINUS PATH %d. ENDPOINTS: %s. GAPS: %s\n\n",
+ ngminus,Uintlist_to_string(endpoints),Intlist_to_string(gaps)));
+ gminus_paths[ngminus++] = Path_new(total_nmatches,trnum,overall_trstart,overall_trend,
+ /*trim_low*/trim3,/*trim_high*/trim5,
+ chrnum,chroffset,chrhigh,chrlength,endpoints,gaps,concordantp);
+ }
}
}
@@ -4108,475 +4119,479 @@ search_transcriptome_ends (Path_T **tplus_gplus_paths, int *n_tplus_gplus,
adj = (int) (tr_univdiagonala - tr_univdiagonalb);
debug2a(printf("Setting b first, a second, adj %d\n",adj));
} else {
+ /* Read does not extend to the ends, so this method is not applicable */
adj = 0;
}
- abortp = false;
- trnum = Univ_IIT_get_trnum(&troffset,&trhigh,&trlength,transcript_iit,/*low*/low0,/*high*/high0);
- debug2(printf("trnum is %d: %s\n",trnum,Univ_IIT_label(transcript_iit,trnum,&allocp)));
- if ((chrnum = Transcriptome_chrnum(&transcript_genestrand,transcriptome,trnum)) <= 0) {
- abortp = true;
- } else {
- if (transcript_genestrand > 0) {
- want_lowest_coordinate_p = true;
- } else {
- want_lowest_coordinate_p = false;
- }
- debug2(printf("trnum %d, transcript_genestrand %d\n",trnum,transcript_genestrand));
-
- if (adj > 0 &&
- (indel_pos = Indel_resolve_middle_deletion(&best_nmismatches_i,&best_nmismatches_j,
- &best_ref_nmismatches_i,&best_ref_nmismatches_j,
- /*left*/tr_left0,/*indels*/-adj,
- /*mismatch_positions_left*/NULL,/*nmismatches_left*/0,
- /*mismatch_positions_right*/NULL,/*nmismatches_right*/0,
- /*ome*/transcriptomebits,/*ome_alt*/NULL,query_compress_rev,
- /*querystart*/0,/*queryend*/querylength,querylength,
- nmismatches_allowed,/*plusp*/false,/*genestrand*/0,
- want_lowest_coordinate_p)) > 0) {
- ninserts = 0;
- nindels = adj;
- total_nmatches = querylength - best_nmismatches_i - best_nmismatches_j;
- } else if (adj < 0 &&
- (indel_pos = Indel_resolve_middle_insertion(&best_nmismatches_i,&best_nmismatches_j,
- &best_ref_nmismatches_i,&best_ref_nmismatches_j,
- /*left*/tr_left0,/*indels*/-adj,
- /*mismatch_positions_left*/NULL,/*nmismatches_left*/0,
- /*mismatch_positions_right*/NULL,/*nmismatches_right*/0,
- /*ome*/transcriptomebits,/*ome_alt*/NULL,query_compress_rev,
- /*querystart*/0,/*queryend*/querylength,querylength,
- nmismatches_allowed,/*plusp*/false,/*genestrand*/0,
- want_lowest_coordinate_p)) > 0) {
- ninserts = nindels = -adj;
- total_nmatches = querylength - best_nmismatches_i - best_nmismatches_j - ninserts;
- } else {
+ if (adj != 0) {
+ trnum = Univ_IIT_get_trnum(&troffset,&trhigh,&trlength,transcript_iit,/*low*/low0,/*high*/high0);
+ debug2(printf("trnum is %d: %s\n",trnum,Univ_IIT_label(transcript_iit,trnum,&allocp)));
+ if ((chrnum = Transcriptome_chrnum(&transcript_genestrand,transcriptome,trnum)) <= 0) {
adj = 0;
+ } else {
+ if (transcript_genestrand > 0) {
+ want_lowest_coordinate_p = true;
+ } else {
+ want_lowest_coordinate_p = false;
+ }
+ debug2(printf("trnum %d, transcript_genestrand %d\n",trnum,transcript_genestrand));
+
+ if (adj > 0 &&
+ (indel_pos = Indel_resolve_middle_deletion(&best_nmismatches_i,&best_nmismatches_j,
+ &best_ref_nmismatches_i,&best_ref_nmismatches_j,
+ /*left*/tr_left0,/*indels*/-adj,
+ /*mismatch_positions_left*/NULL,/*nmismatches_left*/0,
+ /*mismatch_positions_right*/NULL,/*nmismatches_right*/0,
+ /*ome*/transcriptomebits,/*ome_alt*/NULL,query_compress_rev,
+ /*querystart*/0,/*queryend*/querylength,querylength,
+ nmismatches_allowed,/*plusp*/false,/*genestrand*/0,
+ want_lowest_coordinate_p)) > 0) {
+ ninserts = 0;
+ nindels = adj;
+ total_nmatches = querylength - best_nmismatches_i - best_nmismatches_j;
+ } else if (adj < 0 &&
+ (indel_pos = Indel_resolve_middle_insertion(&best_nmismatches_i,&best_nmismatches_j,
+ &best_ref_nmismatches_i,&best_ref_nmismatches_j,
+ /*left*/tr_left0,/*indels*/-adj,
+ /*mismatch_positions_left*/NULL,/*nmismatches_left*/0,
+ /*mismatch_positions_right*/NULL,/*nmismatches_right*/0,
+ /*ome*/transcriptomebits,/*ome_alt*/NULL,query_compress_rev,
+ /*querystart*/0,/*queryend*/querylength,querylength,
+ nmismatches_allowed,/*plusp*/false,/*genestrand*/0,
+ want_lowest_coordinate_p)) > 0) {
+ ninserts = nindels = -adj;
+ total_nmatches = querylength - best_nmismatches_i - best_nmismatches_j - ninserts;
+ } else {
+ adj = 0;
+ }
}
+ }
- if (adj != 0) {
- nexons = Transcriptome_exons(&exonbounds,&exonstarts,transcriptome,trnum);
- Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
-
- if (transcript_genestrand > 0) {
- /* Case 3 ultrafast gap: tminus, gplus. transcript_plusp == false && transcript_genestrand > 0 */
- debug2(printf("Case 3 ultrafast gap: tminus, gplus\n"));
- /* sensedir = SENSE_ANTI; */
- /* plusp = false; */
-
- endpoints = (Uintlist_T) NULL;
- gaps = (Intlist_T) NULL;
+ if (adj == 0) {
+ /* Skip finding an indel */
- transcript_diag0 = tr_left0 - troffset;
- transcript_diag1 = tr_left1 - troffset;
+ } else if (transcript_genestrand > 0) {
+ /* Case 3 ultrafast gap: tminus, gplus. transcript_plusp == false && transcript_genestrand > 0 */
+ debug2(printf("Case 3 ultrafast gap: tminus, gplus\n"));
+ /* sensedir = SENSE_ANTI; */
+ /* plusp = false; */
- abortp = false;
+ nexons = Transcriptome_exons(&exonbounds,&exonstarts,transcriptome,trnum);
+ Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
- /* first part */
- tr_left = tr_left0;
- queryend = querylength; /* tminus: compute from queryend to querystart */
- querystart = querylength - indel_pos;
- debug2(printf("\nAnalyzing query %d..%d\n",querystart,queryend));
-
- overall_trstart = trstart = transcript_diag0 + (querylength - queryend); /* tminus */
+ endpoints = (Uintlist_T) NULL;
+ gaps = (Intlist_T) NULL;
+ abortp = false;
+
+ transcript_diag0 = tr_left0 - troffset;
+ transcript_diag1 = tr_left1 - troffset;
+
+ /* first part */
+ tr_left = tr_left0;
+ queryend = querylength; /* tminus: compute from queryend to querystart */
+ querystart = querylength - indel_pos;
+ debug2(printf("\nAnalyzing query %d..%d\n",querystart,queryend));
+
+ overall_trstart = trstart = transcript_diag0 + (querylength - queryend); /* tminus */
#ifdef DEBUG2
- trend = transcript_diag0 + (querylength - querystart); /* tminus */
- printf("transcript_diag %d, trstart %d, trend %d, tr_left %u, adj %d\n",
- transcript_diag0,trstart,trend,tr_left,adj);
+ trend = transcript_diag0 + (querylength - querystart); /* tminus */
+ printf("transcript_diag %d, trstart %d, trend %d, tr_left %u, adj %d\n",
+ transcript_diag0,trstart,trend,tr_left,adj);
#endif
- exoni = 0;
+ exoni = 0;
#if 0 && defined(HAVE_AVX2)
- _trstart = _mm256_set1_epi32(trstart);
- ptr = exonbounds;
- _exonbounds = _mm256_loadu_si256((__m256i *) ptr);
- _diff = _mm256_cmpgt_epi32(_exonbounds,_trstart);
- while (all_zero_p(_diff)) {
- exoni += 8;
- ptr += 8;
- _exonbounds = _mm256_loadu_si256((__m256i *) ptr);
- _diff = _mm256_cmpgt_epi32(_exonbounds,_trstart);
- }
- matchbits = _mm256_movemask_ps(_mm256_castsi256_ps(_diff));
- exoni += count_trailing_zeroes_32(matchbits);
+ _trstart = _mm256_set1_epi32(trstart);
+ ptr = exonbounds;
+ _exonbounds = _mm256_loadu_si256((__m256i *) ptr);
+ _diff = _mm256_cmpgt_epi32(_exonbounds,_trstart);
+ while (all_zero_p(_diff)) {
+ exoni += 8;
+ ptr += 8;
+ _exonbounds = _mm256_loadu_si256((__m256i *) ptr);
+ _diff = _mm256_cmpgt_epi32(_exonbounds,_trstart);
+ }
+ matchbits = _mm256_movemask_ps(_mm256_castsi256_ps(_diff));
+ exoni += count_trailing_zeroes_32(matchbits);
#elif 0 && defined(HAVE_SSE2)
- _trstart = _mm_set1_epi32(trstart);
- ptr = exonbounds;
- _exonbounds = _mm_loadu_si128((__m128i *) ptr);
- _diff = _mm_cmpgt_epi32(_exonbounds,_trstart);
- while (all_zero_p(_diff)) {
- exoni += 4;
- ptr += 4;
- _exonbounds = _mm_loadu_si128((__m128i *) ptr);
- _diff = _mm_cmpgt_epi32(_exonbounds,_trstart);
- }
- matchbits = _mm_movemask_ps(_mm_castsi128_ps(_diff));
- exoni += count_trailing_zeroes_32(matchbits);
+ _trstart = _mm_set1_epi32(trstart);
+ ptr = exonbounds;
+ _exonbounds = _mm_loadu_si128((__m128i *) ptr);
+ _diff = _mm_cmpgt_epi32(_exonbounds,_trstart);
+ while (all_zero_p(_diff)) {
+ exoni += 4;
+ ptr += 4;
+ _exonbounds = _mm_loadu_si128((__m128i *) ptr);
+ _diff = _mm_cmpgt_epi32(_exonbounds,_trstart);
+ }
+ matchbits = _mm_movemask_ps(_mm_castsi128_ps(_diff));
+ exoni += count_trailing_zeroes_32(matchbits);
#else
- while (exoni < nexons && exonbounds[exoni] <= trstart) {
- exoni++;
- }
+ while (exoni < nexons && exonbounds[exoni] <= trstart) {
+ exoni++;
+ }
#endif
-
- if (exoni >= nexons) {
- debug2(printf("ABORTING BECAUSE OF EXONI %d >= NEXONS %d\n",exoni,nexons));
- abortp = true;
- } else {
- debug2(printf("(13) exoni %d out of nexons %d\n",exoni,nexons));
- if (exoni == 0) {
- exonpos = trstart;
- } else {
- exonpos = trstart - exonbounds[exoni - 1];
- }
- exon_residual = exonbounds[exoni] - trstart;
- debug2(printf("exoni %d => exon_residual %d, exonpos %d\n",exoni,exon_residual,exonpos));
-
- /* Generate one substring per exon */
- genomicpos = exonstarts[exoni] + exonpos - 1; /* gplus */
- debug2(printf("genomicpos %u = %u + %u - 1\n",genomicpos,exonstarts[exoni],exonpos));
- align_residual = queryend - querystart;
- while (align_residual > 0) {
- debug2(printf("abortp %d,align_residual %d, exon_residual %d\n",abortp,align_residual,exon_residual));
- len = (align_residual <= exon_residual) ? align_residual : exon_residual;
- querystart = queryend - len; /* tminus */
-
- alignend = chroffset + genomicpos;
- alignstart = alignend + len; /* gplus */
- debug2(left = alignend - (querylength - queryend)); /* tminus != gplus */
- debug2(printf("tr ultrafast gap case 3 1st part query %d..%d, align %u..%u [%u..%u], left %u = %u - (%d - %d)\n",
- querystart,queryend,alignstart,alignend,alignstart - chroffset,alignend - chroffset,
- left - chroffset,alignend - chroffset,querylength,queryend));
-
- /* tminus: plus alignend first */
- endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignend - chroffset));
- endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignstart - chroffset));
-
- if (align_residual <= exon_residual) {
- exon_residual -= align_residual; /* Need to know this if we have a following deletion */
- align_residual = 0;
- } else if (++exoni >= nexons) {
- /* Aligning past the end of the transcript */
- debug2(printf("6 ABORTING BECAUSE ALIGNING PAST END OF TRANSCRIPT\n"));
- align_residual = 0;
- abortp = true;
- } else {
- /* nintrons += 1; */
- align_residual -= len;
- queryend -= len; /* tminus */
- genomicpos = exonstarts[exoni] - 1; /* gplus */
- debug2(printf("genomicpos %u = %u - 1\n",genomicpos,exonstarts[exoni]));
- exon_residual = exonbounds[exoni] - exonbounds[exoni - 1];
- debug2(printf("INTRON\n"));
- gaps = Intlist_push(gaps,0);
- }
- }
- }
-
- /* gap */
- debug2(printf("INDEL: last piece %d | nindels %d | next piece %d\n",len,nindels,exon_residual));
- debug2(printf("exon residual %d vs nindels %d\n",exon_residual,nindels));
- if (abortp == true) {
- /* Skip */
- } else if (/*len <= nindels ||*/exon_residual <= nindels) {
- debug2(printf("ABORTING BECAUSE INDEL OVERLAPS SPLICE SITE\n"));
+
+ if (exoni >= nexons) {
+ debug2(printf("ABORTING BECAUSE OF EXONI %d >= NEXONS %d\n",exoni,nexons));
+ abortp = true;
+ } else {
+ debug2(printf("(13) exoni %d out of nexons %d\n",exoni,nexons));
+ if (exoni == 0) {
+ exonpos = trstart;
+ } else {
+ exonpos = trstart - exonbounds[exoni - 1];
+ }
+ exon_residual = exonbounds[exoni] - trstart;
+ debug2(printf("exoni %d => exon_residual %d, exonpos %d\n",exoni,exon_residual,exonpos));
+
+ /* Generate one substring per exon */
+ genomicpos = exonstarts[exoni] + exonpos - 1; /* gplus */
+ debug2(printf("genomicpos %u = %u + %u - 1\n",genomicpos,exonstarts[exoni],exonpos));
+ align_residual = queryend - querystart;
+ while (align_residual > 0) {
+ debug2(printf("abortp %d,align_residual %d, exon_residual %d\n",abortp,align_residual,exon_residual));
+ len = (align_residual <= exon_residual) ? align_residual : exon_residual;
+ querystart = queryend - len; /* tminus */
+
+ alignend = chroffset + genomicpos;
+ alignstart = alignend + len; /* gplus */
+ debug2(left = alignend - (querylength - queryend)); /* tminus != gplus */
+ debug2(printf("tr ultrafast gap case 3 1st part query %d..%d, align %u..%u [%u..%u], left %u = %u - (%d - %d)\n",
+ querystart,queryend,alignstart,alignend,alignstart - chroffset,alignend - chroffset,
+ left - chroffset,alignend - chroffset,querylength,queryend));
+
+ /* tminus: plus alignend first */
+ endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignend - chroffset));
+ endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignstart - chroffset));
+
+ if (align_residual <= exon_residual) {
+ exon_residual -= align_residual; /* Need to know this if we have a following deletion */
+ align_residual = 0;
+ } else if (++exoni >= nexons) {
+ /* Aligning past the end of the transcript */
+ debug2(printf("6 ABORTING BECAUSE ALIGNING PAST END OF TRANSCRIPT\n"));
+ align_residual = 0;
abortp = true;
} else {
- gaps = Intlist_push(gaps,(int) adj);
+ /* nintrons += 1; */
+ align_residual -= len;
+ queryend -= len; /* tminus */
+ genomicpos = exonstarts[exoni] - 1; /* gplus */
+ debug2(printf("genomicpos %u = %u - 1\n",genomicpos,exonstarts[exoni]));
+ exon_residual = exonbounds[exoni] - exonbounds[exoni - 1];
+ debug2(printf("INTRON\n"));
+ gaps = Intlist_push(gaps,0);
}
-
- /* second part */
- tr_left = tr_left1;
- queryend = querylength - indel_pos - ninserts; /* tminus: compute from queryend to querystart */
- querystart = 0;
-
- trstart = transcript_diag1 + (querylength - queryend); /* tminus */
- overall_trend = transcript_diag1 + (querylength - querystart); /* tminus */
- debug2(printf("transcript_diag %d, trstart %d, trend %d, tr_left %u\n",
- transcript_diag1,trstart,overall_trend,tr_left));
+ }
+ }
- while (exoni < nexons && exonbounds[exoni] <= trstart) {
- exoni++;
- }
+ /* gap */
+ debug2(printf("INDEL: last piece %d | nindels %d | next piece %d\n",len,nindels,exon_residual));
+ debug2(printf("exon residual %d vs nindels %d\n",exon_residual,nindels));
+ if (abortp == true) {
+ /* Skip */
+ } else if (/*len <= nindels ||*/exon_residual <= nindels) {
+ debug2(printf("ABORTING BECAUSE INDEL OVERLAPS SPLICE SITE (7)\n"));
+ abortp = true;
+ } else {
+ gaps = Intlist_push(gaps,(int) adj);
+ }
- if (exoni >= nexons) {
- debug2(printf("ABORTING BECAUSE OF EXONI %d >= NEXONS %d\n",exoni,nexons));
+ /* second part */
+ tr_left = tr_left1;
+ queryend = querylength - indel_pos - ninserts; /* tminus: compute from queryend to querystart */
+ querystart = 0;
+
+ trstart = transcript_diag1 + (querylength - queryend); /* tminus */
+ overall_trend = transcript_diag1 + (querylength - querystart); /* tminus */
+ debug2(printf("transcript_diag %d, trstart %d, trend %d, tr_left %u\n",
+ transcript_diag1,trstart,overall_trend,tr_left));
+
+ while (exoni < nexons && exonbounds[exoni] <= trstart) {
+ exoni++;
+ }
+
+ if (exoni >= nexons) {
+ debug2(printf("ABORTING BECAUSE OF EXONI %d >= NEXONS %d\n",exoni,nexons));
+ abortp = true;
+ } else {
+ debug2(printf("(14) exoni %d out of nexons %d\n",exoni,nexons));
+ if (exoni == 0) {
+ exonpos = trstart;
+ } else {
+ exonpos = trstart - exonbounds[exoni - 1];
+ }
+ exon_residual = exonbounds[exoni] - trstart;
+ debug2(printf("exoni %d => exon_residual %d, exonpos %d\n",exoni,exon_residual,exonpos));
+
+ /* Generate one substring per exon */
+ genomicpos = exonstarts[exoni] + exonpos - 1; /* gplus */
+ debug2(printf("genomicpos %u = %u + %u - 1\n",genomicpos,exonstarts[exoni],exonpos));
+ align_residual = queryend - querystart;
+ while (align_residual > 0) {
+ debug2(printf("abortp %d,align_residual %d, exon_residual %d\n",abortp,align_residual,exon_residual));
+ len = (align_residual <= exon_residual) ? align_residual : exon_residual;
+ querystart = queryend - len; /* tminus */
+
+ alignend = chroffset + genomicpos;
+ alignstart = alignend + len; /* gplus */
+ debug2(left = alignend - (querylength - queryend)); /* tminus != gplus */
+ debug2(printf("tr ultrafast gap case 3 2nd part query %d..%d, align %u..%u [%u..%u], left %u = %u - (%d - %d)\n",
+ querystart,queryend,alignstart,alignend,alignstart - chroffset,alignend - chroffset,
+ left - chroffset,alignend - chroffset,querylength,queryend));
+
+ /* tminus: plus alignend first */
+ endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignend - chroffset));
+ endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignstart - chroffset));
+
+ if (align_residual <= exon_residual) {
+ exon_residual -= align_residual; /* Need to know this if we have a following deletion */
+ align_residual = 0;
+ } else if (++exoni >= nexons) {
+ /* Aligning past the end of the transcript */
+ debug2(printf("6 ABORTING BECAUSE ALIGNING PAST END OF TRANSCRIPT\n"));
+ align_residual = 0;
abortp = true;
} else {
- debug2(printf("(14) exoni %d out of nexons %d\n",exoni,nexons));
- if (exoni == 0) {
- exonpos = trstart;
- } else {
- exonpos = trstart - exonbounds[exoni - 1];
- }
- exon_residual = exonbounds[exoni] - trstart;
- debug2(printf("exoni %d => exon_residual %d, exonpos %d\n",exoni,exon_residual,exonpos));
-
- /* Generate one substring per exon */
- genomicpos = exonstarts[exoni] + exonpos - 1; /* gplus */
- debug2(printf("genomicpos %u = %u + %u - 1\n",genomicpos,exonstarts[exoni],exonpos));
- align_residual = queryend - querystart;
- while (align_residual > 0) {
- debug2(printf("abortp %d,align_residual %d, exon_residual %d\n",abortp,align_residual,exon_residual));
- len = (align_residual <= exon_residual) ? align_residual : exon_residual;
- querystart = queryend - len; /* tminus */
-
- alignend = chroffset + genomicpos;
- alignstart = alignend + len; /* gplus */
- debug2(left = alignend - (querylength - queryend)); /* tminus != gplus */
- debug2(printf("tr ultrafast gap case 3 2nd part query %d..%d, align %u..%u [%u..%u], left %u = %u - (%d - %d)\n",
- querystart,queryend,alignstart,alignend,alignstart - chroffset,alignend - chroffset,
- left - chroffset,alignend - chroffset,querylength,queryend));
-
- /* tminus: plus alignend first */
- endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignend - chroffset));
- endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignstart - chroffset));
-
- if (align_residual <= exon_residual) {
- exon_residual -= align_residual; /* Need to know this if we have a following deletion */
- align_residual = 0;
- } else if (++exoni >= nexons) {
- /* Aligning past the end of the transcript */
- debug2(printf("6 ABORTING BECAUSE ALIGNING PAST END OF TRANSCRIPT\n"));
- align_residual = 0;
- abortp = true;
- } else {
- /* nintrons += 1; */
- align_residual -= len;
- queryend -= len; /* tminus */
- genomicpos = exonstarts[exoni] - 1; /* gplus */
- debug2(printf("genomicpos %u = %u - 1\n",genomicpos,exonstarts[exoni]));
- exon_residual = exonbounds[exoni] - exonbounds[exoni - 1];
- debug2(printf("INTRON\n"));
- gaps = Intlist_push(gaps,0);
- }
- }
- }
-
- if (abortp == true) {
- debug2(printf("ABORTING PATH\n"));
- Uintlist_free(&endpoints);
- Intlist_free(&gaps);
- } else {
- endpoints = Uintlist_reverse(endpoints); /* gplus */
- gaps = Intlist_reverse(gaps); /* gplus */
- debug2(printf("MINUS PATH %d. ENDPOINTS: %s. GAPS: %s\n\n",
- ngminus,Uintlist_to_string(endpoints),Intlist_to_string(gaps)));
- gminus_paths[ngminus++] = Path_new(total_nmatches,trnum,overall_trend,overall_trstart,
- /*trim_low*/trim3,/*trim_high*/trim5,chrnum,chroffset,chrhigh,chrlength,
- endpoints,gaps,concordantp);
+ /* nintrons += 1; */
+ align_residual -= len;
+ queryend -= len; /* tminus */
+ genomicpos = exonstarts[exoni] - 1; /* gplus */
+ debug2(printf("genomicpos %u = %u - 1\n",genomicpos,exonstarts[exoni]));
+ exon_residual = exonbounds[exoni] - exonbounds[exoni - 1];
+ debug2(printf("INTRON\n"));
+ gaps = Intlist_push(gaps,0);
}
+ }
+ }
+
+ if (abortp == true) {
+ debug2(printf("ABORTING PATH\n"));
+ Uintlist_free(&endpoints);
+ Intlist_free(&gaps);
+ } else {
+ endpoints = Uintlist_reverse(endpoints); /* gplus */
+ gaps = Intlist_reverse(gaps); /* gplus */
+ debug2(printf("MINUS PATH %d. ENDPOINTS: %s. GAPS: %s\n\n",
+ ngminus,Uintlist_to_string(endpoints),Intlist_to_string(gaps)));
+ gminus_paths[ngminus++] = Path_new(total_nmatches,trnum,overall_trend,overall_trstart,
+ /*trim_low*/trim3,/*trim_high*/trim5,chrnum,chroffset,chrhigh,chrlength,
+ endpoints,gaps,concordantp);
+ }
- } else {
- /* Case 4 ultrafast gap: tminus, gminus. transcript_plusp == false && transcript_genestrand < 0 */
- debug2(printf("Case 4 ultrafast gap: tminus, gminus\n"));
- /* sensedir = SENSE_ANTI; */
- /* plusp = true; */
-
- endpoints = (Uintlist_T) NULL;
- gaps = (Intlist_T) NULL;
-
- transcript_diag0 = tr_left0 - troffset;
- transcript_diag1 = tr_left1 - troffset;
+ } else {
+ /* Case 4 ultrafast gap: tminus, gminus. transcript_plusp == false && transcript_genestrand < 0 */
+ debug2(printf("Case 4 ultrafast gap: tminus, gminus\n"));
+ /* sensedir = SENSE_ANTI; */
+ /* plusp = true; */
- abortp = false;
+ nexons = Transcriptome_exons(&exonbounds,&exonstarts,transcriptome,trnum);
+ Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
- /* first part */
- tr_left = tr_left0;
- queryend = querylength; /* tminus: compute from queryend to querystart */
- querystart = querylength - indel_pos;
- debug2(printf("\nAnalyzing query %d..%d\n",querystart,queryend));
+ endpoints = (Uintlist_T) NULL;
+ gaps = (Intlist_T) NULL;
+ abortp = false;
- overall_trstart = trstart = transcript_diag0 + (querylength - queryend); /* tminus */
+ transcript_diag0 = tr_left0 - troffset;
+ transcript_diag1 = tr_left1 - troffset;
+
+ /* first part */
+ tr_left = tr_left0;
+ queryend = querylength; /* tminus: compute from queryend to querystart */
+ querystart = querylength - indel_pos;
+ debug2(printf("\nAnalyzing query %d..%d\n",querystart,queryend));
+
+ overall_trstart = trstart = transcript_diag0 + (querylength - queryend); /* tminus */
#ifdef DEBUG2
- trend = transcript_diag0 + (querylength - querystart); /* tminus */
- printf("transcript_diag %d, trstart %d, trend %d, tr_left %u, adj %d\n",
- transcript_diag0,trstart,trend,tr_left,adj);
+ trend = transcript_diag0 + (querylength - querystart); /* tminus */
+ printf("transcript_diag %d, trstart %d, trend %d, tr_left %u, adj %d\n",
+ transcript_diag0,trstart,trend,tr_left,adj);
#endif
-
- exoni = 0;
+
+ exoni = 0;
#if 0 && defined(HAVE_AVX2)
- _trstart = _mm256_set1_epi32(trstart);
- ptr = exonbounds;
- _exonbounds = _mm256_loadu_si256((__m256i *) ptr);
- _diff = _mm256_cmpgt_epi32(_exonbounds,_trstart);
- while (all_zero_p(_diff)) {
- exoni += 8;
- ptr += 8;
- _exonbounds = _mm256_loadu_si256((__m256i *) ptr);
- _diff = _mm256_cmpgt_epi32(_exonbounds,_trstart);
- }
- matchbits = _mm256_movemask_ps(_mm256_castsi256_ps(_diff));
- exoni += count_trailing_zeroes_32(matchbits);
+ _trstart = _mm256_set1_epi32(trstart);
+ ptr = exonbounds;
+ _exonbounds = _mm256_loadu_si256((__m256i *) ptr);
+ _diff = _mm256_cmpgt_epi32(_exonbounds,_trstart);
+ while (all_zero_p(_diff)) {
+ exoni += 8;
+ ptr += 8;
+ _exonbounds = _mm256_loadu_si256((__m256i *) ptr);
+ _diff = _mm256_cmpgt_epi32(_exonbounds,_trstart);
+ }
+ matchbits = _mm256_movemask_ps(_mm256_castsi256_ps(_diff));
+ exoni += count_trailing_zeroes_32(matchbits);
#elif 0 && defined(HAVE_SSE2)
- _trstart = _mm_set1_epi32(trstart);
- ptr = exonbounds;
- _exonbounds = _mm_loadu_si128((__m128i *) ptr);
- _diff = _mm_cmpgt_epi32(_exonbounds,_trstart);
- while (all_zero_p(_diff)) {
- exoni += 4;
- ptr += 4;
- _exonbounds = _mm_loadu_si128((__m128i *) ptr);
- _diff = _mm_cmpgt_epi32(_exonbounds,_trstart);
- }
- matchbits = _mm_movemask_ps(_mm_castsi128_ps(_diff));
- exoni += count_trailing_zeroes_32(matchbits);
+ _trstart = _mm_set1_epi32(trstart);
+ ptr = exonbounds;
+ _exonbounds = _mm_loadu_si128((__m128i *) ptr);
+ _diff = _mm_cmpgt_epi32(_exonbounds,_trstart);
+ while (all_zero_p(_diff)) {
+ exoni += 4;
+ ptr += 4;
+ _exonbounds = _mm_loadu_si128((__m128i *) ptr);
+ _diff = _mm_cmpgt_epi32(_exonbounds,_trstart);
+ }
+ matchbits = _mm_movemask_ps(_mm_castsi128_ps(_diff));
+ exoni += count_trailing_zeroes_32(matchbits);
#else
- while (exoni < nexons && exonbounds[exoni] <= trstart) {
- exoni++;
- }
+ while (exoni < nexons && exonbounds[exoni] <= trstart) {
+ exoni++;
+ }
#endif
-
- if (exoni >= nexons) {
- debug2(printf("ABORTING BECAUSE OF EXONI %d >= NEXONS %d\n",exoni,nexons));
- abortp = true;
- } else {
- debug2(printf("(15) exoni %d out of nexons %d\n",exoni,nexons));
- if (exoni == 0) {
- exonpos = trstart;
- } else {
- exonpos = trstart - exonbounds[exoni - 1];
- }
- exon_residual = exonbounds[exoni] - trstart;
- debug2(printf("exoni %d => exon_residual %d, exonpos %d\n",exoni,exon_residual,exonpos));
-
- /* Generate one substring per exon */
- genomicpos = exonstarts[exoni] - exonpos; /* gminus */
- debug2(printf("genomicpos %u = %u - %u\n",genomicpos,exonstarts[exoni],exonpos));
- align_residual = queryend - querystart;
- while (align_residual > 0) {
- debug2(printf("abortp %d, align_residual %d, exon_residual %d\n",abortp,align_residual,exon_residual));
- len = (align_residual <= exon_residual) ? align_residual : exon_residual;
- querystart = queryend - len; /* tminus */
-
- alignend = chroffset + genomicpos;
- alignstart = alignend - len; /* gminus */
- debug2(left = alignstart - querystart); /* tminus == gminus */
- debug2(printf("tr ultrafast gap case 4 1st part query %d..%d, align %u..%u [%u..%u], left %u = %u - %d\n",
- querystart,queryend,alignstart,alignend,alignstart - chroffset,alignend - chroffset,
- left - chroffset,alignstart - chroffset,querystart));
-
- /* tminus: push alignend first */
- endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignend - chroffset));
- endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignstart - chroffset));
-
- if (align_residual <= exon_residual) {
- exon_residual -= align_residual; /* Need to know this if we have a following deletion */
- align_residual = 0;
- } else if (++exoni >= nexons) {
- /* Aligning past the end of the transcript */
- debug2(printf("8 ABORTING BECAUSE ALIGNING PAST END OF TRANSCRIPT\n"));
- align_residual = 0;
- abortp = true;
- } else {
- /* nintrons += 1; */
- align_residual -= len;
- queryend -= len; /* tminus */
- genomicpos = exonstarts[exoni]; /* gminus */
- debug2(printf("genomicpos %u = %u\n",genomicpos,exonstarts[exoni]));
- exon_residual = exonbounds[exoni] - exonbounds[exoni - 1];
- debug2(printf("INTRON\n"));
- gaps = Intlist_push(gaps,0);
- }
- }
- }
-
- /* gap */
- debug2(printf("INDEL: last piece %d | nindels %d | next piece %d\n",len,nindels,exon_residual));
- debug2(printf("exon residual %d vs nindels %d\n",exon_residual,nindels));
- if (abortp == true) {
- /* Skip */
- } else if (/*len <= nindels ||*/exon_residual <= nindels) {
- debug2(printf("ABORTING BECAUSE INDEL OVERLAPS SPLICE SITE\n"));
+
+ if (exoni >= nexons) {
+ debug2(printf("ABORTING BECAUSE OF EXONI %d >= NEXONS %d\n",exoni,nexons));
+ abortp = true;
+ } else {
+ debug2(printf("(15) exoni %d out of nexons %d\n",exoni,nexons));
+ if (exoni == 0) {
+ exonpos = trstart;
+ } else {
+ exonpos = trstart - exonbounds[exoni - 1];
+ }
+ exon_residual = exonbounds[exoni] - trstart;
+ debug2(printf("exoni %d => exon_residual %d, exonpos %d\n",exoni,exon_residual,exonpos));
+
+ /* Generate one substring per exon */
+ genomicpos = exonstarts[exoni] - exonpos; /* gminus */
+ debug2(printf("genomicpos %u = %u - %u\n",genomicpos,exonstarts[exoni],exonpos));
+ align_residual = queryend - querystart;
+ while (align_residual > 0) {
+ debug2(printf("abortp %d, align_residual %d, exon_residual %d\n",abortp,align_residual,exon_residual));
+ len = (align_residual <= exon_residual) ? align_residual : exon_residual;
+ querystart = queryend - len; /* tminus */
+
+ alignend = chroffset + genomicpos;
+ alignstart = alignend - len; /* gminus */
+ debug2(left = alignstart - querystart); /* tminus == gminus */
+ debug2(printf("tr ultrafast gap case 4 1st part query %d..%d, align %u..%u [%u..%u], left %u = %u - %d\n",
+ querystart,queryend,alignstart,alignend,alignstart - chroffset,alignend - chroffset,
+ left - chroffset,alignstart - chroffset,querystart));
+
+ /* tminus: push alignend first */
+ endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignend - chroffset));
+ endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignstart - chroffset));
+
+ if (align_residual <= exon_residual) {
+ exon_residual -= align_residual; /* Need to know this if we have a following deletion */
+ align_residual = 0;
+ } else if (++exoni >= nexons) {
+ /* Aligning past the end of the transcript */
+ debug2(printf("8 ABORTING BECAUSE ALIGNING PAST END OF TRANSCRIPT\n"));
+ align_residual = 0;
abortp = true;
} else {
- gaps = Intlist_push(gaps,(int) adj);
- }
-
- /* second part */
- tr_left = tr_left1;
- queryend = querylength - indel_pos - ninserts; /* tminus: compute from queryend to querystart */
- querystart = 0;
- debug2(printf("\nAnalyzing query %d..%d\n",querystart,queryend));
-
- trstart = transcript_diag1 + (querylength - queryend); /* tminus */
- overall_trend = transcript_diag1 + (querylength - querystart); /* tminus */
- debug2(printf("transcript_diag %d, trstart %d, trend %d, tr_left %u\n",
- transcript_diag1,trstart,overall_trend,tr_left));
-
- while (exoni < nexons && exonbounds[exoni] <= trstart) {
- exoni++;
+ /* nintrons += 1; */
+ align_residual -= len;
+ queryend -= len; /* tminus */
+ genomicpos = exonstarts[exoni]; /* gminus */
+ debug2(printf("genomicpos %u = %u\n",genomicpos,exonstarts[exoni]));
+ exon_residual = exonbounds[exoni] - exonbounds[exoni - 1];
+ debug2(printf("INTRON\n"));
+ gaps = Intlist_push(gaps,0);
}
-
- if (exoni >= nexons) {
- debug2(printf("ABORTING BECAUSE OF EXONI %d >= NEXONS %d\n",exoni,nexons));
+ }
+ }
+
+ /* gap */
+ debug2(printf("INDEL: last piece %d | nindels %d | next piece %d\n",len,nindels,exon_residual));
+ debug2(printf("exon residual %d vs nindels %d\n",exon_residual,nindels));
+ if (abortp == true) {
+ /* Skip */
+ } else if (/*len <= nindels ||*/exon_residual <= nindels) {
+ debug2(printf("ABORTING BECAUSE INDEL OVERLAPS SPLICE SITE (8)\n"));
+ abortp = true;
+ } else {
+ gaps = Intlist_push(gaps,(int) adj);
+ }
+
+ /* second part */
+ tr_left = tr_left1;
+ queryend = querylength - indel_pos - ninserts; /* tminus: compute from queryend to querystart */
+ querystart = 0;
+ debug2(printf("\nAnalyzing query %d..%d\n",querystart,queryend));
+
+ trstart = transcript_diag1 + (querylength - queryend); /* tminus */
+ overall_trend = transcript_diag1 + (querylength - querystart); /* tminus */
+ debug2(printf("transcript_diag %d, trstart %d, trend %d, tr_left %u\n",
+ transcript_diag1,trstart,overall_trend,tr_left));
+
+ while (exoni < nexons && exonbounds[exoni] <= trstart) {
+ exoni++;
+ }
+
+ if (exoni >= nexons) {
+ debug2(printf("ABORTING BECAUSE OF EXONI %d >= NEXONS %d\n",exoni,nexons));
+ abortp = true;
+ } else {
+ debug2(printf("(16) exoni %d out of nexons %d\n",exoni,nexons));
+ if (exoni == 0) {
+ exonpos = trstart;
+ } else {
+ exonpos = trstart - exonbounds[exoni - 1];
+ }
+ exon_residual = exonbounds[exoni] - trstart;
+ debug2(printf("exoni %d => exon_residual %d, exonpos %d\n",exoni,exon_residual,exonpos));
+
+ /* Generate one substring per exon */
+ genomicpos = exonstarts[exoni] - exonpos; /* gminus */
+ debug2(printf("genomicpos %u = %u - %u\n",genomicpos,exonstarts[exoni],exonpos));
+ align_residual = queryend - querystart;
+ while (align_residual > 0) {
+ debug2(printf("abortp %d, align_residual %d, exon_residual %d\n",abortp,align_residual,exon_residual));
+ len = (align_residual <= exon_residual) ? align_residual : exon_residual;
+ querystart = queryend - len; /* tminus */
+
+ alignend = chroffset + genomicpos;
+ alignstart = alignend - len; /* gminus */
+ debug2(left = alignstart - querystart); /* tminus == gminus */
+ debug2(printf("tr ultrafast gap case 4 2nd part query %d..%d, align %u..%u [%u..%u], left %u = %u - %d\n",
+ querystart,queryend,alignstart,alignend,alignstart - chroffset,alignend - chroffset,
+ left - chroffset,alignstart - chroffset,querystart));
+
+ /* tminus: push alignend first */
+ endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignend - chroffset));
+ endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignstart - chroffset));
+
+ if (align_residual <= exon_residual) {
+ exon_residual -= align_residual; /* Need to know this if we have a following deletion */
+ align_residual = 0;
+ } else if (++exoni >= nexons) {
+ /* Aligning past the end of the transcript */
+ debug2(printf("8 ABORTING BECAUSE ALIGNING PAST END OF TRANSCRIPT\n"));
+ align_residual = 0;
abortp = true;
} else {
- debug2(printf("(16) exoni %d out of nexons %d\n",exoni,nexons));
- if (exoni == 0) {
- exonpos = trstart;
- } else {
- exonpos = trstart - exonbounds[exoni - 1];
- }
- exon_residual = exonbounds[exoni] - trstart;
- debug2(printf("exoni %d => exon_residual %d, exonpos %d\n",exoni,exon_residual,exonpos));
-
- /* Generate one substring per exon */
- genomicpos = exonstarts[exoni] - exonpos; /* gminus */
- debug2(printf("genomicpos %u = %u - %u\n",genomicpos,exonstarts[exoni],exonpos));
- align_residual = queryend - querystart;
- while (align_residual > 0) {
- debug2(printf("abortp %d, align_residual %d, exon_residual %d\n",abortp,align_residual,exon_residual));
- len = (align_residual <= exon_residual) ? align_residual : exon_residual;
- querystart = queryend - len; /* tminus */
-
- alignend = chroffset + genomicpos;
- alignstart = alignend - len; /* gminus */
- debug2(left = alignstart - querystart); /* tminus == gminus */
- debug2(printf("tr ultrafast gap case 4 2nd part query %d..%d, align %u..%u [%u..%u], left %u = %u - %d\n",
- querystart,queryend,alignstart,alignend,alignstart - chroffset,alignend - chroffset,
- left - chroffset,alignstart - chroffset,querystart));
-
- /* tminus: push alignend first */
- endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignend - chroffset));
- endpoints = Uintlist_push(endpoints,(Chrpos_T) (alignstart - chroffset));
-
- if (align_residual <= exon_residual) {
- exon_residual -= align_residual; /* Need to know this if we have a following deletion */
- align_residual = 0;
- } else if (++exoni >= nexons) {
- /* Aligning past the end of the transcript */
- debug2(printf("8 ABORTING BECAUSE ALIGNING PAST END OF TRANSCRIPT\n"));
- align_residual = 0;
- abortp = true;
- } else {
- /* nintrons += 1; */
- align_residual -= len;
- queryend -= len; /* tminus */
- genomicpos = exonstarts[exoni]; /* gminus */
- debug2(printf("genomicpos %u = %u\n",genomicpos,exonstarts[exoni]));
- exon_residual = exonbounds[exoni] - exonbounds[exoni - 1];
- debug2(printf("INTRON\n"));
- gaps = Intlist_push(gaps,0);
- }
- }
- }
-
- if (abortp == true) {
- debug2(printf("ABORTING PATH\n"));
- Uintlist_free(&endpoints);
- Intlist_free(&gaps);
- } else {
- /* gminus: Do not reverse endpoints or gaps */
- debug2(printf("PLUS PATH %d. ENDPOINTS: %s. GAPS: %s\n\n",
- ngplus,Uintlist_to_string(endpoints),Intlist_to_string(gaps)));
- gplus_paths[ngplus++] = Path_new(total_nmatches,trnum,overall_trend,overall_trstart,
- /*trim_low*/trim3,/*trim_high*/trim5,chrnum,chroffset,chrhigh,chrlength,
- endpoints,gaps,concordantp);
+ /* nintrons += 1; */
+ align_residual -= len;
+ queryend -= len; /* tminus */
+ genomicpos = exonstarts[exoni]; /* gminus */
+ debug2(printf("genomicpos %u = %u\n",genomicpos,exonstarts[exoni]));
+ exon_residual = exonbounds[exoni] - exonbounds[exoni - 1];
+ debug2(printf("INTRON\n"));
+ gaps = Intlist_push(gaps,0);
}
}
}
+
+ if (abortp == true) {
+ debug2(printf("ABORTING PATH\n"));
+ Uintlist_free(&endpoints);
+ Intlist_free(&gaps);
+ } else {
+ /* gminus: Do not reverse endpoints or gaps */
+ debug2(printf("PLUS PATH %d. ENDPOINTS: %s. GAPS: %s\n\n",
+ ngplus,Uintlist_to_string(endpoints),Intlist_to_string(gaps)));
+ gplus_paths[ngplus++] = Path_new(total_nmatches,trnum,overall_trend,overall_trstart,
+ /*trim_low*/trim3,/*trim_high*/trim5,chrnum,chroffset,chrhigh,chrlength,
+ endpoints,gaps,concordantp);
+ }
}
}
-
+
FREE(tminus_diagpairs);
*tminus_gplus_paths = gplus_paths;
*n_tminus_gplus = ngplus;
@@ -4628,6 +4643,7 @@ Kmer_search_transcriptome_single (int *found_score_overall, int *found_score_wit
query_compress_fwd,query_compress_rev,
transcript_iit,transcriptome,transcriptomebits,
nmismatches_allowed,/*concordantp*/true) > 0) {
+ /* Found hits */
} else {
/* Will reassign paths from search_transcriptome_complete */
FREE(tplus_gplus_paths);
@@ -4732,6 +4748,7 @@ Kmer_search_transcriptome_paired (int *found_score_5, int *found_score_3,
query5_compress_fwd,query5_compress_rev,
transcript_iit,transcriptome,transcriptomebits,
nmismatches_allowed_5,/*concordantp*/false) > 0) {
+ /* Found hits */
} else {
/* Will reassign paths from search_transcriptome_complete */
FREE(tplus_gplus_paths_5);
@@ -4774,6 +4791,7 @@ Kmer_search_transcriptome_paired (int *found_score_5, int *found_score_3,
query3_compress_fwd,query3_compress_rev,
transcript_iit,transcriptome,transcriptomebits,
nmismatches_allowed_3,/*concordantp*/false) > 0) {
+ /* Found hits */
} else {
/* Will reassign paths from search_transcriptome_complete */
FREE(tplus_gplus_paths_3);
@@ -4857,6 +4875,7 @@ Kmer_search_transcriptome_paired (int *found_score_5, int *found_score_3,
/* Simplified from search_transcriptome_ends */
+/* Appears to be unused currently */
List_T
Kmer_remap_transcriptome (char *remap_sequence, int remap_seqlength,
Chrnum_T chrnum, Chrpos_T lowbound, Chrpos_T highbound,
@@ -5232,6 +5251,7 @@ binary_search_univcoord (int lowi, int highi, Univcoord_T *positions, Univcoord_
}
+/* Genomic procedure */
/* univdiagonal0/left0 is for the beginning of the query; univdiagonal1/left1 is for the end */
/* Indel_resolve_middle_insertion is expecting left for the beginning of the query, or left0 */
static void
@@ -5761,6 +5781,7 @@ combine_ends_plus (int *found_score_overall, int *found_score_within_trims,
}
+/* Genomic procedure */
/* left0 is for the beginning of the query; left1 is for the end */
/* Indel_resolve_middle_insertion is expecting left for the beginning of the query, or left0 */
static void
=====================================
src/pair.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: pair.c 223009 2020-07-10 15:13:26Z twu $";
+static char rcsid[] = "$Id: pair.c 223198 2020-10-15 22:24:59Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -145,6 +145,7 @@ static bool novelsplicingp;
static IIT_T splicesites_iit;
static int trim_indel_score;
+static bool print_margin_p;
static bool gff3_separators_p;
static bool sam_insert_0M_p = false;
static bool force_xs_direction_p;
@@ -159,7 +160,8 @@ static Cigar_action_T cigar_action;
void
Pair_setup (bool novelsplicingp_in, IIT_T splicesites_iit_in, int trim_indel_score_in,
- bool gff3_separators_p_in, bool sam_insert_0M_p_in, bool force_xs_direction_p_in,
+ bool print_margin_p_in, bool gff3_separators_p_in,
+ bool sam_insert_0M_p_in, bool force_xs_direction_p_in,
bool md_lowercase_variant_p_in, bool snps_p_in,
bool gff3_phase_swap_p_in, CDStype_T cdstype_in,
bool cigar_extended_p_in, Cigar_action_T cigar_action_in) {
@@ -168,6 +170,7 @@ Pair_setup (bool novelsplicingp_in, IIT_T splicesites_iit_in, int trim_indel_sco
splicesites_iit = splicesites_iit_in;
trim_indel_score = trim_indel_score_in;
+ print_margin_p = print_margin_p_in;
gff3_separators_p = gff3_separators_p_in;
sam_insert_0M_p = sam_insert_0M_p_in;
force_xs_direction_p = force_xs_direction_p_in;
@@ -572,19 +575,37 @@ Pair_protect_list (List_T pairs) {
static char *RULER = " . : . : . : . : . :";
static void
print_top_ruler (Filestring_T fp, int n, int npairs, int margin, int wraplength) {
- FPRINTF(fp,"%*d ",margin,n);
- if (n + wraplength < npairs) {
- FPRINTF(fp,"%s\n",RULER);
+ int length, i;
+
+ if (npairs - n > wraplength) {
+ /* Complete line */
+ length = wraplength;
} else {
- FPRINTF(fp,"%.*s\n",npairs-n,RULER);
+ /* Final line */
+ length = npairs - n;
+ }
+
+ if (print_margin_p == true) {
+ FPRINTF(fp,"%*d ",margin,n);
}
+ i = 0;
+ while (i + (int) strlen(RULER) < length) {
+ FPRINTF(fp,"%s",RULER);
+ i += (int) strlen(RULER);
+ }
+ if (i < length) {
+ FPRINTF(fp,"%.*s\n",length-i,RULER);
+ }
+
return;
}
/*
static void
print_bottom_ruler (int n, int npairs, int margin, int wraplength) {
- printf("%*s ",margin,"");
+ if (print_margin_p == true) {
+ printf("%*s ",margin,"");
+ }
if (n + wraplength < npairs) {
printf("%s\n",RULER);
} else {
@@ -601,7 +622,9 @@ print_cdna_sequence (Filestring_T fp, struct T *ptr, int n, int npairs, int marg
int i;
this = ptr;
- FPRINTF(fp,"%*u ",margin,this->querypos + ONEBASEDP);
+ if (print_margin_p == true) {
+ FPRINTF(fp,"%*u ",margin,this->querypos + ONEBASEDP);
+ }
for (i = 0; n < npairs && i < wraplength; n++, i++) {
this = ptr++;
PUTC(this->cdna,fp);
@@ -640,7 +663,9 @@ print_peptide (Filestring_T fp, struct T *ptr, int n, int npairs, int margin,
struct T *this;
int aapos, i;
- if ((aapos = find_aapos_in_line(ptr,n,npairs,wraplength,genomep)) < 0) {
+ if (print_margin_p == false) {
+ /* Skip */
+ } else if ((aapos = find_aapos_in_line(ptr,n,npairs,wraplength,genomep)) < 0) {
FPRINTF(fp,"%*s ",margin,"");
} else {
/* 4 is length of "aa.c" and "aa.g" */
@@ -673,7 +698,9 @@ print_alignment (Filestring_T fp, struct T *ptr, int n, int npairs,
struct T *this;
int i;
- FPRINTF(fp,"%*s ",margin,"");
+ if (print_margin_p == true) {
+ FPRINTF(fp,"%*s ",margin,"");
+ }
for (i = 0; n < npairs && i < wraplength; n++, i++) {
this = ptr++;
@@ -729,7 +756,9 @@ print_genomic_sequence (Filestring_T fp, struct T *ptr, int n, int npairs,
} else {
sprintf(Buffer,"%s:%llu",chrstring,(unsigned long long) (this->genomepos + ONEBASEDP));
}
- FPRINTF(fp,"%*s ",margin,Buffer);
+ if (print_margin_p == true) {
+ FPRINTF(fp,"%*s ",margin,Buffer);
+ }
for (i = 0; n < npairs && i < wraplength; n++, i++) {
this = ptr++;
if (this->comp == EXTRAEXON_COMP) {
@@ -756,7 +785,9 @@ print_genomicalt_sequence (Filestring_T fp, struct T *ptr, int n, int npairs,
} else {
sprintf(Buffer,"%s:%llu",chrstring, (unsigned long long) (this->genomepos + ONEBASEDP));
}
- FPRINTF(fp,"%*s ",margin,Buffer);
+ if (print_margin_p == true) {
+ FPRINTF(fp,"%*s ",margin,Buffer);
+ }
for (i = 0; n < npairs && i < wraplength; n++, i++) {
this = ptr++;
if (this->comp == EXTRAEXON_COMP) {
=====================================
src/pair.h
=====================================
@@ -1,4 +1,4 @@
-/* $Id: pair.h 222194 2020-03-23 13:44:44Z twu $ */
+/* $Id: pair.h 223198 2020-10-15 22:24:59Z twu $ */
#ifndef PAIR_INCLUDED
#define PAIR_INCLUDED
@@ -29,7 +29,8 @@ typedef enum {CIGAR_ACTION_IGNORE, CIGAR_ACTION_WARNING, CIGAR_ACTION_NOPRINT, C
extern void
Pair_setup (bool novelsplicingp_in, IIT_T splicesites_iit_in, int trim_indel_score_in,
- bool gff3_separators_p_in, bool sam_insert_0M_p_in, bool force_xs_direction_p_in,
+ bool print_margin_p_in, bool gff3_separators_p_in,
+ bool sam_insert_0M_p_in, bool force_xs_direction_p_in,
bool md_lowercase_variant_p_in, bool snps_p_in,
bool gff3_phase_swap_p_in, CDStype_T cdstype_in,
bool cigar_extended_p_in, Cigar_action_T cigar_action_in);
=====================================
src/stage3hr.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3hr.c 223081 2020-09-13 14:21:03Z twu $";
+static char rcsid[] = "$Id: stage3hr.c 223176 2020-10-15 01:51:23Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -173,6 +173,7 @@ static char rcsid[] = "$Id: stage3hr.c 223081 2020-09-13 14:21:03Z twu $";
#define MAPQ_MAXIMUM_SCORE 40
+/* #define REMAP_TRANSCRIPTOME */
static bool omit_concordant_uniq_p = false;
static bool omit_concordant_mult_p = false;
@@ -197,7 +198,6 @@ static int circular_typeint;
static Genome_T transcriptomebits;
static Transcriptome_T transcriptome;
static Univ_IIT_T transcript_iit;
-static bool remap_transcriptome_p = false;
static IIT_T tally_iit;
static int *tally_divint_crosstable;
@@ -3782,7 +3782,7 @@ Stage3end_new_precomputed (int *found_score_overall, int *found_score_within_tri
#ifdef DEBUG0
- printf("NEW SUBSTRINGS (query order)\n");
+ printf("NEW SUBSTRINGS (query order) (1)\n");
for (p = new->substrings_1toN; p != NULL; p = List_next(p)) {
substring = List_head(p);
if (Substring_ambiguous_p(substring) == true) {
@@ -5468,7 +5468,7 @@ Stage3end_new_splice (int *found_score_overall, int *found_score_within_trims,
}
#ifdef DEBUG0
- printf("NEW SUBSTRINGS (query order)\n");
+ printf("NEW SUBSTRINGS (query order) (2)\n");
for (p = new->substrings_1toN; p != NULL; p = List_next(p)) {
substring = List_head(p);
if (Substring_ambiguous_p(substring) == true) {
@@ -5570,7 +5570,8 @@ Stage3end_new_splice (int *found_score_overall, int *found_score_within_trims,
} else {
}
- if (transcriptomep == true && remap_transcriptome_p == true && substring_for_concordance != NULL) {
+#ifdef REMAP_TRANSCRIPTOME
+ if (transcriptomep == true && substring_for_concordance != NULL) {
/* Remap substring_for_concordance */
remap_sequence = Substring_genomic_sequence(&remap_seqlength,substring_for_concordance,genomecomp);
if ((transcripts = Kmer_remap_transcriptome(remap_sequence,remap_seqlength,new->effective_chrnum,
@@ -5591,6 +5592,7 @@ Stage3end_new_splice (int *found_score_overall, int *found_score_within_trims,
}
FREE(remap_sequence);
}
+#endif
debug0(printf("*****Method %s: Returning new splice %p at genomic %u..%u, donor %p (%u => %u), acceptor %p (%u => %u), score %d\n\n",
Method_string(method),new,new->genomicstart - new->chroffset,new->genomicend - new->chroffset,donor,
@@ -5771,7 +5773,7 @@ Stage3end_new_distant (int *found_score_overall, int *found_score_within_trims,
new->plusp = Substring_plusp(substring_for_concordance);
#ifdef DEBUG0
- printf("NEW SUBSTRINGS (query order)\n");
+ printf("NEW SUBSTRINGS (query order) (3)\n");
for (p = new->substrings_1toN; p != NULL; p = List_next(p)) {
substring = List_head(p);
if (Substring_ambiguous_p(substring) == true) {
@@ -5862,7 +5864,8 @@ Stage3end_new_distant (int *found_score_overall, int *found_score_within_trims,
} else {
}
- if (transcriptomep == true && remap_transcriptome_p == true && substring_for_concordance != NULL) {
+#ifdef REMAP_TRANSCRIPTOME
+ if (transcriptomep == true && substring_for_concordance != NULL) {
/* Remap substring_for_concordance */
remap_sequence = Substring_genomic_sequence(&remap_seqlength,substring_for_concordance,genomecomp);
if ((transcripts = Kmer_remap_transcriptome(remap_sequence,remap_seqlength,new->effective_chrnum,
@@ -5883,6 +5886,7 @@ Stage3end_new_distant (int *found_score_overall, int *found_score_within_trims,
}
FREE(remap_sequence);
}
+#endif
debug0(printf("*****Method distant: Returning new distant %p at genomic %u..%u, startfrag %p (%u => ), endfrag %p (%u => ), score %d\n\n",
new,new->genomicstart - new->chroffset,new->genomicend - new->chroffset,
@@ -11802,10 +11806,8 @@ Stage3pair_new (T hit5_orig, T hit3_orig, int genestrand, int sensedir, Pairtype
unalias_circular(hit3);
}
- if (remap_transcriptome_p == false) {
- /* Do not remap */
-
- } else if (hit5->transcripts != NULL && hit3->transcripts != NULL) {
+#ifdef REMAP_TRANSCRIPTOME
+ if (hit5->transcripts != NULL && hit3->transcripts != NULL) {
/* No need to remap */
} else if (hit5->transcripts != NULL && hit3->transcripts == NULL) {
@@ -11836,6 +11838,7 @@ Stage3pair_new (T hit5_orig, T hit3_orig, int genestrand, int sensedir, Pairtype
}
FREE(remap_sequence);
}
+#endif
#if 0
/* Need this in addition to Stage3end_filter_concordant_tr, to
=====================================
src/substring.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: substring.c 223162 2020-10-12 22:47:42Z twu $";
+static char rcsid[] = "$Id: substring.c 223173 2020-10-15 01:50:21Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -5066,7 +5066,7 @@ Substring_count_mismatches_region (int *ref_nmismatches, T this, int trim_querys
assert(left_bound <= right_bound);
#endif
- if (left_bound > right_bound) {
+ if (left_bound >= right_bound) {
return 0;
} else if (this->plusp) {
return Genome_count_mismatches_substring(&(*ref_nmismatches),genomebits,genomebits_alt,this->query_compress,this->left,
View it on GitLab: https://salsa.debian.org/med-team/gmap/-/compare/e36c478dd913ea44b8ccff832e1ed3c2409790ca...c42803c614bca47c818ce65af0a53b368a57e334
--
View it on GitLab: https://salsa.debian.org/med-team/gmap/-/compare/e36c478dd913ea44b8ccff832e1ed3c2409790ca...c42803c614bca47c818ce65af0a53b368a57e334
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20201021/efe3c9e1/attachment-0001.html>
More information about the debian-med-commit
mailing list