[med-svn] [daligner] 01/09: Imported Upstream version 1.0+20151214
Afif Elghraoui
afif-guest at moszumanska.debian.org
Sat Jan 9 07:25:27 UTC 2016
This is an automated email from the git hooks/post-receive script.
afif-guest pushed a commit to branch master
in repository daligner.
commit c71ad71d8dc658d51501273f9efb9ef60a761be5
Author: Afif Elghraoui <afif at ghraoui.name>
Date: Fri Jan 8 21:11:29 2016 -0800
Imported Upstream version 1.0+20151214
---
DB.c | 198 +++--
DB.h | 50 +-
HPCdaligner.c | 37 -
HPCmapper.c | 60 +-
LAcat.c | 41 +-
LAcheck.c | 37 -
LAdump.c | 478 +++++++++++
LAindex.c | 201 +++++
LAmerge.c | 108 ++-
LAshow.c | 66 +-
LAsort.c | 84 +-
LAsplit.c | 37 -
LAupgrade.Dec.31.2014.c | 44 +-
LICENSE | 34 +
Makefile | 10 +-
QV.c | 37 -
QV.h | 37 -
README | 110 ++-
align.c | 2075 +++++++++++++++++++++++++++++++++++++++--------
align.h | 72 +-
daligner.c | 268 +++---
filter.c | 703 ++++++++++++----
filter.h | 42 +-
23 files changed, 3515 insertions(+), 1314 deletions(-)
diff --git a/DB.c b/DB.c
index 27b202e..c3f8129 100644
--- a/DB.c
+++ b/DB.c
@@ -1,47 +1,9 @@
-/************************************************************************************\
-* *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved. *
-* *
-* Redistribution and use in source and binary forms, with or without modification, *
-* are permitted provided that the following conditions are met: *
-* *
-* · Redistributions of source code must retain the above copyright notice, this *
-* list of conditions and the following disclaimer. *
-* *
-* · Redistributions in binary form must reproduce the above copyright notice, this *
-* list of conditions and the following disclaimer in the documentation and/or *
-* other materials provided with the distribution. *
-* *
-* · The name of EWM may not be used to endorse or promote products derived from *
-* this software without specific prior written permission. *
-* *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
-* *
-* For any issues regarding this software and its use, contact EWM at: *
-* *
-* Eugene W. Myers Jr. *
-* Bautzner Str. 122e *
-* 01099 Dresden *
-* GERMANY *
-* Email: gene.myers at gmail.com *
-* *
-\************************************************************************************/
-
/*******************************************************************************************
*
* Compressed data base module. Auxiliary routines to open and manipulate a data base for
* which the sequence and read information are separated into two separate files, and the
* sequence is compressed into 2-bits for each base. Support for tracks of additional
- * information, and trimming according to the current partition. Eventually will also
- * support compressed quality information.
+ * information, and trimming according to the current partition.
*
* Author : Gene Myers
* Date : July 2013
@@ -509,6 +471,8 @@ int Open_DB(char* path, HITS_DB *db)
nreads = ulast-ufirst;
if (part <= 0)
{ db->reads = (HITS_READ *) Malloc(sizeof(HITS_READ)*(nreads+2),"Allocating Open_DB index");
+ if (db->reads == NULL)
+ goto error2;
db->reads += 1;
if (fread(db->reads,sizeof(HITS_READ),nreads,index) != (size_t) nreads)
{ EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
@@ -521,7 +485,9 @@ int Open_DB(char* path, HITS_DB *db)
int i, r, maxlen;
int64 totlen;
- reads = (HITS_READ *) Malloc(sizeof(HITS_READ)*(nreads+2),"Allocating Open_DB index");
+ reads = (HITS_READ *) Malloc(sizeof(HITS_READ)*(nreads+2),"Allocating Open_DB index");
+ if (reads == NULL)
+ goto error2;
reads += 1;
fseeko(index,sizeof(HITS_READ)*ufirst,SEEK_CUR);
@@ -679,6 +645,111 @@ void Trim_DB(HITS_DB *db)
}
}
+// The DB has already been trimmed, but a track over the untrimmed DB needs to be loaded.
+// Trim the track by rereading the untrimmed DB index from the file system.
+
+static int Late_Track_Trim(HITS_DB *db, HITS_TRACK *track, int ispart)
+{ int i, j, r;
+ int allflag, cutoff;
+ int ureads;
+ char *root;
+ HITS_READ read;
+ FILE *indx;
+
+ if (!db->trimmed) return (0);
+
+ if (db->cutoff <= 0 && db->all) return (0);
+
+ cutoff = db->cutoff;
+ if (db->all)
+ allflag = 0;
+ else
+ allflag = DB_BEST;
+
+ root = rindex(db->path,'/') + 2;
+ indx = Fopen(Catenate(db->path,"","",".idx"),"r");
+ fseeko(indx,sizeof(HITS_DB) + sizeof(HITS_READ)*db->ufirst,SEEK_SET);
+ if (ispart)
+ ureads = ((int *) (db->reads))[-1];
+ else
+ ureads = db->ureads;
+
+ if (strcmp(track->name,". at qvs") == 0)
+ { EPRINTF(EPLACE,"%s: Cannot load QV track after trimming\n",Prog_Name);
+ fclose(indx);
+ EXIT(1);
+ }
+
+ { int *anno4, size;
+ int64 *anno8;
+ char *anno, *data;
+
+ size = track->size;
+ data = (char *) track->data;
+ if (data == NULL)
+ { anno = (char *) track->anno;
+ j = r = 0;
+ for (i = r = 0; i < ureads; i++, r += size)
+ { if (fread(&read,sizeof(HITS_READ),1,indx) != 1)
+ { EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
+ fclose(indx);
+ EXIT(1);
+ }
+ if ((read.flags & DB_BEST) >= allflag && read.rlen >= cutoff)
+ { memmove(anno+j,anno+r,size);
+ j += size;
+ }
+ r += size;
+ }
+ memmove(anno+j,anno+r,size);
+ }
+ else if (size == 4)
+ { int ai;
+
+ anno4 = (int *) (track->anno);
+ j = anno4[0] = 0;
+ for (i = 0; i < ureads; i++)
+ { if (fread(&read,sizeof(HITS_READ),1,indx) != 1)
+ { EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
+ fclose(indx);
+ EXIT(1);
+ }
+ if ((read.flags & DB_BEST) >= allflag && read.rlen >= cutoff)
+ { ai = anno4[i];
+ anno4[j+1] = anno4[j] + (anno4[i+1]-ai);
+ memmove(data+anno4[j],data+ai,anno4[i+1]-ai);
+ j += 1;
+ }
+ }
+ track->data = Realloc(track->data,anno4[j],NULL);
+ }
+ else // size == 8
+ { int64 ai;
+
+ anno8 = (int64 *) (track->anno);
+ j = anno8[0] = 0;
+ for (i = 0; i < ureads; i++)
+ { if (fread(&read,sizeof(HITS_READ),1,indx) != 1)
+ { EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
+ fclose(indx);
+ EXIT(1);
+ }
+ if ((read.flags & DB_BEST) >= allflag && read.rlen >= cutoff)
+ { ai = anno8[i];
+ anno8[j+1] = anno8[j] + (anno8[i+1]-ai);
+ memmove(data+anno8[j],data+ai,anno8[i+1]-ai);
+ j += 1;
+ }
+ }
+ track->data = Realloc(track->data,anno8[j],NULL);
+ }
+ track->anno = Realloc(track->anno,track->size*(j+1),NULL);
+ }
+
+ fclose(indx);
+ return (0);
+}
+
// Shut down an open 'db' by freeing all associated space, including tracks and QV structures,
// and any open file pointers. The record pointed at by db however remains (the user
// supplied it and so should free it).
@@ -690,7 +761,8 @@ void Close_DB(HITS_DB *db)
free(((char *) (db->bases)) - 1);
else if (db->bases != NULL)
fclose((FILE *) db->bases);
- free(db->reads-1);
+ if (db->reads != NULL)
+ free(db->reads-1);
free(db->path);
Close_QVs(db);
@@ -969,9 +1041,9 @@ void Close_QVs(HITS_DB *db)
// -1: Track is not the right size of DB either trimmed or untrimmed
// -2: Could not find the track
-int Check_Track(HITS_DB *db, char *track)
+int Check_Track(HITS_DB *db, char *track, int *kind)
{ FILE *afile;
- int tracklen, ispart;
+ int tracklen, size, ispart;
int ureads, treads;
afile = NULL;
@@ -988,7 +1060,16 @@ int Check_Track(HITS_DB *db, char *track)
if (fread(&tracklen,sizeof(int),1,afile) != 1)
return (-1);
+ if (fread(&size,sizeof(int),1,afile) != 1)
+ return (-1);
+ if (size == 0)
+ *kind = MASK_TRACK;
+ else if (size > 0)
+ *kind = CUSTOM_TRACK;
+ else
+ return (-1);
+
fclose(afile);
if (ispart)
@@ -1000,10 +1081,10 @@ int Check_Track(HITS_DB *db, char *track)
treads = db->treads;
}
- if (tracklen == treads)
- return (1);
- else if (tracklen == ureads)
+ if (tracklen == ureads)
return (0);
+ else if (tracklen == treads)
+ return (1);
else
return (-1);
}
@@ -1067,10 +1148,13 @@ HITS_TRACK *Load_Track(HITS_DB *db, char *track)
{ EPRINTF(EPLACE,"%s: Track '%s' annotation file is junk\n",Prog_Name,track);
goto error;
}
- if (size <= 0)
+
+ if (size < 0)
{ EPRINTF(EPLACE,"%s: Track '%s' annotation file is junk\n",Prog_Name,track);
goto error;
}
+ if (size == 0)
+ size = 8;
if (ispart)
{ ureads = ((int *) (db->reads))[-1];
@@ -1082,16 +1166,23 @@ HITS_TRACK *Load_Track(HITS_DB *db, char *track)
}
if (db->trimmed)
- { if (tracklen != treads)
+ { if (tracklen != treads && tracklen != ureads)
{ EPRINTF(EPLACE,"%s: Track '%s' not same size as database !\n",Prog_Name,track);
goto error;
}
if ( ! ispart && db->part > 0)
- fseeko(afile,size*db->tfirst,SEEK_CUR);
+ { if (tracklen == treads)
+ fseeko(afile,size*db->tfirst,SEEK_CUR);
+ else
+ fseeko(afile,size*db->ufirst,SEEK_CUR);
+ }
}
else
{ if (tracklen != ureads)
- { EPRINTF(EPLACE,"%s: Track '%s' not same size as database !\n",Prog_Name,track);
+ { if (tracklen == treads)
+ EPRINTF(EPLACE,"%s: Track '%s' is for a trimmed DB !\n",Prog_Name,track);
+ else
+ EPRINTF(EPLACE,"%s: Track '%s' not same size as database !\n",Prog_Name,track);
goto error;
}
if ( ! ispart && db->part > 0)
@@ -1166,6 +1257,11 @@ HITS_TRACK *Load_Track(HITS_DB *db, char *track)
record->anno = anno;
record->size = size;
+ if (db->trimmed && tracklen != treads)
+ { if (Late_Track_Trim(db,record,ispart))
+ goto error;
+ }
+
if (db->tracks != NULL && strcmp(db->tracks->name,". at qvs") == 0)
{ record->next = db->tracks->next;
db->tracks->next = record;
@@ -1178,7 +1274,7 @@ HITS_TRACK *Load_Track(HITS_DB *db, char *track)
return (record);
error:
- if (record == NULL)
+ if (record != NULL)
free(record);
if (data != NULL)
free(data);
diff --git a/DB.h b/DB.h
index 567a91a..48b319e 100644
--- a/DB.h
+++ b/DB.h
@@ -1,40 +1,3 @@
-/************************************************************************************\
-* *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved. *
-* *
-* Redistribution and use in source and binary forms, with or without modification, *
-* are permitted provided that the following conditions are met: *
-* *
-* · Redistributions of source code must retain the above copyright notice, this *
-* list of conditions and the following disclaimer. *
-* *
-* · Redistributions in binary form must reproduce the above copyright notice, this *
-* list of conditions and the following disclaimer in the documentation and/or *
-* other materials provided with the distribution. *
-* *
-* · The name of EWM may not be used to endorse or promote products derived from *
-* this software without specific prior written permission. *
-* *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
-* *
-* For any issues regarding this software and its use, contact EWM at: *
-* *
-* Eugene W. Myers Jr. *
-* Bautzner Str. 122e *
-* 01099 Dresden *
-* GERMANY *
-* Email: gene.myers at gmail.com *
-* *
-\************************************************************************************/
-
/*******************************************************************************************
*
* Compressed data base module. Auxiliary routines to open and manipulate a data base for
@@ -358,8 +321,15 @@ void Close_QVs(HITS_DB *db);
// 0: Track is for untrimmed DB
// -1: Track is not the right size of DB either trimmed or untrimmed
// -2: Could not find the track
+ // In addition, if opened (0 or 1 returned), then kind points at an integer indicating
+ // the type of track as follows:
+ // CUSTOM 0 => a custom track
+ // MASK 1 => a mask track
+
+#define CUSTOM_TRACK 0
+#define MASK_TRACK 1
-int Check_Track(HITS_DB *db, char *track);
+int Check_Track(HITS_DB *db, char *track, int *kind);
// If track is not already in the db's track list, then allocate all the storage for it,
// read it in from the appropriate file, add it to the track list, and return a pointer
@@ -427,11 +397,11 @@ int Load_QVentry(HITS_DB *db, int i, char **entry, int ascii);
int Read_All_Sequences(HITS_DB *db, int ascii);
- // For the DB or DAM "path" = "prefix/root[.db|.dam]", find all the files for that DB, i.e. all
+ // For the DB or DAM "path" = "prefix/root.[db|dam]", find all the files for that DB, i.e. all
// those of the form "prefix/[.]root.part" and call actor with the complete path to each file
// pointed at by path, and the suffix of the path by extension. The . proceeds the root
// name if the defined constant HIDE_FILES is set. Always the first call is with the
- // path "prefix/root.db" and extension "db". There will always be calls for
+ // path "prefix/root.[db|dam]" and extension "db" or "dam". There will always be calls for
// "prefix/[.]root.idx" and "prefix/[.]root.bps". All other calls are for *tracks* and
// so this routine gives one a way to know all the tracks associated with a given DB.
// -1 is returned if the path could not be found, and 1 is returned if an error (reported
diff --git a/HPCdaligner.c b/HPCdaligner.c
index 67ddaff..cae006d 100644
--- a/HPCdaligner.c
+++ b/HPCdaligner.c
@@ -1,40 +1,3 @@
-/************************************************************************************\
-* *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved. *
-* *
-* Redistribution and use in source and binary forms, with or without modification, *
-* are permitted provided that the following conditions are met: *
-* *
-* · Redistributions of source code must retain the above copyright notice, this *
-* list of conditions and the following disclaimer. *
-* *
-* · Redistributions in binary form must reproduce the above copyright notice, this *
-* list of conditions and the following disclaimer in the documentation and/or *
-* other materials provided with the distribution. *
-* *
-* · The name of EWM may not be used to endorse or promote products derived from *
-* this software without specific prior written permission. *
-* *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
-* *
-* For any issues regarding this software and its use, contact EWM at: *
-* *
-* Eugene W. Myers Jr. *
-* Bautzner Str. 122e *
-* 01099 Dresden *
-* GERMANY *
-* Email: gene.myers at gmail.com *
-* *
-\************************************************************************************/
-
/*********************************************************************************************\
*
* Produce a script to compute overlaps for all block pairs of a DB, and then sort and merge
diff --git a/HPCmapper.c b/HPCmapper.c
index 3649b5f..f46c649 100644
--- a/HPCmapper.c
+++ b/HPCmapper.c
@@ -1,44 +1,7 @@
-/************************************************************************************\
-* *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved. *
-* *
-* Redistribution and use in source and binary forms, with or without modification, *
-* are permitted provided that the following conditions are met: *
-* *
-* · Redistributions of source code must retain the above copyright notice, this *
-* list of conditions and the following disclaimer. *
-* *
-* · Redistributions in binary form must reproduce the above copyright notice, this *
-* list of conditions and the following disclaimer in the documentation and/or *
-* other materials provided with the distribution. *
-* *
-* · The name of EWM may not be used to endorse or promote products derived from *
-* this software without specific prior written permission. *
-* *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
-* *
-* For any issues regarding this software and its use, contact EWM at: *
-* *
-* Eugene W. Myers Jr. *
-* Bautzner Str. 122e *
-* 01099 Dresden *
-* GERMANY *
-* Email: gene.myers at gmail.com *
-* *
-\************************************************************************************/
-
/*********************************************************************************************\
*
- * Produce a script to compute overlaps for all block pairs of a DB, and then sort and merge
- * them into as many .las files as their are blocks.
+ * Produce a script to compute overlaps for all block pairs between two DBs, and then sort
+ * and merge * them into as many .las files as their are blocks of the 1st DB.
*
* Author: Gene Myers
* Date : December 31, 2014
@@ -63,7 +26,7 @@ static char *Usage[] =
{ "[-vb] [-k<int(20)>] [-w<int(6)>] [-h<int(50)>] [-t<int>] [-M<int>]",
" [-e<double(.85)] [-l<int(1000)>] [-s<int(100)] [-H<int>]",
" [-m<track>]+ [-dal<int(4)>] [-deg<int(25)>]",
- " <ref:db|dam> <reads:db|dam> [<first:int>[-<last:int>]]"
+ " <ref:db|dam> <reads:db|dam> [<first:int>[-<last:int>]]"
};
static int power(int base, int exp)
@@ -90,7 +53,7 @@ int main(int argc, char *argv[])
char *pwd2, *root2;
int MUNIT, DUNIT;
- int VON, BON;
+ int VON, BON, CON;
int WINT, TINT, HGAP, HINT, KINT, SINT, LINT, MINT;
double EREL;
int MMAX, MTOP;
@@ -125,7 +88,7 @@ int main(int argc, char *argv[])
if (argv[i][0] == '-')
switch (argv[i][1])
{ default:
- ARG_FLAGS("vbd");
+ ARG_FLAGS("vbc");
break;
case 'k':
ARG_POSITIVE(KINT,"K-mer length")
@@ -209,6 +172,7 @@ int main(int argc, char *argv[])
VON = flags['v'];
BON = flags['b'];
+ CON = flags['c'];
if (argc < 3 || argc > 4)
{ fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage[0]);
@@ -455,6 +419,8 @@ int main(int argc, char *argv[])
printf("LAsort");
if (VON)
printf(" -v");
+ if (CON)
+ printf(" -c");
for (k = 0; k < NTHREADS; k++)
{ if (useblock1)
printf(" %s.%d",root1,i);
@@ -476,6 +442,8 @@ int main(int argc, char *argv[])
printf(" && LAmerge");
if (VON)
printf(" -v");
+ if (CON)
+ printf(" -c");
if (nblocks1 == 1)
if (useblock2)
printf(" %s.%s.%d",root1,root2,j);
@@ -547,7 +515,7 @@ int main(int argc, char *argv[])
if (nblocks1 > 1)
{ int pow, mway;
- // Determine most balance mway for merging in ceil(log_mrg lblock) levels
+ // Determine most balance mway for merging in ceil(log_mrg nblock1) levels
pow = 1;
for (level = 0; pow < nblocks1; level++)
@@ -586,6 +554,8 @@ int main(int argc, char *argv[])
printf("LAmerge");
if (VON)
printf(" -v");
+ if (CON)
+ printf(" -c");
if (i == level)
if (useblock2)
printf(" %s.%s.%d",root1,root2,j);
@@ -594,10 +564,10 @@ int main(int argc, char *argv[])
else
printf(" L%d.%d.%d",i+1,j,p);
for (k = low; k <= hgh; k++)
- printf(" L%d.%d.%d",i,j,k);
+ printf(" L%d.%d.%d",i,k,j);
printf(" && rm");
for (k = low; k <= hgh; k++)
- printf(" L%d.%d.%d.las",i,j,k);
+ printf(" L%d.%d.%d.las",i,k,j);
#ifdef LSF
printf("\"");
#endif
diff --git a/LAcat.c b/LAcat.c
index f7f79fc..b8b8b0e 100644
--- a/LAcat.c
+++ b/LAcat.c
@@ -1,40 +1,3 @@
-/************************************************************************************\
-* *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved. *
-* *
-* Redistribution and use in source and binary forms, with or without modification, *
-* are permitted provided that the following conditions are met: *
-* *
-* · Redistributions of source code must retain the above copyright notice, this *
-* list of conditions and the following disclaimer. *
-* *
-* · Redistributions in binary form must reproduce the above copyright notice, this *
-* list of conditions and the following disclaimer in the documentation and/or *
-* other materials provided with the distribution. *
-* *
-* · The name of EWM may not be used to endorse or promote products derived from *
-* this software without specific prior written permission. *
-* *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
-* *
-* For any issues regarding this software and its use, contact EWM at: *
-* *
-* Eugene W. Myers Jr. *
-* Bautzner Str. 122e *
-* 01099 Dresden *
-* GERMANY *
-* Email: gene.myers at gmail.com *
-* *
-\************************************************************************************/
-
/*******************************************************************************************
*
* Merge together in index order, overlap files <XXX>.1.las, <XXX>.2.las, ... into a
@@ -92,7 +55,7 @@ int main(int argc, char *argv[])
novl = 0;
tspace = 0;
mspace = 0;
- tbytes = sizeof(uint8);
+ tbytes = 0;
for (i = 0; 1; i++)
{ char *name = Catenate(pwd,"/",root,Numbered_Suffix(".",i+1,".las"));
if ((input = fopen(name,"r")) == NULL) break;
@@ -117,7 +80,7 @@ int main(int argc, char *argv[])
fclose(input);
}
fwrite(&novl,sizeof(int64),1,stdout);
- fwrite(&tspace,sizeof(int32),1,stdout);
+ fwrite(&tspace,sizeof(int),1,stdout);
}
{ int i, j;
diff --git a/LAcheck.c b/LAcheck.c
index a0f2fba..326d140 100644
--- a/LAcheck.c
+++ b/LAcheck.c
@@ -1,40 +1,3 @@
-/************************************************************************************\
-* *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved. *
-* *
-* Redistribution and use in source and binary forms, with or without modification, *
-* are permitted provided that the following conditions are met: *
-* *
-* · Redistributions of source code must retain the above copyright notice, this *
-* list of conditions and the following disclaimer. *
-* *
-* · Redistributions in binary form must reproduce the above copyright notice, this *
-* list of conditions and the following disclaimer in the documentation and/or *
-* other materials provided with the distribution. *
-* *
-* · The name of EWM may not be used to endorse or promote products derived from *
-* this software without specific prior written permission. *
-* *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
-* *
-* For any issues regarding this software and its use, contact EWM at: *
-* *
-* Eugene W. Myers Jr. *
-* Bautzner Str. 122e *
-* 01099 Dresden *
-* GERMANY *
-* Email: gene.myers at gmail.com *
-* *
-\************************************************************************************/
-
/*******************************************************************************************
*
* Check the structural integrity of .las files
diff --git a/LAdump.c b/LAdump.c
new file mode 100644
index 0000000..6d48694
--- /dev/null
+++ b/LAdump.c
@@ -0,0 +1,478 @@
+/*******************************************************************************************
+ *
+ * Utility for displaying the information in the overlaps of a .las file in a very
+ * simple to parse format.
+ *
+ * Author: Gene Myers
+ * Creation: July 2013
+ * Last Mod: Jan 2015
+ *
+ *******************************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "DB.h"
+#include "align.h"
+
+static char *Usage =
+ "[-cdt] [-o] <src1:db|dam> [ <src2:db|dam> ] <align:las> [ <reads:FILE> | <reads:range> ... ]";
+
+#define LAST_READ_SYMBOL '$'
+
+static int ORDER(const void *l, const void *r)
+{ int x = *((int *) l);
+ int y = *((int *) r);
+ return (x-y);
+}
+
+int main(int argc, char *argv[])
+{ HITS_DB _db1, *db1 = &_db1;
+ HITS_DB _db2, *db2 = &_db2;
+ Overlap _ovl, *ovl = &_ovl;
+
+ FILE *input;
+ int64 novl;
+ int tspace, tbytes, small;
+ int reps, *pts;
+ int input_pts;
+
+ int OVERLAP;
+ int DOCOORDS, DODIFFS, DOTRACE;
+ int ISTWO;
+
+ // Process options
+
+ { int i, j, k;
+ int flags[128];
+
+ ARG_INIT("LAdump")
+
+ j = 1;
+ for (i = 1; i < argc; i++)
+ if (argv[i][0] == '-')
+ switch (argv[i][1])
+ { default:
+ ARG_FLAGS("ocdtUF")
+ break;
+ }
+ else
+ argv[j++] = argv[i];
+ argc = j;
+
+ OVERLAP = flags['o'];
+ DOCOORDS = flags['c'];
+ DODIFFS = flags['d'];
+ DOTRACE = flags['t'];
+
+ if (DOTRACE)
+ DOCOORDS = 1;
+
+ if (argc <= 2)
+ { fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
+ exit (1);
+ }
+ }
+
+ // Open trimmed DB or DB pair
+
+ { int status;
+ char *pwd, *root;
+ FILE *input;
+
+ ISTWO = 0;
+ status = Open_DB(argv[1],db1);
+ if (status < 0)
+ exit (1);
+ if (db1->part > 0)
+ { fprintf(stderr,"%s: Cannot be called on a block: %s\n",Prog_Name,argv[1]);
+ exit (1);
+ }
+
+ if (argc > 3)
+ { pwd = PathTo(argv[3]);
+ root = Root(argv[3],".las");
+ if ((input = fopen(Catenate(pwd,"/",root,".las"),"r")) != NULL)
+ { ISTWO = 1;
+ fclose(input);
+ status = Open_DB(argv[2],db2);
+ if (status < 0)
+ exit (1);
+ if (db2->part > 0)
+ { fprintf(stderr,"%s: Cannot be called on a block: %s\n",Prog_Name,argv[2]);
+ exit (1);
+ }
+ Trim_DB(db2);
+ }
+ else
+ db2 = db1;
+ free(root);
+ free(pwd);
+ }
+ else
+ db2 = db1;
+ Trim_DB(db1);
+ }
+
+ // Process read index arguments into a sorted list of read ranges
+
+ input_pts = 0;
+ if (argc == ISTWO+4)
+ { if (argv[ISTWO+3][0] != LAST_READ_SYMBOL || argv[ISTWO+3][1] != '\0')
+ { char *eptr, *fptr;
+ int b, e;
+
+ b = strtol(argv[ISTWO+3],&eptr,10);
+ if (eptr > argv[ISTWO+3] && b > 0)
+ { if (*eptr == '-')
+ { if (eptr[1] != LAST_READ_SYMBOL || eptr[2] != '\0')
+ { e = strtol(eptr+1,&fptr,10);
+ input_pts = (fptr <= eptr+1 || *fptr != '\0' || e <= 0);
+ }
+ }
+ else
+ input_pts = (*eptr != '\0');
+ }
+ else
+ input_pts = 1;
+ }
+ }
+
+ if (input_pts)
+ { int v, x;
+ FILE *input;
+
+ input = Fopen(argv[ISTWO+3],"r");
+ if (input == NULL)
+ exit (1);
+
+ reps = 0;
+ while ((v = fscanf(input," %d",&x)) != EOF)
+ if (v == 0)
+ { fprintf(stderr,"%s: %d'th item of input file %s is not an integer\n",
+ Prog_Name,reps+1,argv[2]);
+ exit (1);
+ }
+ else
+ reps += 1;
+
+ reps *= 2;
+ pts = (int *) Malloc(sizeof(int)*reps,"Allocating read parameters");
+ if (pts == NULL)
+ exit (1);
+
+ rewind(input);
+ for (v = 0; v < reps; v += 2)
+ { fscanf(input," %d",&x);
+ pts[v] = pts[v+1] = x;
+ }
+
+ fclose(input);
+ }
+
+ else
+ { pts = (int *) Malloc(sizeof(int)*2*argc,"Allocating read parameters");
+ if (pts == NULL)
+ exit (1);
+
+ reps = 0;
+ if (argc > 3+ISTWO)
+ { int c, b, e;
+ char *eptr, *fptr;
+
+ for (c = 3+ISTWO; c < argc; c++)
+ { if (argv[c][0] == LAST_READ_SYMBOL)
+ { b = db1->nreads;
+ eptr = argv[c]+1;
+ }
+ else
+ b = strtol(argv[c],&eptr,10);
+ if (eptr > argv[c])
+ { if (b <= 0)
+ { fprintf(stderr,"%s: %d is not a valid index\n",Prog_Name,b);
+ exit (1);
+ }
+ if (*eptr == '\0')
+ { pts[reps++] = b;
+ pts[reps++] = b;
+ continue;
+ }
+ else if (*eptr == '-')
+ { if (eptr[1] == LAST_READ_SYMBOL)
+ { e = INT32_MAX;
+ fptr = eptr+2;
+ }
+ else
+ e = strtol(eptr+1,&fptr,10);
+ if (fptr > eptr+1 && *fptr == 0 && e > 0)
+ { pts[reps++] = b;
+ pts[reps++] = e;
+ if (b > e)
+ { fprintf(stderr,"%s: Empty range '%s'\n",Prog_Name,argv[c]);
+ exit (1);
+ }
+ continue;
+ }
+ }
+ }
+ fprintf(stderr,"%s: argument '%s' is not an integer range\n",Prog_Name,argv[c]);
+ exit (1);
+ }
+
+ qsort(pts,reps/2,sizeof(int64),ORDER);
+
+ b = 0;
+ for (c = 0; c < reps; c += 2)
+ if (b > 0 && pts[b-1] >= pts[c]-1)
+ { if (pts[c+1] > pts[b-1])
+ pts[b-1] = pts[c+1];
+ }
+ else
+ { pts[b++] = pts[c];
+ pts[b++] = pts[c+1];
+ }
+ pts[b++] = INT32_MAX;
+ reps = b;
+ }
+ else
+ { pts[reps++] = 1;
+ pts[reps++] = INT32_MAX;
+ }
+ }
+
+ // Initiate file reading and read header
+
+ { char *over, *pwd, *root;
+
+ pwd = PathTo(argv[2+ISTWO]);
+ root = Root(argv[2+ISTWO],".las");
+ over = Catenate(pwd,"/",root,".las");
+ input = Fopen(over,"r");
+ if (input == NULL)
+ exit (1);
+
+ if (fread(&novl,sizeof(int64),1,input) != 1)
+ SYSTEM_ERROR
+ if (fread(&tspace,sizeof(int),1,input) != 1)
+ SYSTEM_ERROR
+
+ if (tspace <= TRACE_XOVR)
+ { small = 1;
+ tbytes = sizeof(uint8);
+ }
+ else
+ { small = 0;
+ tbytes = sizeof(uint16);
+ }
+
+ free(pwd);
+ free(root);
+ }
+
+ // Scan to count sizes of things
+
+ { int j, al, tlen;
+ int in, npt, idx, ar;
+ int64 novls, odeg, omax, sdeg, smax, ttot, tmax;
+
+ in = 0;
+ npt = pts[0];
+ idx = 1;
+
+ // For each record do
+
+ novls = omax = smax = ttot = tmax = 0;
+ sdeg = odeg = 0;
+
+ al = 0;
+ for (j = 0; j < novl; j++)
+
+ // Read it in
+
+ { Read_Overlap(input,ovl);
+ tlen = ovl->path.tlen;
+ fseeko(input,tlen*tbytes,SEEK_CUR);
+
+ // Determine if it should be displayed
+
+ ar = ovl->aread+1;
+ if (in)
+ { while (ar > npt)
+ { npt = pts[idx++];
+ if (ar < npt)
+ { in = 0;
+ break;
+ }
+ npt = pts[idx++];
+ }
+ }
+ else
+ { while (ar >= npt)
+ { npt = pts[idx++];
+ if (ar <= npt)
+ { in = 1;
+ break;
+ }
+ npt = pts[idx++];
+ }
+ }
+ if (!in)
+ continue;
+
+ // If -o check display only overlaps
+
+ if (OVERLAP)
+ { if (ovl->path.abpos != 0 && ovl->path.bbpos != 0)
+ continue;
+ if (ovl->path.aepos != db1->reads[ovl->aread].rlen &&
+ ovl->path.bepos != db2->reads[ovl->bread].rlen)
+ continue;
+ }
+
+ if (ar != al)
+ { if (sdeg > smax)
+ smax = sdeg;
+ if (odeg > omax)
+ omax = odeg;
+ sdeg = odeg = 0;
+ al = ar;
+ }
+
+ novls += 1;
+ odeg += 1;
+ sdeg += tlen;
+ ttot += tlen;
+ if (tlen > tmax)
+ tmax = tlen;
+ }
+
+ if (sdeg > smax)
+ smax = sdeg;
+ if (odeg > omax)
+ omax = odeg;
+
+ printf("+ P %lld\n",novls);
+ printf("%% P %lld\n",omax);
+ printf("+ T %lld\n",ttot);
+ printf("%% T %lld\n",smax);
+ printf("@ T %lld\n",tmax);
+ }
+
+ // Read the file and display selected records
+
+ { int j;
+ uint16 *trace;
+ int tmax;
+ int in, npt, idx, ar;
+ int64 verse;
+
+ rewind(input);
+ fread(&verse,sizeof(int64),1,input);
+ fread(&tspace,sizeof(int),1,input);
+ if (verse < 0)
+ { for (j = 0; j < 5; j++)
+ fread(&verse,sizeof(int64),1,input);
+ }
+
+ tmax = 1000;
+ trace = (uint16 *) Malloc(sizeof(uint16)*tmax,"Allocating trace vector");
+ if (trace == NULL)
+ exit (1);
+
+ in = 0;
+ npt = pts[0];
+ idx = 1;
+
+ // For each record do
+
+ for (j = 0; j < novl; j++)
+
+ // Read it in
+
+ { Read_Overlap(input,ovl);
+ if (ovl->path.tlen > tmax)
+ { tmax = ((int) 1.2*ovl->path.tlen) + 100;
+ trace = (uint16 *) Realloc(trace,sizeof(uint16)*tmax,"Allocating trace vector");
+ if (trace == NULL)
+ exit (1);
+ }
+ ovl->path.trace = (void *) trace;
+ Read_Trace(input,ovl,tbytes);
+
+ // Determine if it should be displayed
+
+ ar = ovl->aread+1;
+ if (in)
+ { while (ar > npt)
+ { npt = pts[idx++];
+ if (ar < npt)
+ { in = 0;
+ break;
+ }
+ npt = pts[idx++];
+ }
+ }
+ else
+ { while (ar >= npt)
+ { npt = pts[idx++];
+ if (ar <= npt)
+ { in = 1;
+ break;
+ }
+ npt = pts[idx++];
+ }
+ }
+ if (!in)
+ continue;
+
+ // If -o check display only overlaps
+
+ if (OVERLAP)
+ { if (ovl->path.abpos != 0 && ovl->path.bbpos != 0)
+ continue;
+ if (ovl->path.aepos != db1->reads[ovl->aread].rlen &&
+ ovl->path.bepos != db2->reads[ovl->bread].rlen)
+ continue;
+ }
+
+ // Display it
+
+ printf("P %d %d",ovl->aread+1,ovl->bread+1);
+ if (COMP(ovl->flags))
+ printf(" c\n");
+ else
+ printf(" n\n");
+
+ if (DOCOORDS)
+ printf("C %d %d %d %d\n",ovl->path.abpos,ovl->path.aepos,ovl->path.bbpos,ovl->path.bepos);
+
+ if (DODIFFS)
+ printf("D %d\n",ovl->path.diffs);
+
+ if (DOTRACE)
+ { uint16 *trace = (uint16 *) ovl->path.trace;
+ int tlen = ovl->path.tlen;
+
+ if (small)
+ Decompress_TraceTo16(ovl);
+ printf("T %d\n",tlen>>1);
+ for (j = 0; j < tlen; j += 2)
+ printf(" %3d %3d\n",trace[j],trace[j+1]);
+ }
+ }
+
+ free(trace);
+ }
+
+ Close_DB(db1);
+ if (ISTWO)
+ Close_DB(db2);
+
+ exit (0);
+}
diff --git a/LAindex.c b/LAindex.c
new file mode 100644
index 0000000..3f2445a
--- /dev/null
+++ b/LAindex.c
@@ -0,0 +1,201 @@
+/*******************************************************************************************
+ *
+ * Create an index with extension .las.idx for a .las file.
+ * Utility expects the .las file to be sorted.
+ * Header contains total # of trace points, max # of trace points for
+ * a given overlap, max # of trace points in all the overlaps for a given aread, and
+ * max # of overlaps for a given aread. The remainder are the offsets into each pile.
+ *
+ * Author: Gene Myers
+ * Date : Sept 2015
+ *
+ *******************************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "DB.h"
+#include "align.h"
+
+static char *Usage = "[-v] <source:las> ...";
+
+#define MEMORY 1000 // How many megabytes for output buffer
+
+int main(int argc, char *argv[])
+{ char *iblock;
+ FILE *input, *output;
+ int64 novl, bsize, ovlsize, ptrsize;
+ int tspace, tbytes;
+ char *pwd, *root;
+ int64 tmax, ttot;
+ int64 omax, smax;
+ int64 odeg, sdeg;
+ int i;
+
+ int VERBOSE;
+
+ // Process options
+
+ { int j, k;
+ int flags[128];
+
+ ARG_INIT("LAindex")
+
+ j = 1;
+ for (i = 1; i < argc; i++)
+ if (argv[i][0] == '-')
+ { ARG_FLAGS("v") }
+ else
+ argv[j++] = argv[i];
+ argc = j;
+
+ VERBOSE = flags['v'];
+
+ if (argc <= 1)
+ { fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
+ exit (1);
+ }
+ }
+
+ // For each file do
+
+ ptrsize = sizeof(void *);
+ ovlsize = sizeof(Overlap) - ptrsize;
+ bsize = MEMORY * 1000000ll;
+ iblock = (char *) Malloc(bsize + ptrsize,"Allocating input block");
+ if (iblock == NULL)
+ exit (1);
+ iblock += ptrsize;
+
+ for (i = 1; i < argc; i++)
+ { pwd = PathTo(argv[i]);
+ root = Root(argv[i],".las");
+ input = Fopen(Catenate(pwd,"/",root,".las"),"r");
+ if (input == NULL)
+ exit (1);
+
+ if (fread(&novl,sizeof(int64),1,input) != 1)
+ SYSTEM_ERROR
+ if (fread(&tspace,sizeof(int),1,input) != 1)
+ SYSTEM_ERROR
+ if (tspace <= TRACE_XOVR)
+ tbytes = sizeof(uint8);
+ else
+ tbytes = sizeof(uint16);
+
+ output = Fopen(Catenate(pwd,"/.",root,".las.idx"),"w");
+ if (output == NULL)
+ exit (1);
+
+ free(pwd);
+ free(root);
+
+ if (VERBOSE)
+ { printf(" Indexing %s: ",root);
+ Print_Number(novl,0,stdout);
+ printf(" records ... ");
+ fflush(stdout);
+ }
+
+ fwrite(&novl,sizeof(int64),1,output);
+ fwrite(&novl,sizeof(int64),1,output);
+ fwrite(&novl,sizeof(int64),1,output);
+ fwrite(&novl,sizeof(int64),1,output);
+
+ { int j, alst;
+ Overlap *w;
+ int64 tsize;
+ int64 optr;
+ char *iptr, *itop;
+ int64 tlen;
+
+ optr = sizeof(int64) + sizeof(int32);
+ iptr = iblock;
+ itop = iblock + fread(iblock,1,bsize,input);
+
+ alst = -1;
+ odeg = sdeg = 0;
+ omax = smax = 0;
+ tmax = ttot = 0;
+ for (j = 0; j < novl; j++)
+ { if (iptr + ovlsize > itop)
+ { int64 remains = itop-iptr;
+ if (remains > 0)
+ memcpy(iblock,iptr,remains);
+ iptr = iblock;
+ itop = iblock + remains;
+ itop += fread(itop,1,bsize-remains,input);
+ }
+
+ w = (Overlap *) (iptr - ptrsize);
+
+ tlen = w->path.tlen;
+ if (alst < 0)
+ { fwrite(&optr,sizeof(int64),1,output);
+ alst = w->aread;
+ }
+ else
+ while (alst < w->aread)
+ { if (sdeg > smax)
+ smax = sdeg;
+ if (odeg > omax)
+ omax = odeg;
+ fwrite(&optr,sizeof(int64),1,output);
+ odeg = sdeg = 0;
+ alst += 1;
+ }
+ if (tlen > tmax)
+ tmax = tlen;
+ ttot += tlen;
+ odeg += 1;
+ sdeg += tlen;
+
+ iptr += ovlsize;
+
+ tsize = tlen*tbytes;
+ if (iptr + tsize > itop)
+ { int64 remains = itop-iptr;
+ if (remains > 0)
+ memcpy(iblock,iptr,remains);
+ iptr = iblock;
+ itop = iblock + remains;
+ itop += fread(itop,1,bsize-remains,input);
+ }
+
+ optr += ovlsize + tsize;
+ iptr += tsize;
+ }
+ fwrite(&optr,sizeof(int64),1,output);
+ }
+
+ if (sdeg > smax)
+ smax = sdeg;
+ if (odeg > omax)
+ omax = odeg;
+
+ rewind(output);
+
+ fwrite(&omax,sizeof(int64),1,output);
+ fwrite(&ttot,sizeof(int64),1,output);
+ fwrite(&smax,sizeof(int64),1,output);
+ fwrite(&tmax,sizeof(int64),1,output);
+
+ if (VERBOSE)
+ { Print_Number(ttot,0,stdout);
+ printf(" trace points\n");
+ fflush(stdout);
+ }
+
+ fclose(input);
+ fclose(output);
+ }
+
+ free(iblock-ptrsize);
+
+ exit (0);
+}
diff --git a/LAmerge.c b/LAmerge.c
index 93c9266..00954e0 100644
--- a/LAmerge.c
+++ b/LAmerge.c
@@ -1,40 +1,3 @@
-/************************************************************************************\
-* *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved. *
-* *
-* Redistribution and use in source and binary forms, with or without modification, *
-* are permitted provided that the following conditions are met: *
-* *
-* · Redistributions of source code must retain the above copyright notice, this *
-* list of conditions and the following disclaimer. *
-* *
-* · Redistributions in binary form must reproduce the above copyright notice, this *
-* list of conditions and the following disclaimer in the documentation and/or *
-* other materials provided with the distribution. *
-* *
-* · The name of EWM may not be used to endorse or promote products derived from *
-* this software without specific prior written permission. *
-* *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
-* *
-* For any issues regarding this software and its use, contact EWM at: *
-* *
-* Eugene W. Myers Jr. *
-* Bautzner Str. 122e *
-* 01099 Dresden *
-* GERMANY *
-* Email: gene.myers at gmail.com *
-* *
-\************************************************************************************/
-
/*******************************************************************************************
*
* Given a list of sorted .las files, merge them into a single sorted .las file.
@@ -120,6 +83,57 @@ static void reheap(int s, Overlap **heap, int hsize)
heap[c] = hs;
}
+ // Heap sort of records according to (aread,abpos) order
+
+#define MAPARE(lp,rp) \
+ if (lp->aread > rp->aread) \
+ bigger = 1; \
+ else if (lp->aread < rp->aread) \
+ bigger = 0; \
+ else if (lp->path.abpos > rp->path.abpos) \
+ bigger = 1; \
+ else \
+ bigger = 0;
+
+static void maheap(int s, Overlap **heap, int hsize)
+{ int c, l, r;
+ int bigger;
+ Overlap *hs, *hr, *hl;
+
+ c = s;
+ hs = heap[s];
+ while ((l = 2*c) <= hsize)
+ { r = l+1;
+ hl = heap[l];
+ if (r > hsize)
+ bigger = 1;
+ else
+ { hr = heap[r];
+ MAPARE(hr,hl)
+ }
+ if (bigger)
+ { MAPARE(hs,hl)
+ if (bigger)
+ { heap[c] = hl;
+ c = l;
+ }
+ else
+ break;
+ }
+ else
+ { MAPARE(hs,hr)
+ if (bigger)
+ { heap[c] = hr;
+ c = r;
+ }
+ else
+ break;
+ }
+ }
+ if (c != s)
+ heap[c] = hs;
+}
+
#ifdef DEBUG
static void showheap(Overlap **heap, int hsize)
@@ -168,6 +182,7 @@ int main(int argc, char *argv[])
char *optr, *otop;
int VERBOSE;
+ int MAP_SORT;
// Process command line
@@ -179,12 +194,13 @@ int main(int argc, char *argv[])
j = 1;
for (i = 1; i < argc; i++)
if (argv[i][0] == '-')
- { ARG_FLAGS("v") }
+ { ARG_FLAGS("vc") }
else
argv[j++] = argv[i];
argc = j;
- VERBOSE = flags['v'];
+ VERBOSE = flags['v'];
+ MAP_SORT = flags['c'];
if (argc < 3)
{ fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
@@ -295,8 +311,13 @@ int main(int argc, char *argv[])
}
if (hsize > 3)
- for (i = hsize/2; i > 1; i--)
- reheap(i,heap,hsize);
+ { if (MAP_SORT)
+ for (i = hsize/2; i > 1; i--)
+ maheap(i,heap,hsize);
+ else
+ for (i = hsize/2; i > 1; i--)
+ reheap(i,heap,hsize);
+ }
// While the heap is not empty do
@@ -305,7 +326,10 @@ int main(int argc, char *argv[])
IO_block *src;
int64 tsize, span;
- reheap(1,heap,hsize);
+ if (MAP_SORT)
+ maheap(1,heap,hsize);
+ else
+ reheap(1,heap,hsize);
ov = heap[1];
src = in + (ov - ovls);
diff --git a/LAshow.c b/LAshow.c
index ea5c8ba..66926e9 100644
--- a/LAshow.c
+++ b/LAshow.c
@@ -1,40 +1,3 @@
-/************************************************************************************\
-* *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved. *
-* *
-* Redistribution and use in source and binary forms, with or without modification, *
-* are permitted provided that the following conditions are met: *
-* *
-* · Redistributions of source code must retain the above copyright notice, this *
-* list of conditions and the following disclaimer. *
-* *
-* · Redistributions in binary form must reproduce the above copyright notice, this *
-* list of conditions and the following disclaimer in the documentation and/or *
-* other materials provided with the distribution. *
-* *
-* · The name of EWM may not be used to endorse or promote products derived from *
-* this software without specific prior written permission. *
-* *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
-* *
-* For any issues regarding this software and its use, contact EWM at: *
-* *
-* Eugene W. Myers Jr. *
-* Bautzner Str. 122e *
-* 01099 Dresden *
-* GERMANY *
-* Email: gene.myers at gmail.com *
-* *
-\************************************************************************************/
-
/*******************************************************************************************
*
* Utility for displaying the overlaps in a .las file in a variety of ways including
@@ -66,8 +29,8 @@ static char *Usage[] =
#define LAST_READ_SYMBOL '$'
static int ORDER(const void *l, const void *r)
-{ int x = *((int32 *) l);
- int y = *((int32 *) r);
+{ int x = *((int *) l);
+ int y = *((int *) r);
return (x-y);
}
@@ -78,6 +41,7 @@ int main(int argc, char *argv[])
Alignment _aln, *aln = &_aln;
FILE *input;
+ int sameDB;
int64 novl;
int tspace, tbytes, small;
int reps, *pts;
@@ -141,6 +105,7 @@ int main(int argc, char *argv[])
{ int status;
char *pwd, *root;
FILE *input;
+ struct stat stat1, stat2;
ISTWO = 0;
status = Open_DB(argv[1],db1);
@@ -151,6 +116,7 @@ int main(int argc, char *argv[])
exit (1);
}
+ sameDB = 1;
if (argc > 3)
{ pwd = PathTo(argv[3]);
root = Root(argv[3],".las");
@@ -164,6 +130,10 @@ int main(int argc, char *argv[])
{ fprintf(stderr,"%s: Cannot be called on a block: %s\n",Prog_Name,argv[2]);
exit (1);
}
+ stat(Catenate(db1->path,"","",".idx"),&stat1);
+ stat(Catenate(db2->path,"","",".idx"),&stat2);
+ if (stat1.st_ino != stat2.st_ino)
+ sameDB = 0;
Trim_DB(db2);
}
else
@@ -517,12 +487,15 @@ int main(int argc, char *argv[])
{ char *aseq, *bseq;
int amin, amax;
int bmin, bmax;
+ int self;
if (FLIP)
Flip_Alignment(aln,0);
if (small)
Decompress_TraceTo16(ovl);
+ self = sameDB && (ovl->aread == ovl->bread) && !COMP(ovl->flags);
+
amin = ovl->path.abpos - BORDER;
if (amin < 0) amin = 0;
amax = ovl->path.aepos + BORDER;
@@ -538,20 +511,31 @@ int main(int argc, char *argv[])
if (bmin < 0) bmin = 0;
bmax = ovl->path.bepos + BORDER;
if (bmax > aln->blen) bmax = aln->blen;
+ if (self)
+ { if (bmin < amin)
+ amin = bmin;
+ if (bmax > amax)
+ amax = bmax;
+ }
}
aseq = Load_Subread(db1,ovl->aread,amin,amax,abuffer,0);
- bseq = Load_Subread(db2,ovl->bread,bmin,bmax,bbuffer,0);
+ if (!self)
+ bseq = Load_Subread(db2,ovl->bread,bmin,bmax,bbuffer,0);
+ else
+ bseq = aseq;
aln->aseq = aseq - amin;
if (COMP(aln->flags))
{ Complement_Seq(bseq,bmax-bmin);
aln->bseq = bseq - (aln->blen - bmax);
}
+ else if (self)
+ aln->bseq = aln->aseq;
else
aln->bseq = bseq - bmin;
- Compute_Trace_PTS(aln,work,tspace);
+ Compute_Trace_PTS(aln,work,tspace,GREEDIEST);
if (FLIP)
{ if (COMP(aln->flags))
diff --git a/LAsort.c b/LAsort.c
index f3dc7a1..df54b0a 100644
--- a/LAsort.c
+++ b/LAsort.c
@@ -1,40 +1,3 @@
-/************************************************************************************\
-* *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved. *
-* *
-* Redistribution and use in source and binary forms, with or without modification, *
-* are permitted provided that the following conditions are met: *
-* *
-* · Redistributions of source code must retain the above copyright notice, this *
-* list of conditions and the following disclaimer. *
-* *
-* · Redistributions in binary form must reproduce the above copyright notice, this *
-* list of conditions and the following disclaimer in the documentation and/or *
-* other materials provided with the distribution. *
-* *
-* · The name of EWM may not be used to endorse or promote products derived from *
-* this software without specific prior written permission. *
-* *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
-* *
-* For any issues regarding this software and its use, contact EWM at: *
-* *
-* Eugene W. Myers Jr. *
-* Bautzner Str. 122e *
-* 01099 Dresden *
-* GERMANY *
-* Email: gene.myers at gmail.com *
-* *
-\************************************************************************************/
-
/*******************************************************************************************
*
* Load a file U.las of overlaps into memory, sort them all by A,B index,
@@ -69,24 +32,50 @@ static int SORT_OVL(const void *x, const void *y)
Overlap *ol, *or;
int al, ar;
int bl, br;
+ int cl, cr;
int pl, pr;
ol = (Overlap *) (IBLOCK+l);
- al = ol->aread;
- bl = ol->bread;
-
or = (Overlap *) (IBLOCK+r);
- ar = or->aread;
- br = or->bread;
+ al = ol->aread;
+ ar = or->aread;
if (al != ar)
return (al-ar);
+
+ bl = ol->bread;
+ br = or->bread;
if (bl != br)
return (bl-br);
+ cl = COMP(ol->flags);
+ cr = COMP(ol->flags);
+ if (cl != cr)
+ return (cl-cr);
+
pl = ol->path.abpos;
pr = or->path.abpos;
+ return (pl-pr);
+}
+
+static int SORT_MAP(const void *x, const void *y)
+{ int64 l = *((int64 *) x);
+ int64 r = *((int64 *) y);
+
+ Overlap *ol, *or;
+ int al, ar;
+ int pl, pr;
+
+ ol = (Overlap *) (IBLOCK+l);
+ or = (Overlap *) (IBLOCK+r);
+ al = ol->aread;
+ ar = or->aread;
+ if (al != ar)
+ return (al-ar);
+
+ pl = ol->path.abpos;
+ pr = or->path.abpos;
return (pl-pr);
}
@@ -98,6 +87,7 @@ int main(int argc, char *argv[])
int i;
int VERBOSE;
+ int MAP_ORDER;
// Process options
@@ -109,12 +99,13 @@ int main(int argc, char *argv[])
j = 1;
for (i = 1; i < argc; i++)
if (argv[i][0] == '-')
- { ARG_FLAGS("v") }
+ { ARG_FLAGS("vc") }
else
argv[j++] = argv[i];
argc = j;
- VERBOSE = flags['v'];
+ VERBOSE = flags['v'];
+ MAP_ORDER = flags['c'];
if (argc <= 1)
{ fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
@@ -218,7 +209,10 @@ int main(int argc, char *argv[])
// Sort permutation array of ptrs to records
IBLOCK = iblock;
- qsort(perm,novl,sizeof(int64),SORT_OVL);
+ if (MAP_ORDER)
+ qsort(perm,novl,sizeof(int64),SORT_MAP);
+ else
+ qsort(perm,novl,sizeof(int64),SORT_OVL);
// Output the records in sorted order
diff --git a/LAsplit.c b/LAsplit.c
index 28b561f..49d5ed0 100644
--- a/LAsplit.c
+++ b/LAsplit.c
@@ -1,40 +1,3 @@
-/************************************************************************************\
-* *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved. *
-* *
-* Redistribution and use in source and binary forms, with or without modification, *
-* are permitted provided that the following conditions are met: *
-* *
-* · Redistributions of source code must retain the above copyright notice, this *
-* list of conditions and the following disclaimer. *
-* *
-* · Redistributions in binary form must reproduce the above copyright notice, this *
-* list of conditions and the following disclaimer in the documentation and/or *
-* other materials provided with the distribution. *
-* *
-* · The name of EWM may not be used to endorse or promote products derived from *
-* this software without specific prior written permission. *
-* *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
-* *
-* For any issues regarding this software and its use, contact EWM at: *
-* *
-* Eugene W. Myers Jr. *
-* Bautzner Str. 122e *
-* 01099 Dresden *
-* GERMANY *
-* Email: gene.myers at gmail.com *
-* *
-\************************************************************************************/
-
/*******************************************************************************************
*
* Split an OVL file arriving from the standard input into 'parts' equal sized .las-files
diff --git a/LAupgrade.Dec.31.2014.c b/LAupgrade.Dec.31.2014.c
index f3b2321..f569caa 100644
--- a/LAupgrade.Dec.31.2014.c
+++ b/LAupgrade.Dec.31.2014.c
@@ -1,47 +1,9 @@
-/************************************************************************************\
-* *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved. *
-* *
-* Redistribution and use in source and binary forms, with or without modification, *
-* are permitted provided that the following conditions are met: *
-* *
-* · Redistributions of source code must retain the above copyright notice, this *
-* list of conditions and the following disclaimer. *
-* *
-* · Redistributions in binary form must reproduce the above copyright notice, this *
-* list of conditions and the following disclaimer in the documentation and/or *
-* other materials provided with the distribution. *
-* *
-* · The name of EWM may not be used to endorse or promote products derived from *
-* this software without specific prior written permission. *
-* *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
-* *
-* For any issues regarding this software and its use, contact EWM at: *
-* *
-* Eugene W. Myers Jr. *
-* Bautzner Str. 122e *
-* 01099 Dresden *
-* GERMANY *
-* Email: gene.myers at gmail.com *
-* *
-\************************************************************************************/
-
/*******************************************************************************************
*
- * Merge together in index order, overlap files <XXX>.1.las, <XXX>.2.las, ... into a
- * single overlap file and output to the standard output
+ * Convert older .las files so that the alen and blen fields are removed.
*
* Author: Gene Myers
- * Date : July 2013
+ * Date : Dec 2014
*
*******************************************************************************************/
@@ -119,7 +81,7 @@ int main(int argc, char *argv[])
tbytes = sizeof(uint16);
fwrite(&novl,sizeof(int64),1,stdout);
- fwrite(&tspace,sizeof(int32),1,stdout);
+ fwrite(&tspace,sizeof(int),1,stdout);
{ int j;
OverlapOld *w;
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..9aa819c
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,34 @@
+
+ Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ · Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+ · Redistributions in binary form must reproduce the above copyright notice, this
+ list of conditions and the following disclaimer in the documentation and/or
+ other materials provided with the distribution.
+
+ · The name of EWM may not be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ For any issues regarding this software and its use, contact EWM at:
+
+ Eugene W. Myers Jr.
+ Bautzner Str. 122e
+ 01099 Dresden
+ GERMANY
+ Email: gene.myers at gmail.com
+
diff --git a/Makefile b/Makefile
index fdfe3f5..c9b027a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
-CFLAGS = -O3 -Wall -Wextra -fno-strict-aliasing
+CFLAGS = -O3 -Wall -Wextra -Wno-unused-result -fno-strict-aliasing
-ALL = daligner HPCdaligner HPCmapper LAsort LAmerge LAsplit LAcat LAshow LAcheck
+ALL = daligner HPCdaligner HPCmapper LAsort LAmerge LAsplit LAcat LAshow LAdump LAcheck LAindex
all: $(ALL)
@@ -22,6 +22,9 @@ LAmerge: LAmerge.c align.h DB.c DB.h QV.c QV.h
LAshow: LAshow.c align.c align.h DB.c DB.h QV.c QV.h
gcc $(CFLAGS) -o LAshow LAshow.c align.c DB.c QV.c -lm
+LAdump: LAdump.c align.c align.h DB.c DB.h QV.c QV.h
+ gcc $(CFLAGS) -o LAdump LAdump.c align.c DB.c QV.c -lm
+
LAcat: LAcat.c align.h DB.c DB.h QV.c QV.h
gcc $(CFLAGS) -o LAcat LAcat.c DB.c QV.c -lm
@@ -34,6 +37,9 @@ LAcheck: LAcheck.c align.c align.h DB.c DB.h QV.c QV.h
LAupgrade.Dec.31.2014: LAupgrade.Dec.31.2014.c align.c align.h DB.c DB.h QV.c QV.h
gcc $(CFLAGS) -o LAupgrade.Dec.31.2014 LAupgrade.Dec.31.2014.c align.c DB.c QV.c -lm
+LAindex: LAindex.c align.c align.h DB.c DB.h QV.c QV.h
+ gcc $(CFLAGS) -o LAindex LAindex.c align.c DB.c QV.c -lm
+
clean:
rm -f $(ALL)
rm -fr *.dSYM
diff --git a/QV.c b/QV.c
index 38f6db4..2c4f3bd 100644
--- a/QV.c
+++ b/QV.c
@@ -1,40 +1,3 @@
-/************************************************************************************\
-* *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved. *
-* *
-* Redistribution and use in source and binary forms, with or without modification, *
-* are permitted provided that the following conditions are met: *
-* *
-* · Redistributions of source code must retain the above copyright notice, this *
-* list of conditions and the following disclaimer. *
-* *
-* · Redistributions in binary form must reproduce the above copyright notice, this *
-* list of conditions and the following disclaimer in the documentation and/or *
-* other materials provided with the distribution. *
-* *
-* · The name of EWM may not be used to endorse or promote products derived from *
-* this software without specific prior written permission. *
-* *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
-* *
-* For any issues regarding this software and its use, contact EWM at: *
-* *
-* Eugene W. Myers Jr. *
-* Bautzner Str. 122e *
-* 01099 Dresden *
-* GERMANY *
-* Email: gene.myers at gmail.com *
-* *
-\************************************************************************************/
-
/*******************************************************************************************
*
* Compressor/decompressor for .quiv files: customized Huffman codes for each stream based on
diff --git a/QV.h b/QV.h
index 35fbadc..1ea7dc8 100644
--- a/QV.h
+++ b/QV.h
@@ -1,40 +1,3 @@
-/************************************************************************************\
-* *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved. *
-* *
-* Redistribution and use in source and binary forms, with or without modification, *
-* are permitted provided that the following conditions are met: *
-* *
-* · Redistributions of source code must retain the above copyright notice, this *
-* list of conditions and the following disclaimer. *
-* *
-* · Redistributions in binary form must reproduce the above copyright notice, this *
-* list of conditions and the following disclaimer in the documentation and/or *
-* other materials provided with the distribution. *
-* *
-* · The name of EWM may not be used to endorse or promote products derived from *
-* this software without specific prior written permission. *
-* *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
-* *
-* For any issues regarding this software and its use, contact EWM at: *
-* *
-* Eugene W. Myers Jr. *
-* Bautzner Str. 122e *
-* 01099 Dresden *
-* GERMANY *
-* Email: gene.myers at gmail.com *
-* *
-\************************************************************************************/
-
/*******************************************************************************************
*
* Compressor/decompressor for .quiv files: customized Huffman codes for each stream based on
diff --git a/README b/README
index 6b82715..439af9e 100644
--- a/README
+++ b/README
@@ -1,39 +1,9 @@
-/************************************************************************************\
-* *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved. *
-* *
-* Redistribution and use in source and binary forms, with or without modification, *
-* are permitted provided that the following conditions are met: *
-* *
-* · Redistributions of source code must retain the above copyright notice, this *
-* list of conditions and the following disclaimer. *
-* *
-* · Redistributions in binary form must reproduce the above copyright notice, this *
-* list of conditions and the following disclaimer in the documentation and/or *
-* other materials provided with the distribution. *
-* *
-* · The name of EWM may not be used to endorse or promote products derived from *
-* this software without specific prior written permission. *
-* *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
-* *
-* For any issues regarding this software and its use, contact EWM at: *
-* *
-* Eugene W. Myers Jr. *
-* Bautzner Str. 122e *
-* 01099 Dresden *
-* GERMANY *
-* Email: gene.myers at gmail.com *
-* *
-\************************************************************************************/
+
+
+
+*** PLEASE GO TO THE DAZZLER BLOG (https://dazzlerblog.wordpress.com) FOR TYPESET ***
+ DOCUMENTATION, EXAMPLES OF USE, AND DESIGN PHILOSOPHY.
+
/************************************************************************************\
@@ -42,7 +12,7 @@ UPGRADE & DEVELOPER NOTES ! ! !
If you have already performed a big comparison and don't want to recompute all your
local alignments in .las files, but do want to use a more recent version of the
software that entails a change to the data structures (currently the update on
-December 31, 2014), please note the routine LAupgrade.31.2014. This take an .las file,
+December 31, 2014), please note the routine LAupgrade.Dec.31.2014. This take a .las file,
say X.las, as an argument, and writes to standard output the .las file in the new
format.
@@ -198,8 +168,58 @@ uppercase should be used for DNA sequence instead of the default lowercase. If
-o option is set then only alignments that are proper overlaps (a sequence end occurs
at the each end of the alignment) are displayed.
-
-5. LAcat <source:las> > <target>.las
+5. LAdump [-cdt] [-o] <src1:db|dam> [ <src2:db|dam> ]
+ <align:las> [ <reads:FILE> | <reads:range> ... ]
+
+Like LAshow, LAdump allows one to display the local alignments (LAs) of a subset of the
+piles in an .las file and select which information to show about them. The difference
+is that the information is written in a very simple "1-code" ASCII format that makes it
+easy for one to read and parse the information for further use. For each LA the pair of
+reads is output on a line. -c requests that one further output the coordinates of the
+LA segments be output. The -d option requests that the number of difference in the LA
+be output, and -t requests that the tracepoint information be output. Finally, -o
+requests that only LAs that are proper overlaps be output.
+
+The format is very simple. Each requested piece of information occurs on a line. The
+first character of every line is a "1-code" character that tells you what information
+to expect on the line. The rest of the line contains information where each item is
+separated by a single blank space. The trace point line gives the number of trace
+point intervals in the LA and is immediately followed by that many lines containing
+a pair of integers giving the # of differences and b-displacement in each successive
+trace point interval.
+
+ P #a #b - (#a,#b) have an LA between them
+ C #ab #ae #bb #be - [#ab,#ae] aligns with [#bb,#be]
+ D # - there are # differences in the LA
+ T #n - there are #n trace point intervals for the LA
+ (#d #y )^#n - there are #d difference aligning the #y bp's of B with the
+ next fixed-size interval of A
+ + X # - Total amount of X (X = P or T)
+ % X # - Maximum amount of X in any pile (X = P or T)
+ @ T # - Maximum number of trace points in any trace
+
+1-code lines that begin with +, %, or @ are always the first lines in the output.
+They give size information about what is contained in the output. Specifically,
+'+ X #' gives the total number of LAs (X=P), or the total number of trace point
+intervals (X=T) in the file . '% X #' gives the maximum number of LAs (X=P) or
+the maximum number of trace point intervals (X=T) in a given *pile* (collection of
+LAs all with the same a-read (applies only to sorted .las files). Finally @ T #
+gives the maximum # of trace point intervals in any trace within the file.
+
+6. LAindex -v <source:las> ...
+
+LAindex takes a series of one or more sorted .las files and produces a "pile
+index" for each one. If the input file has name "X.las", then the name of its
+index file is ".X.las.idx". For each A-read pile encoded in the .las file,
+the index contains the offset to the first local alignment with A in the file.
+The index starts with four 64-bit integers that encode the numbers % P, + T, % T,
+and @ T described for LAdump above, and then an offset for each pile beginning
+with the first A-read in the file (which may not be read 0). The index is meant
+to allow programs that process piles to more efficiently read just the piles
+they need at any momment int time, as opposed to having to sequentially scan
+through the .las file.
+
+7. LAcat <source:las> > <target>.las
Given argument <source>, find all files <source>.1.las, <source>.2.las, ...
<source>.n.<las> where <source>.i.las exists for every i in [1,n]. Then
@@ -207,7 +227,7 @@ concatenate these files in order into a single .las file and pipe the result
to the standard output.
-6. LAsplit <target:las> (<parts:int> | <path:db|dam>) < <source>.las
+8. LAsplit <target:las> (<parts:int> | <path:db|dam>) < <source>.las
If the second argument is an integer n, then divide the alignment file <source>, piped
in through the standard input, as evenly as possible into n alignment files with the
@@ -219,7 +239,7 @@ divide the input alignment file into block .las files where all records whose a-
in <path>.i.db are in <align>.i.las.
-7. LAcheck [-vS] <src1:db|dam> [ <src2:db|dam> ] <align:las> ...
+9. LAcheck [-vS] <src1:db|dam> [ <src2:db|dam> ] <align:las> ...
LAcheck checks each .las file for structural integrity, where the a- and b-sequences
come from src1 or from src1 and scr2, respectively. That is, it makes sure each file
@@ -232,10 +252,10 @@ runs silently. The exit status is 0 if every file is deemed good, and 1 if at l
one of the files looks corrupted.
-8. HPCdaligner [-vbAI] [-k<int(14)>] [-w<int(6)>] [-h<int(35)>] [-t<int>] [-M<int>]
- [-e<double(.70)] [-l<int(1000)] [-s<int(100)>] [-H<int>]
- [-m<track>]+ [-dal<int(4)>] [-deg<int(25)>]
- <path:db|dam> [<first:int>[-<last:int>]]
+10. HPCdaligner [-vbAI] [-k<int(14)>] [-w<int(6)>] [-h<int(35)>] [-t<int>] [-M<int>]
+ [-e<double(.70)] [-l<int(1000)] [-s<int(100)>] [-H<int>]
+ [-m<track>]+ [-dal<int(4)>] [-deg<int(25)>]
+ <path:db|dam> [<first:int>[-<last:int>]]
HPCdaligner writes a UNIX shell script to the standard output that consists of a
sequence of commands that effectively run daligner on all pairs of blocks of a split
diff --git a/align.c b/align.c
index 4666fe0..de55eed 100644
--- a/align.c
+++ b/align.c
@@ -1,40 +1,3 @@
-/************************************************************************************\
-* *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved. *
-* *
-* Redistribution and use in source and binary forms, with or without modification, *
-* are permitted provided that the following conditions are met: *
-* *
-* · Redistributions of source code must retain the above copyright notice, this *
-* list of conditions and the following disclaimer. *
-* *
-* · Redistributions in binary form must reproduce the above copyright notice, this *
-* list of conditions and the following disclaimer in the documentation and/or *
-* other materials provided with the distribution. *
-* *
-* · The name of EWM may not be used to endorse or promote products derived from *
-* this software without specific prior written permission. *
-* *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
-* *
-* For any issues regarding this software and its use, contact EWM at: *
-* *
-* Eugene W. Myers Jr. *
-* Bautzner Str. 122e *
-* 01099 Dresden *
-* GERMANY *
-* Email: gene.myers at gmail.com *
-* *
-\************************************************************************************/
-
/*******************************************************************************************
*
* Fast alignment discovery and trace generation along with utilites for displaying alignments
@@ -255,9 +218,9 @@ Align_Spec *New_Align_Spec(double ave_corr, int trace_space, float *freq)
match = 1.-match;
bias = (int) ((match+.025)*20.-1.);
if (match < .2)
- { EPRINTF(EPLACE,"Base bias worse than 80/20%% ! (New_Align_Spec)\n");
- free(spec);
- EXIT(NULL);
+ { fprintf(stderr,"Warning: Base bias worse than 80/20%% ! (New_Align_Spec)\n");
+ fprintf(stderr," Capping bias at this ratio.\n");
+ bias = 3;
}
spec->ave_path = (int) (PATH_LEN * (1. - Bias_Factor[bias] * (1. - ave_corr)));
@@ -608,6 +571,9 @@ static int forward_wave(_Work_Data *work, _Align_Spec *spec, Alignment *align, P
int am, ac, ap;
char *a;
+ low -= 1;
+ hgh += 1;
+
if (low <= vmin || hgh >= vmax)
{ int span, wing;
int64 move;
@@ -684,20 +650,22 @@ static int forward_wave(_Work_Data *work, _Align_Spec *spec, Alignment *align, P
T = _T-vmin;
}
- if (low > minp)
- { low -= 1;
- NA[low] = NA[low+1];
+ if (low >= minp)
+ { NA[low] = NA[low+1];
NB[low] = NB[low+1];
V[low] = -1;
}
- if (hgh < maxp)
- { hgh += 1;
- NA[hgh] = NA[hgh-1];
+ else
+ low += 1;
+
+ if (hgh <= maxp)
+ { NA[hgh] = NA[hgh-1];
NB[hgh] = NB[hgh-1];
V[hgh] = am = -1;
}
else
- am = V[hgh];
+ am = V[--hgh];
+
dif += 1;
ac = V[hgh+1] = V[low-1] = -1;
@@ -1272,6 +1240,9 @@ static int reverse_wave(_Work_Data *work, _Align_Spec *spec, Alignment *align, P
int am, ac, ap;
char *a;
+ low -= 1;
+ hgh += 1;
+
if (low <= vmin || hgh >= vmax)
{ int span, wing;
int64 move, vd, md, had, hbd, nad, nbd, td;
@@ -1347,20 +1318,22 @@ static int reverse_wave(_Work_Data *work, _Align_Spec *spec, Alignment *align, P
T = _T-vmin;
}
- if (low > minp)
- { low -= 1;
- NA[low] = NA[low+1];
+ if (low >= minp)
+ { NA[low] = NA[low+1];
NB[low] = NB[low+1];
V[low] = ap = INT32_MAX;
}
else
- ap = V[low];
- if (hgh < maxp)
- { hgh += 1;
- NA[hgh] = NA[hgh-1];
+ ap = V[++low];
+
+ if (hgh <= maxp)
+ { NA[hgh] = NA[hgh-1];
NB[hgh] = NB[hgh-1];
V[hgh] = INT32_MAX;
}
+ else
+ hgh -= 1;
+
dif += 1;
ac = V[hgh+1] = V[low-1] = INT32_MAX;
@@ -1882,171 +1855,1314 @@ Path *Local_Alignment(Alignment *align, Work_Data *ework, Align_Spec *espec,
/****************************************************************************************\
* *
-* OVERLAP MANIPULATION *
+* EXTENSION VERSION OF LOCAL ALIGNMENT *
* *
\****************************************************************************************/
-static int64 PtrSize = sizeof(void *);
-static int64 OvlIOSize = sizeof(Overlap) - sizeof(void *);
+static int VectorEn = 4*sizeof(int) + sizeof(BVEC);
-int Read_Overlap(FILE *input, Overlap *ovl)
-{ if (fread( ((char *) ovl) + PtrSize, OvlIOSize, 1, input) != 1)
- return (1);
- return (0);
-}
+static int forward_extend(_Work_Data *work, _Align_Spec *spec, Alignment *align,
+ int midd, int mida, int minp, int maxp)
+{ char *aseq = align->aseq;
+ char *bseq = align->bseq;
+ Path *apath = align->path;
-int Read_Trace(FILE *input, Overlap *ovl, int tbytes)
-{ if (tbytes > 0 && ovl->path.tlen > 0)
- { if (fread(ovl->path.trace, tbytes*ovl->path.tlen, 1, input) != 1)
- return (1);
- }
- return (0);
-}
+ int hgh, low, dif;
+ int vlen, vmin, vmax;
+ int *V, *M;
+ int *_V, *_M;
+ BVEC *T;
+ BVEC *_T;
-void Write_Overlap(FILE *output, Overlap *ovl, int tbytes)
-{ fwrite( ((char *) ovl) + PtrSize, OvlIOSize, 1, output);
- if (ovl->path.trace != NULL)
- fwrite(ovl->path.trace,tbytes,ovl->path.tlen,output);
-}
+ int *HA, *NA;
+ int *_HA, *_NA;
+ Pebble *cells;
+ int avail, cmax;
-void Compress_TraceTo8(Overlap *ovl)
-{ uint16 *t16 = (uint16 *) ovl->path.trace;
- uint8 *t8 = (uint8 *) ovl->path.trace;
- int j;
+ int TRACE_SPACE = spec->trace_space;
+ int PATH_AVE = spec->ave_path;
+ int16 *SCORE = spec->score;
+ int16 *TABLE = spec->table;
- for (j = 0; j < ovl->path.tlen; j++)
- t8[j] = (uint8) (t16[j]);
-}
+ int besta, besty;
+ int trima, trimy, trimd;
+ int trimha;
+ int morea, morey, mored;
+ int moreha;
+ int more, morem, lasta;
+ int aclip, bclip;
-void Decompress_TraceTo16(Overlap *ovl)
-{ uint16 *t16 = (uint16 *) ovl->path.trace;
- uint8 *t8 = (uint8 *) ovl->path.trace;
- int j;
+ hgh = midd;
+ low = midd;
+ dif = 0;
- for (j = ovl->path.tlen-1; j >= 0; j--)
- t16[j] = t8[j];
-}
+ { int span, wing;
-void Print_Overlap(FILE *output, Overlap *ovl, int tbytes, int indent)
-{ int i;
+ span = (hgh-low)+1;
+ vlen = work->vecmax/VectorEn;
+ wing = (vlen - span)/2;
+ vmin = low - wing;
+ vmax = hgh + wing;
- fprintf(output,"%*s%d vs. ",indent,"",ovl->aread);
- if (COMP(ovl->flags))
- fprintf(output,"c(%d)\n",ovl->bread);
- else
- fprintf(output,"%d\n",ovl->bread);
- fprintf(output,"%*s [%d,%d] vs [%d,%d] w. %d diffs\n",indent,"",
- ovl->path.abpos,ovl->path.aepos,ovl->path.bbpos,ovl->path.bepos,ovl->path.diffs);
+ _V = ((int *) work->vector);
+ _M = _V + vlen;
+ _HA = _M + vlen;
+ _NA = _HA + vlen;
+ _T = ((BVEC *) (_NA + vlen));
- if (tbytes == 1)
- { uint8 *trace = (uint8 *) (ovl->path.trace);
- if (trace != NULL)
- { int p = ovl->path.bbpos + trace[1];
- fprintf(output,"%*sTrace: %3d/%5d",indent,"",trace[0],p);
- for (i = 3; i < ovl->path.tlen; i += 2)
- { if (i%10 == 0)
- fprintf(output,"\n%*s",indent+6,"");
- p += trace[i];
- fprintf(output," %3d/%5d",trace[i-1],p);
- }
- fprintf(output,"\n");
- }
- }
- else
- { uint16 *trace = (uint16 *) (ovl->path.trace);
- if (trace != NULL)
- { int p = ovl->path.bbpos + trace[1];
- fprintf(output,"%*sTrace: %3d/%5d",indent,"",trace[0],p);
- for (i = 3; i < ovl->path.tlen; i += 2)
- { if (i%10 == 0)
- fprintf(output,"\n%*s",indent+6,"");
- p += trace[i];
- fprintf(output," %3d/%5d",trace[i-1],p);
- }
- fprintf(output,"\n");
- }
- }
-}
+ V = _V-vmin;
+ M = _M-vmin;
+ HA = _HA-vmin;
+ NA = _NA-vmin;
+ T = _T-vmin;
-int Check_Trace_Points(Overlap *ovl, int tspace, int verbose, char *fname)
-{ int i, p;
+ cells = (Pebble *) (work->cells);
+ cmax = work->celmax;
+ avail = 0;
+ }
- if (((ovl->path.aepos-1)/tspace - ovl->path.abpos/tspace)*2 != ovl->path.tlen-2)
- { if (verbose)
- EPRINTF(EPLACE," %s: Wrong number of trace points\n",fname);
- return (1);
- }
- p = ovl->path.bbpos;
- if (tspace <= TRACE_XOVR)
- { uint8 *trace8 = (uint8 *) ovl->path.trace;
- for (i = 1; i < ovl->path.tlen; i += 2)
- p += trace8[i];
- }
- else
- { uint16 *trace16 = (uint16 *) ovl->path.trace;
- for (i = 1; i < ovl->path.tlen; i += 2)
- p += trace16[i];
- }
- if (p != ovl->path.bepos)
- { if (verbose)
- EPRINTF(EPLACE," %s: Trace point sum != aligned interval\n",fname);
- return (1);
- }
- return (0);
-}
+ /* Compute 0-wave starting from mid-line */
+ more = 1;
+ aclip = INT32_MAX;
+ bclip = -INT32_MAX;
-void Flip_Alignment(Alignment *align, int full)
-{ char *aseq = align->aseq;
- char *bseq = align->bseq;
- int alen = align->alen;
- int blen = align->blen;
- Path *path = align->path;
- int comp = COMP(align->flags);
+ besta = trima = morea = lasta = mida;
+ besty = trimy = morey = (mida-hgh) >> 1;
+ trimd = mored = 0;
+ trimha = moreha = 0;
+ morem = -1;
- int *trace = (int *) path->trace;
- int tlen = path->tlen;
+ { int k;
+ char *a;
- int i, j, p;
+ a = aseq + hgh;
+ for (k = hgh; k >= low; k--)
+ { int y, c, d;
+ int ha, na;
+ Pebble *pb;
- if (comp)
- { p = path->abpos;
- path->abpos = blen - path->bepos;
- path->bepos = alen - p;
- p = path->aepos;
- path->aepos = blen - path->bbpos;
- path->bbpos = alen - p;
+ y = (mida-k) >> 1;
- if (full)
- { alen += 2;
- blen += 2;
+ if (avail >= cmax-1)
+ { cmax = ((int) (avail*1.2)) + 10000;
+ cells = (Pebble *) Realloc(cells,cmax*sizeof(Pebble),"Reallocating trace cells");
+ if (cells == NULL)
+ EXIT(1);
+ work->celmax = cmax;
+ work->cells = (void *) cells;
+ }
- for (i = 0; i < tlen; i++)
- if ((p = trace[i]) < 0)
- trace[i] = alen + p;
- else
- trace[i] = p - blen;
+ na = ((y+k)/TRACE_SPACE)*TRACE_SPACE;
+#ifdef SHOW_TPS
+ printf(" A %d: %d,%d,0,%d\n",avail,-1,k,na); fflush(stdout);
+#endif
+ pb = cells+avail;
+ pb->ptr = -1;
+ pb->diag = k;
+ pb->diff = 0;
+ pb->mark = na;
+ ha = avail++;
+ na += TRACE_SPACE;
- i = tlen-1;
- j = 0;
- while (j < i)
- { p = trace[i];
- trace[i] = trace[j];
- trace[j] = p;
- i -= 1;
- j += 1;
- }
+ while (1)
+ { c = bseq[y];
+ if (c == 4)
+ { more = 0;
+ if (bclip < k)
+ bclip = k;
+ break;
+ }
+ d = a[y];
+ if (c != d)
+ { if (d == 4)
+ { more = 0;
+ aclip = k;
+ }
+ break;
+ }
+ y += 1;
+ }
+ c = (y << 1) + k;
- alen -= 2;
- blen -= 2;
- }
- }
- else
- { p = path->abpos;
- path->abpos = path->bbpos;
- path->bbpos = p;
- p = path->aepos;
- path->aepos = path->bepos;
+ while (y+k >= na)
+ { if (avail >= cmax)
+ { cmax = ((int) (avail*1.2)) + 10000;
+ cells = (Pebble *) Realloc(cells,cmax*sizeof(Pebble),"Reallocating trace cells");
+ if (cells == NULL)
+ EXIT(1);
+ work->celmax = cmax;
+ work->cells = (void *) cells;
+ }
+#ifdef SHOW_TPS
+ printf(" A %d: %d,%d,0,%d\n",avail,ha,k,na); fflush(stdout);
+#endif
+ pb = cells+avail;
+ pb->ptr = ha;
+ pb->diag = k;
+ pb->diff = 0;
+ pb->mark = na;
+ ha = avail++;
+ na += TRACE_SPACE;
+ }
+
+ if (c > besta)
+ { besta = trima = lasta = c;
+ besty = trimy = y;
+ trimha = ha;
+ }
+
+ V[k] = c;
+ T[k] = PATH_INT;
+ M[k] = PATH_LEN;
+ HA[k] = ha;
+ NA[k] = na;
+
+ a -= 1;
+ }
+ }
+
+ if (more == 0)
+ { if (bseq[besty] != 4 && aseq[besta - besty] != 4)
+ more = 1;
+ if (hgh >= aclip)
+ { hgh = aclip-1;
+ if (morem <= M[aclip])
+ { morem = M[aclip];
+ morea = V[aclip];
+ morey = (morea - aclip)/2;
+ moreha = HA[aclip];
+ }
+ }
+ if (low <= bclip)
+ { low = bclip+1;
+ if (morem <= M[bclip])
+ { morem = M[bclip];
+ morea = V[bclip];
+ morey = (morea - bclip)/2;
+ moreha = HA[bclip];
+ }
+ }
+ aclip = INT32_MAX;
+ bclip = -INT32_MAX;
+ }
+
+#ifdef DEBUG_WAVE
+ printf("\nFORWARD WAVE:\n");
+ print_wave(V,M,low,hgh,besta);
+#endif
+
+ /* Compute successive waves until no furthest reaching points remain */
+
+ while (more && lasta >= besta - TRIM_MLAG)
+ { int k, n;
+ int ua;
+ BVEC t;
+ int am, ac, ap;
+ char *a;
+
+ if (low <= vmin || hgh >= vmax)
+ { int span, wing;
+ int64 move;
+ int64 vd, md, had, nad, td;
+
+ span = (hgh-low)+1;
+ if (.8*vlen < span)
+ { if (enlarge_vector(work,vlen*VectorEn))
+ EXIT(1);
+
+ move = ((void *) _V) - work->vector;
+ vlen = work->vecmax/VectorEn;
+
+ _V = (int *) work->vector;
+ _M = _V + vlen;
+ _HA = _M + vlen;
+ _NA = _HA + vlen;
+ _T = ((BVEC *) (_NA + vlen));
+ }
+ else
+ move = 0;
+
+ wing = (vlen - span)/2;
+
+ vd = ((void *) ( _V+wing)) - (((void *) ( V+low)) - move);
+ md = ((void *) ( _M+wing)) - (((void *) ( M+low)) - move);
+ had = ((void *) (_HA+wing)) - (((void *) (HA+low)) - move);
+ nad = ((void *) (_NA+wing)) - (((void *) (NA+low)) - move);
+ td = ((void *) ( _T+wing)) - (((void *) ( T+low)) - move);
+
+ if (vd < 0)
+ memmove( _V+wing, ((void *) ( V+low)) - move, span*sizeof(int));
+ if (md < 0)
+ memmove( _M+wing, ((void *) ( M+low)) - move, span*sizeof(int));
+ if (had < 0)
+ memmove(_HA+wing, ((void *) (HA+low)) - move, span*sizeof(int));
+ if (nad < 0)
+ memmove(_NA+wing, ((void *) (NA+low)) - move, span*sizeof(int));
+ if (td < 0)
+ memmove( _T+wing, ((void *) ( T+low)) - move, span*sizeof(BVEC));
+
+ if (td > 0)
+ memmove( _T+wing, ((void *) ( T+low)) - move, span*sizeof(BVEC));
+ if (nad > 0)
+ memmove(_NA+wing, ((void *) (NA+low)) - move, span*sizeof(int));
+ if (had > 0)
+ memmove(_HA+wing, ((void *) (HA+low)) - move, span*sizeof(int));
+ if (md > 0)
+ memmove( _M+wing, ((void *) ( M+low)) - move, span*sizeof(int));
+ if (vd > 0)
+ memmove( _V+wing, ((void *) ( V+low)) - move, span*sizeof(int));
+
+ vmin = low-wing;
+ vmax = hgh+wing;
+
+ V = _V-vmin;
+ M = _M-vmin;
+ HA = _HA-vmin;
+ NA = _NA-vmin;
+ T = _T-vmin;
+ }
+
+ if (low > minp)
+ { low -= 1;
+ NA[low] = NA[low+1];
+ V[low] = -1;
+ }
+ if (hgh < maxp)
+ { hgh += 1;
+ NA[hgh] = NA[hgh-1];
+ V[hgh] = am = -1;
+ }
+ else
+ am = V[hgh];
+ dif += 1;
+
+ ac = V[hgh+1] = V[low-1] = -1;
+ a = aseq + hgh;
+ t = PATH_INT;
+ n = PATH_LEN;
+ ua = -1;
+ for (k = hgh; k >= low; k--)
+ { int y, m;
+ int ha;
+ int c, d;
+ BVEC b;
+ Pebble *pb;
+
+ ap = ac;
+ ac = am;
+ am = V[d = k-1];
+
+ if (ac < am)
+ if (am < ap)
+ { c = ap+1;
+ m = n;
+ b = t;
+ ha = ua;
+ }
+ else
+ { c = am+1;
+ m = M[d];
+ b = T[d];
+ ha = HA[d];
+ }
+ else
+ if (ac < ap)
+ { c = ap+1;
+ m = n;
+ b = t;
+ ha = ua;
+ }
+ else
+ { c = ac+2;
+ m = M[k];
+ b = T[k];
+ ha = HA[k];
+ }
+
+ if ((b & PATH_TOP) != 0)
+ m -= 1;
+ b <<= 1;
+
+ y = (c-k) >> 1;
+ while (1)
+ { c = bseq[y];
+ if (c == 4)
+ { more = 0;
+ if (bclip < k)
+ bclip = k;
+ break;
+ }
+ d = a[y];
+ if (c != d)
+ { if (d == 4)
+ { more = 0;
+ aclip = k;
+ }
+ break;
+ }
+ y += 1;
+ if ((b & PATH_TOP) == 0)
+ m += 1;
+ b = (b << 1) | 1;
+ }
+ c = (y << 1) + k;
+
+ while (y+k >= NA[k])
+ { if (cells[ha].mark < NA[k])
+ { if (avail >= cmax)
+ { cmax = ((int) (avail*1.2)) + 10000;
+ cells = (Pebble *) Realloc(cells,cmax*sizeof(Pebble),
+ "Reallocating trace cells");
+ if (cells == NULL)
+ EXIT(1);
+ work->celmax = cmax;
+ work->cells = (void *) cells;
+ }
+#ifdef SHOW_TPS
+ printf(" A %d: %d,%d,%d,%d\n",avail,ha,k,dif,NA[k]); fflush(stdout);
+#endif
+ pb = cells+avail;
+ pb->ptr = ha;
+ pb->diag = k;
+ pb->diff = dif;
+ pb->mark = NA[k];
+ ha = avail++;
+ }
+ NA[k] += TRACE_SPACE;
+ }
+
+ if (c > besta)
+ { besta = c;
+ besty = y;
+ if (m >= PATH_AVE)
+ { lasta = c;
+ if (TABLE[b & TRIM_MASK] >= 0)
+ if (TABLE[(b >> TRIM_LEN) & TRIM_MASK] + SCORE[b & TRIM_MASK] >= 0)
+ { trima = c;
+ trimy = y;
+ trimd = dif;
+ trimha = ha;
+ }
+ }
+ }
+
+ t = T[k];
+ n = M[k];
+ ua = HA[k];
+ V[k] = c;
+ T[k] = b;
+ M[k] = m;
+ HA[k] = ha;
+
+ a -= 1;
+ }
+
+ if (more == 0)
+ { if (bseq[besty] != 4 && aseq[besta-besty] != 4)
+ more = 1;
+ if (hgh >= aclip)
+ { hgh = aclip-1;
+ if (morem <= M[aclip])
+ { morem = M[aclip];
+ morea = V[aclip];
+ morey = (morea - aclip)/2;
+ mored = dif;
+ moreha = HA[aclip];
+ }
+ }
+ if (low <= bclip)
+ { low = bclip+1;
+ if (morem <= M[bclip])
+ { morem = M[bclip];
+ morea = V[bclip];
+ morey = (morea - bclip)/2;
+ mored = dif;
+ moreha = HA[bclip];
+ }
+ }
+ aclip = INT32_MAX;
+ bclip = -INT32_MAX;
+ }
+
+ n = besta - WAVE_LAG;
+ while (hgh >= low)
+ if (V[hgh] < n)
+ hgh -= 1;
+ else
+ { while (V[low] < n)
+ low += 1;
+ break;
+ }
+
+#ifdef WAVE_STATS
+ k = (hgh-low)+1;
+ if (k > MAX)
+ MAX = k;
+ TOT += k;
+ NWV += 1;
+#endif
+
+#ifdef DEBUG_WAVE
+ print_wave(V,M,low,hgh,besta);
+#endif
+ }
+
+ { uint16 *atrace = (uint16 *) apath->trace;
+ int atlen;
+ int trimx;
+ int a, b, k, h;
+ int d, e;
+
+ if (morem >= 0)
+ { trimx = morea-morey;
+ trimy = morey;
+ trimd = mored;
+ trimha = moreha;
+ }
+ else
+ trimx = trima-trimy;
+
+ atlen = 0;
+
+ a = -1;
+ for (h = trimha; h >= 0; h = b)
+ { b = cells[h].ptr;
+ cells[h].ptr = a;
+ a = h;
+ }
+ h = a;
+
+ k = cells[h].diag;
+ b = (mida-k)/2;
+ e = 0;
+#ifdef SHOW_TRAIL
+ printf(" A path = (%5d,%5d)\n",(mida+k)/2,b); fflush(stdout);
+#endif
+ for (h = cells[h].ptr; h >= 0; h = cells[h].ptr)
+ { k = cells[h].diag;
+ a = cells[h].mark - k;
+ d = cells[h].diff;
+ atrace[atlen++] = (uint16) (d-e);
+ atrace[atlen++] = (uint16) (a-b);
+#ifdef SHOW_TRAIL
+ printf(" %4d: (%5d,%5d): %3d / %3d\n",h,a+k,a,d-e,a-b); fflush(stdout);
+#endif
+ b = a;
+ e = d;
+ }
+ if (b+k != trimx)
+ { atrace[atlen++] = (uint16) (trimd-e);
+ atrace[atlen++] = (uint16) (trimy-b);
+#ifdef SHOW_TRAIL
+ printf(" (%5d,%5d): %3d / %3d\n",trimx,trimy,trimd-e,trimy-b); fflush(stdout);
+#endif
+ }
+ else if (b != trimy)
+ { atrace[atlen-1] = (uint16) (atrace[atlen-1] + (trimy-b));
+ atrace[atlen-2] = (uint16) (atrace[atlen-2] + (trimd-e));
+#ifdef SHOW_TRAIL
+ printf(" @ (%5d,%5d): %3d / %3d\n",trimx,trimy,trimd-e,trimy-b); fflush(stdout);
+#endif
+ }
+
+ apath->aepos = trimx;
+ apath->bepos = trimy;
+ apath->diffs = trimd;
+ apath->tlen = atlen;
+ }
+
+ return (0);
+}
+
+static int reverse_extend(_Work_Data *work, _Align_Spec *spec, Alignment *align,
+ int midd, int mida, int minp, int maxp)
+{ char *aseq = align->aseq - 1;
+ char *bseq = align->bseq - 1;
+ Path *apath = align->path;
+
+ int hgh, low, dif;
+ int vlen, vmin, vmax;
+ int *V, *M;
+ int *_V, *_M;
+ BVEC *T;
+ BVEC *_T;
+
+ int *HA, *NA;
+ int *_HA, *_NA;
+ Pebble *cells;
+ int avail, cmax;
+
+ int TRACE_SPACE = spec->trace_space;
+ int PATH_AVE = spec->ave_path;
+ int16 *SCORE = spec->score;
+ int16 *TABLE = spec->table;
+
+ int besta, besty;
+ int trima, trimy, trimd;
+ int trimha;
+ int morea, morey, mored;
+ int moreha;
+ int more, morem, lasta;
+ int aclip, bclip;
+
+ hgh = midd;
+ low = midd;
+ dif = 0;
+
+ { int span, wing;
+
+ span = (hgh-low)+1;
+ vlen = work->vecmax/VectorEn;
+ wing = (vlen - span)/2;
+ vmin = low - wing;
+ vmax = hgh + wing;
+
+ _V = ((int *) work->vector);
+ _M = _V + vlen;
+ _HA = _M + vlen;
+ _NA = _HA + vlen;
+ _T = ((BVEC *) (_NA + vlen));
+
+ V = _V-vmin;
+ M = _M-vmin;
+ HA = _HA-vmin;
+ NA = _NA-vmin;
+ T = _T-vmin;
+
+ cells = (Pebble *) (work->cells);
+ cmax = work->celmax;
+ avail = 0;
+ }
+
+ more = 1;
+ aclip = -INT32_MAX;
+ bclip = INT32_MAX;
+
+ besta = trima = morea = lasta = mida;
+ besty = trimy = morey = (mida-hgh) >> 1;
+ trimd = mored = 0;
+ trimha = moreha = 0;
+ morem = -1;
+
+ { int k;
+ char *a;
+
+ a = aseq + low;
+ for (k = low; k <= hgh; k++)
+ { int y, c, d;
+ int ha, na;
+ Pebble *pb;
+
+ y = (mida-k) >> 1;
+
+ if (avail >= cmax-1)
+ { cmax = ((int) (avail*1.2)) + 10000;
+ cells = (Pebble *) Realloc(cells,cmax*sizeof(Pebble),"Reallocating trace cells");
+ if (cells == NULL)
+ EXIT(1);
+ work->celmax = cmax;
+ work->cells = (void *) cells;
+ }
+
+ na = ((y+k+TRACE_SPACE-1)/TRACE_SPACE-1)*TRACE_SPACE;
+#ifdef SHOW_TPS
+ printf(" A %d: -1,%d,0,%d\n",avail,k,na+TRACE_SPACE); fflush(stdout);
+#endif
+ pb = cells+avail;
+ pb->ptr = -1;
+ pb->diag = k;
+ pb->diff = 0;
+ pb->mark = y+k;
+ ha = avail++;
+
+ while (1)
+ { c = bseq[y];
+ if (c == 4)
+ { more = 0;
+ if (bclip > k)
+ bclip = k;
+ break;
+ }
+ d = a[y];
+ if (c != d)
+ { if (d == 4)
+ { more = 0;
+ aclip = k;
+ }
+ break;
+ }
+ y -= 1;
+ }
+ c = (y << 1) + k;
+
+ while (y+k <= na)
+ { if (avail >= cmax)
+ { cmax = ((int) (avail*1.2)) + 10000;
+ cells = (Pebble *) Realloc(cells,cmax*sizeof(Pebble),"Reallocating trace cells");
+ if (cells == NULL)
+ EXIT(1);
+ work->celmax = cmax;
+ work->cells = (void *) cells;
+ }
+#ifdef SHOW_TPS
+ printf(" A %d: %d,%d,0,%d\n",avail,ha,k,na); fflush(stdout);
+#endif
+ pb = cells+avail;
+ pb->ptr = ha;
+ pb->diag = k;
+ pb->diff = 0;
+ pb->mark = na;
+ ha = avail++;
+ na -= TRACE_SPACE;
+ }
+
+ if (c < besta)
+ { besta = trima = lasta = c;
+ besty = trimy = y;
+ trimha = ha;
+ }
+
+ V[k] = c;
+ T[k] = PATH_INT;
+ M[k] = PATH_LEN;
+ HA[k] = ha;
+ NA[k] = na;
+
+ a += 1;
+ }
+ }
+
+ if (more == 0)
+ { if (bseq[besty] != 4 && aseq[besta - besty] != 4)
+ more = 1;
+ if (low <= aclip)
+ { low = aclip+1;
+ if (morem <= M[aclip])
+ { morem = M[aclip];
+ morea = V[aclip];
+ morey = (morea - aclip)/2;
+ moreha = HA[aclip];
+ }
+ }
+ if (hgh >= bclip)
+ { hgh = bclip-1;
+ if (morem <= M[bclip])
+ { morem = M[bclip];
+ morea = V[bclip];
+ morey = (morea - bclip)/2;
+ moreha = HA[bclip];
+ }
+ }
+ aclip = -INT32_MAX;
+ bclip = INT32_MAX;
+ }
+
+#ifdef DEBUG_WAVE
+ printf("\nREVERSE WAVE:\n");
+ print_wave(V,M,low,hgh,besta);
+#endif
+
+ while (more && lasta <= besta + TRIM_MLAG)
+ { int k, n;
+ int ua;
+ BVEC t;
+ int am, ac, ap;
+ char *a;
+
+ if (low <= vmin || hgh >= vmax)
+ { int span, wing;
+ int64 move, vd, md, had, nad, td;
+
+ span = (hgh-low)+1;
+ if (.8*vlen < span)
+ { if (enlarge_vector(work,vlen*VectorEn))
+ EXIT(1);
+
+ move = ((void *) _V) - work->vector;
+ vlen = work->vecmax/VectorEn;
+
+ _V = (int *) work->vector;
+ _M = _V + vlen;
+ _HA = _M + vlen;
+ _NA = _HA + vlen;
+ _T = ((BVEC *) (_NA + vlen));
+ }
+ else
+ move = 0;
+
+ wing = (vlen - span)/2;
+
+ vd = ((void *) ( _V+wing)) - (((void *) ( V+low)) - move);
+ md = ((void *) ( _M+wing)) - (((void *) ( M+low)) - move);
+ had = ((void *) (_HA+wing)) - (((void *) (HA+low)) - move);
+ nad = ((void *) (_NA+wing)) - (((void *) (NA+low)) - move);
+ td = ((void *) ( _T+wing)) - (((void *) ( T+low)) - move);
+
+ if (vd < 0)
+ memmove( _V+wing, ((void *) ( V+low)) - move, span*sizeof(int));
+ if (md < 0)
+ memmove( _M+wing, ((void *) ( M+low)) - move, span*sizeof(int));
+ if (had < 0)
+ memmove(_HA+wing, ((void *) (HA+low)) - move, span*sizeof(int));
+ if (nad < 0)
+ memmove(_NA+wing, ((void *) (NA+low)) - move, span*sizeof(int));
+ if (td < 0)
+ memmove( _T+wing, ((void *) ( T+low)) - move, span*sizeof(BVEC));
+
+ if (td > 0)
+ memmove( _T+wing, ((void *) ( T+low)) - move, span*sizeof(BVEC));
+ if (nad > 0)
+ memmove(_NA+wing, ((void *) (NA+low)) - move, span*sizeof(int));
+ if (had > 0)
+ memmove(_HA+wing, ((void *) (HA+low)) - move, span*sizeof(int));
+ if (md > 0)
+ memmove( _M+wing, ((void *) ( M+low)) - move, span*sizeof(int));
+ if (vd > 0)
+ memmove( _V+wing, ((void *) ( V+low)) - move, span*sizeof(int));
+
+ vmin = low-wing;
+ vmax = hgh+wing;
+
+ V = _V-vmin;
+ M = _M-vmin;
+ HA = _HA-vmin;
+ NA = _NA-vmin;
+ T = _T-vmin;
+ }
+
+ if (low > minp)
+ { low -= 1;
+ NA[low] = NA[low+1];
+ V[low] = ap = INT32_MAX;
+ }
+ else
+ ap = V[low];
+ if (hgh < maxp)
+ { hgh += 1;
+ NA[hgh] = NA[hgh-1];
+ V[hgh] = INT32_MAX;
+ }
+ dif += 1;
+
+ ac = V[hgh+1] = V[low-1] = INT32_MAX;
+ a = aseq + low;
+ t = PATH_INT;
+ n = PATH_LEN;
+ ua = -1;
+ for (k = low; k <= hgh; k++)
+ { int y, m;
+ int ha;
+ int c, d;
+ BVEC b;
+ Pebble *pb;
+
+ am = ac;
+ ac = ap;
+ ap = V[d = k+1];
+
+ if (ac > ap)
+ if (ap > am)
+ { c = am-1;
+ m = n;
+ b = t;
+ ha = ua;
+ }
+ else
+ { c = ap-1;
+ m = M[d];
+ b = T[d];
+ ha = HA[d];
+ }
+ else
+ if (ac > am)
+ { c = am-1;
+ m = n;
+ b = t;
+ ha = ua;
+ }
+ else
+ { c = ac-2;
+ m = M[k];
+ b = T[k];
+ ha = HA[k];
+ }
+
+ if ((b & PATH_TOP) != 0)
+ m -= 1;
+ b <<= 1;
+
+ y = (c-k) >> 1;
+ while (1)
+ { c = bseq[y];
+ if (c == 4)
+ { more = 0;
+ if (bclip > k)
+ bclip = k;
+ break;
+ }
+ d = a[y];
+ if (c != d)
+ { if (d == 4)
+ { more = 0;
+ aclip = k;
+ }
+ break;
+ }
+ y -= 1;
+ if ((b & PATH_TOP) == 0)
+ m += 1;
+ b = (b << 1) | 1;
+ }
+ c = (y << 1) + k;
+
+ while (y+k <= NA[k])
+ { if (cells[ha].mark > NA[k])
+ { if (avail >= cmax)
+ { cmax = ((int) (avail*1.2)) + 10000;
+ cells = (Pebble *) Realloc(cells,cmax*sizeof(Pebble),
+ "Reallocating trace cells");
+ if (cells == NULL)
+ EXIT(1);
+ work->celmax = cmax;
+ work->cells = (void *) cells;
+ }
+#ifdef SHOW_TPS
+ printf(" A %d: %d,%d,%d,%d\n",avail,ha,k,dif,NA[k]); fflush(stdout);
+#endif
+ pb = cells+avail;
+ pb->ptr = ha;
+ pb->diag = k;
+ pb->diff = dif;
+ pb->mark = NA[k];
+ ha = avail++;
+ }
+ NA[k] -= TRACE_SPACE;
+ }
+
+ if (c < besta)
+ { besta = c;
+ besty = y;
+ if (m >= PATH_AVE)
+ { lasta = c;
+ if (TABLE[b & TRIM_MASK] >= 0)
+ if (TABLE[(b >> TRIM_LEN) & TRIM_MASK] + SCORE[b & TRIM_MASK] >= 0)
+ { trima = c;
+ trimy = y;
+ trimd = dif;
+ trimha = ha;
+ }
+ }
+ }
+
+ t = T[k];
+ n = M[k];
+ ua = HA[k];
+ V[k] = c;
+ T[k] = b;
+ M[k] = m;
+ HA[k] = ha;
+
+ a += 1;
+ }
+
+ if (more == 0)
+ { if (bseq[besty] != 4 && aseq[besta - besty] != 4)
+ more = 1;
+ if (low <= aclip)
+ { low = aclip+1;
+ if (morem <= M[aclip])
+ { morem = M[aclip];
+ morea = V[aclip];
+ morey = (morea - aclip)/2;
+ mored = dif;
+ moreha = HA[aclip];
+ }
+ }
+ if (hgh >= bclip)
+ { hgh = bclip-1;
+ if (morem <= M[bclip])
+ { morem = M[bclip];
+ morea = V[bclip];
+ morey = (morea - bclip)/2;
+ mored = dif;
+ moreha = HA[bclip];
+ }
+ }
+ aclip = -INT32_MAX;
+ bclip = INT32_MAX;
+ }
+
+ n = besta + WAVE_LAG;
+ while (hgh >= low)
+ if (V[hgh] > n)
+ hgh -= 1;
+ else
+ { while (V[low] > n)
+ low += 1;
+ break;
+ }
+
+#ifdef WAVE_STATS
+ k = (hgh-low)+1;
+ if (k > MAX)
+ MAX = k;
+ TOT += k;
+ NWV += 1;
+#endif
+
+#ifdef DEBUG_WAVE
+ print_wave(V,M,low,hgh,besta);
+#endif
+ }
+
+ { uint16 *atrace = (uint16 *) apath->trace;
+ int atlen;
+ int trimx;
+ int a, b, k, h;
+ int d, e;
+
+ if (morem >= 0)
+ { trimx = morea-morey;
+ trimy = morey;
+ trimd = mored;
+ trimha = moreha;
+ }
+ else
+ trimx = trima-trimy;
+
+ atlen = 0;
+
+ a = -1;
+ for (h = trimha; h >= 0; h = b)
+ { b = cells[h].ptr;
+ cells[h].ptr = a;
+ a = h;
+ }
+ h = a;
+
+ k = cells[h].diag;
+ b = cells[h].mark - k;
+ e = 0;
+#ifdef SHOW_TRAIL
+ printf(" A path = (%5d,%5d)\n",b+k,b); fflush(stdout);
+#endif
+ if ((b+k)%TRACE_SPACE != 0)
+ { h = cells[h].ptr;
+ if (h < 0)
+ { a = trimy;
+ d = trimd;
+ }
+ else
+ { k = cells[h].diag;
+ a = cells[h].mark - k;
+ d = cells[h].diff;
+ }
+#ifdef SHOW_TRAIL
+ printf(" +%4d: (%5d,%5d): %3d / %3d\n",h,a+k,a,d-e,b-a); fflush(stdout);
+#endif
+ atrace[--atlen] = (uint16) (b-a);
+ atrace[--atlen] = (uint16) (d-e);
+ b = a;
+ e = d;
+ }
+ if (h >= 0)
+ { for (h = cells[h].ptr; h >= 0; h = cells[h].ptr)
+ { k = cells[h].diag;
+ a = cells[h].mark - k;
+ atrace[--atlen] = (uint16) (b-a);
+ d = cells[h].diff;
+ atrace[--atlen] = (uint16) (d-e);
+#ifdef SHOW_TRAIL
+ printf(" %4d: (%5d,%5d): %3d / %3d\n",h,a+k,a,d-e,b-a); fflush(stdout);
+#endif
+ b = a;
+ e = d;
+ }
+ if (b+k != trimx)
+ { atrace[--atlen] = (uint16) (b-trimy);
+ atrace[--atlen] = (uint16) (trimd-e);
+#ifdef SHOW_TRAIL
+ printf(" (%5d,%5d): %3d / %3d\n",trimx,trimy,trimd-e,b-trimy); fflush(stdout);
+#endif
+ }
+ else if (b != trimy)
+ { atrace[atlen+1] = (uint16) (atrace[atlen+1] + (b-trimy));
+ atrace[atlen] = (uint16) (atrace[atlen] + (trimd-e));
+#ifdef SHOW_TRAIL
+ printf(" @ (%5d,%5d): %3d / %3d\n",trimx,trimy,trimd-e,b-trimy); fflush(stdout);
+#endif
+ }
+ }
+
+ apath->abpos = trimx;
+ apath->bbpos = trimy;
+ apath->diffs = trimd;
+ apath->tlen = - atlen;
+ apath->trace = atrace + atlen;
+ }
+
+ return (0);
+}
+
+
+/* Find the longest local alignment between aseq and bseq through (xcnt,ycnt)
+ See associated .h file for the precise definition of the interface.
+*/
+
+int Find_Extension(Alignment *align, Work_Data *ework, Align_Spec *espec,
+ int diag, int anti, int lbord, int hbord, int prefix)
+{ _Work_Data *work = ( _Work_Data *) ework;
+ _Align_Spec *spec = (_Align_Spec *) espec;
+
+ Path *apath;
+ int minp, maxp;
+
+ { int alen, blen;
+ int maxtp, wsize;
+
+ alen = align->alen;
+ blen = align->blen;
+
+ wsize = VectorEn*10000;
+ if (wsize >= work->vecmax)
+ if (enlarge_vector(work,wsize))
+ EXIT(1);
+
+ if (alen < blen)
+ maxtp = 2*(blen/spec->trace_space+2);
+ else
+ maxtp = 2*(alen/spec->trace_space+2);
+ wsize = 2*maxtp*sizeof(uint16);
+ if (wsize > work->pntmax)
+ if (enlarge_points(work,wsize))
+ EXIT(1);
+
+ apath = align->path;
+ apath->trace = ((uint16 *) work->points) + maxtp;
+ }
+
+#ifdef DEBUG_PASSES
+ printf("\n");
+#endif
+
+ if (lbord < 0)
+ minp = -INT32_MAX;
+ else
+ minp = diag-lbord;
+ if (hbord < 0)
+ maxp = INT32_MAX;
+ else
+ maxp = diag+hbord;
+
+ if (prefix)
+ { if (reverse_extend(work,spec,align,diag,anti,minp,maxp))
+ EXIT(1);
+ apath->aepos = (anti-diag)/2;
+ apath->bepos = (anti+diag)/2;
+#ifdef DEBUG_PASSES
+ printf("E1 (%d,%d) => (%d,%d) %d\n",
+ (anti+diag)/2,(anti-diag)/2,apath->abpos,apath->bbpos,apath->diffs);
+#endif
+ }
+ else
+ { if (forward_extend(work,spec,align,diag,anti,minp,maxp))
+ EXIT(1);
+ apath->abpos = (anti-diag)/2;
+ apath->bbpos = (anti+diag)/2;
+#ifdef DEBUG_PASSES
+ printf("F1 (%d,%d) => (%d,%d) %d\n",
+ (anti+diag)/2,(anti-diag)/2,apath->aepos,apath->bepos,apath->diffs);
+#endif
+ }
+
+#ifdef DEBUG_POINTS
+ { uint16 *trace = (uint16 *) apath->trace;
+ int a, h;
+
+ printf("\nA-path (%d,%d)->(%d,%d)",apath->abpos,apath->bbpos,apath->aepos,apath->bepos);
+ printf(" %c\n",(COMP(align->flags) ? 'c' : 'n'));
+ a = apath->bbpos;
+ for (h = 1; h < apath->tlen; h += 2)
+ { int dif = trace[h-1];
+ int del = trace[h];
+ a += del;
+ printf(" %d / %d (%d)\n",dif,del,a);
+ }
+ }
+#endif
+
+ return (0);
+}
+
+
+/****************************************************************************************\
+* *
+* OVERLAP MANIPULATION *
+* *
+\****************************************************************************************/
+
+static int64 PtrSize = sizeof(void *);
+static int64 OvlIOSize = sizeof(Overlap) - sizeof(void *);
+
+int Read_Overlap(FILE *input, Overlap *ovl)
+{ if (fread( ((char *) ovl) + PtrSize, OvlIOSize, 1, input) != 1)
+ return (1);
+ return (0);
+}
+
+int Read_Trace(FILE *input, Overlap *ovl, int tbytes)
+{ if (tbytes > 0 && ovl->path.tlen > 0)
+ { if (fread(ovl->path.trace, tbytes*ovl->path.tlen, 1, input) != 1)
+ return (1);
+ }
+ return (0);
+}
+
+void Write_Overlap(FILE *output, Overlap *ovl, int tbytes)
+{ fwrite( ((char *) ovl) + PtrSize, OvlIOSize, 1, output);
+ if (ovl->path.trace != NULL)
+ fwrite(ovl->path.trace,tbytes,ovl->path.tlen,output);
+}
+
+void Compress_TraceTo8(Overlap *ovl)
+{ uint16 *t16 = (uint16 *) ovl->path.trace;
+ uint8 *t8 = (uint8 *) ovl->path.trace;
+ int j;
+
+ for (j = 0; j < ovl->path.tlen; j++)
+ t8[j] = (uint8) (t16[j]);
+}
+
+void Decompress_TraceTo16(Overlap *ovl)
+{ uint16 *t16 = (uint16 *) ovl->path.trace;
+ uint8 *t8 = (uint8 *) ovl->path.trace;
+ int j;
+
+ for (j = ovl->path.tlen-1; j >= 0; j--)
+ t16[j] = t8[j];
+}
+
+void Print_Overlap(FILE *output, Overlap *ovl, int tbytes, int indent)
+{ int i;
+
+ fprintf(output,"%*s%d vs. ",indent,"",ovl->aread);
+ if (COMP(ovl->flags))
+ fprintf(output,"c(%d)\n",ovl->bread);
+ else
+ fprintf(output,"%d\n",ovl->bread);
+ fprintf(output,"%*s [%d,%d] vs [%d,%d] w. %d diffs\n",indent,"",
+ ovl->path.abpos,ovl->path.aepos,ovl->path.bbpos,ovl->path.bepos,ovl->path.diffs);
+
+ if (tbytes == 1)
+ { uint8 *trace = (uint8 *) (ovl->path.trace);
+ if (trace != NULL)
+ { int p = ovl->path.bbpos + trace[1];
+ fprintf(output,"%*sTrace: %3d/%5d",indent,"",trace[0],p);
+ for (i = 3; i < ovl->path.tlen; i += 2)
+ { if (i%10 == 0)
+ fprintf(output,"\n%*s",indent+6,"");
+ p += trace[i];
+ fprintf(output," %3d/%5d",trace[i-1],p);
+ }
+ fprintf(output,"\n");
+ }
+ }
+ else
+ { uint16 *trace = (uint16 *) (ovl->path.trace);
+ if (trace != NULL)
+ { int p = ovl->path.bbpos + trace[1];
+ fprintf(output,"%*sTrace: %3d/%5d",indent,"",trace[0],p);
+ for (i = 3; i < ovl->path.tlen; i += 2)
+ { if (i%10 == 0)
+ fprintf(output,"\n%*s",indent+6,"");
+ p += trace[i];
+ fprintf(output," %3d/%5d",trace[i-1],p);
+ }
+ fprintf(output,"\n");
+ }
+ }
+}
+
+int Check_Trace_Points(Overlap *ovl, int tspace, int verbose, char *fname)
+{ int i, p;
+
+ if (((ovl->path.aepos-1)/tspace - ovl->path.abpos/tspace)*2 != ovl->path.tlen-2)
+ { if (verbose)
+ EPRINTF(EPLACE," %s: Wrong number of trace points\n",fname);
+ return (1);
+ }
+ p = ovl->path.bbpos;
+ if (tspace <= TRACE_XOVR)
+ { uint8 *trace8 = (uint8 *) ovl->path.trace;
+ for (i = 1; i < ovl->path.tlen; i += 2)
+ p += trace8[i];
+ }
+ else
+ { uint16 *trace16 = (uint16 *) ovl->path.trace;
+ for (i = 1; i < ovl->path.tlen; i += 2)
+ p += trace16[i];
+ }
+ if (p != ovl->path.bepos)
+ { if (verbose)
+ EPRINTF(EPLACE," %s: Trace point sum != aligned interval\n",fname);
+ return (1);
+ }
+ return (0);
+}
+
+
+void Flip_Alignment(Alignment *align, int full)
+{ char *aseq = align->aseq;
+ char *bseq = align->bseq;
+ int alen = align->alen;
+ int blen = align->blen;
+ Path *path = align->path;
+ int comp = COMP(align->flags);
+
+ int *trace = (int *) path->trace;
+ int tlen = path->tlen;
+
+ int i, j, p;
+
+ if (comp)
+ { p = path->abpos;
+ path->abpos = blen - path->bepos;
+ path->bepos = alen - p;
+ p = path->aepos;
+ path->aepos = blen - path->bbpos;
+ path->bbpos = alen - p;
+
+ if (full)
+ { alen += 2;
+ blen += 2;
+
+ for (i = 0; i < tlen; i++)
+ if ((p = trace[i]) < 0)
+ trace[i] = alen + p;
+ else
+ trace[i] = p - blen;
+
+ i = tlen-1;
+ j = 0;
+ while (j < i)
+ { p = trace[i];
+ trace[i] = trace[j];
+ trace[j] = p;
+ i -= 1;
+ j += 1;
+ }
+
+ alen -= 2;
+ blen -= 2;
+ }
+ }
+ else
+ { p = path->abpos;
+ path->abpos = path->bbpos;
+ path->bbpos = p;
+ p = path->aepos;
+ path->aepos = path->bepos;
path->bepos = p;
if (full)
@@ -2086,6 +3202,7 @@ void Complement_Seq(char *aseq, int len)
*s = (char) (3 - *s);
}
+
/* Print an alignment to file between a and b given in trace (unpacked).
Prefix gives the length of the initial prefix of a that is unaligned. */
@@ -2697,6 +3814,7 @@ void Alignment_Cartoon(FILE *file, Alignment *align, int indent, int coord)
* *
\****************************************************************************************/
+
#ifdef DEBUG_AWAVE
static void print_awave(int *V, int low, int hgh)
@@ -3011,7 +4129,7 @@ static int ToA[4] = { 'a', 'c', 'g', 't' };
#endif
-static int iter_np(char *A, int M, char *B, int N, Trace_Waves *wave)
+static int iter_np(char *A, int M, char *B, int N, Trace_Waves *wave, int mode)
{ int **PVF = wave->PVF;
int **PHF = wave->PHF;
int D;
@@ -3023,12 +4141,12 @@ static int iter_np(char *A, int M, char *B, int N, Trace_Waves *wave)
int posl, posh;
#ifdef DEBUG_ALIGN
- printf("\n%*s BASE %ld,%ld: %d vs %d\n",depth,"",A-wave->Aabs,B-wave->Babs,M,N);
- printf("%*s A = ",depth,"");
+ printf("\n BASE %ld,%ld: %d vs %d\n",A-wave->Aabs,B-wave->Babs,M,N);
+ printf(" A = ");
for (D = 0; D < M; D++)
printf("%c",ToA[(int) A[D]]);
printf("\n");
- printf("%*s B = ",depth,"");
+ printf(" B = ");
for (D = 0; D < N; D++)
printf("%c",ToA[(int) B[D]]);
printf("\n");
@@ -3150,7 +4268,6 @@ static int iter_np(char *A, int M, char *B, int N, Trace_Waves *wave)
}
{ int k, h, m, e, c;
- char *a;
int ap = (wave->Aabs-A)-1;
int bp = (B-wave->Babs)+1;
@@ -3160,66 +4277,155 @@ static int iter_np(char *A, int M, char *B, int N, Trace_Waves *wave)
k = del;
e = PHF[D][k];
PHF[D][k] = 3;
- while (e != 3)
- { h = k+e;
- if (e > 1)
- h -= 3;
- else if (e == 0)
- D -= 1;
- else
- D -= 2;
- if (h < k) // => e = -1 or 2
- { a = A + k;
- if (k < 0)
- m = -k;
- else
- m = 0;
- if (PVF[D][h] <= c)
- c = PVF[D][h]-1;
- while (c >= m && a[c] == B[c])
- c -= 1;
- if (e < 1) // => edge is 2, others are 1, and 0
- { if (c <= PVF[D+2][k+1])
- { e = 4;
- h = k+1;
- D = D+2;
- }
- else if (c == PVF[D+1][k])
- { e = 0;
- h = k;
- D = D+1;
- }
- else
- PVF[D][h] = c+1;
- }
- else // => edge is 0, others are 1, and 2 (if k != del), 0 (otherwise)
- { if (k == del)
- m = D;
- else
- m = D-2;
- if (c <= PVF[m][k+1])
- { if (k == del)
- e = 4;
- else
- e = 1;
- h = k+1;
- D = m;
- }
- else if (c == PVF[D-1][k])
- { e = 0;
- h = k;
- D = D-1;
- }
- else
- PVF[D][h] = c+1;
- }
- }
- m = PHF[D][h];
- PHF[D][h] = e;
- e = m;
- k = h;
- }
+ if (mode == UPPERMOST)
+
+ while (e != 3)
+ { h = k+e;
+ if (e > 1)
+ h -= 3;
+ else if (e == 0)
+ D -= 1;
+ else
+ D -= 2;
+
+ if (h < k) // => e = -1 or 2, UPPERMOST
+ { char *a;
+
+ a = A + k;
+ if (k < 0)
+ m = -k;
+ else
+ m = 0;
+ if (PVF[D][h] <= c)
+ c = PVF[D][h]-1;
+ while (c >= m && a[c] == B[c])
+ c -= 1;
+ if (e == -1) // => edge is 2, others are 1, and 0
+ { if (c <= PVF[D+2][k+1])
+ { e = 4;
+ h = k+1;
+ D = D+2;
+ }
+ else if (c == PVF[D+1][k])
+ { e = 0;
+ h = k;
+ D = D+1;
+ }
+ else
+ PVF[D][h] = c+1;
+ }
+ else // => edge is 0, others are 1, and 2 (if k != del), 0 (otherwise)
+ { if (k == del)
+ m = D;
+ else
+ m = D-2;
+ if (c <= PVF[m][k+1])
+ { if (k == del)
+ e = 4;
+ else
+ e = 1;
+ h = k+1;
+ D = m;
+ }
+ else if (c == PVF[D-1][k])
+ { e = 0;
+ h = k;
+ D = D-1;
+ }
+ else
+ PVF[D][h] = c+1;
+ }
+ }
+
+ m = PHF[D][h];
+ PHF[D][h] = e;
+ e = m;
+ k = h;
+ }
+
+ else if (mode == LOWERMOST)
+
+ while (e != 3)
+ { h = k+e;
+ if (e > 1)
+ h -= 3;
+ else if (e == 0)
+ D -= 1;
+ else
+ D -= 2;
+
+ if (h > k) // => e = 1 or 4, LOWERMOST
+ { char *a;
+
+ a = A + k;
+ if (k < 0)
+ m = -k;
+ else
+ m = 0;
+ if (PVF[D][h] < c)
+ c = PVF[D][h];
+ while (c >= m && a[c] == B[c])
+ c -= 1;
+ if (e == 1) // => edge is 2, others are 1, and 0
+ { if (c < PVF[D+2][k-1])
+ { e = 2;
+ h = k-1;
+ D = D+2;
+ }
+ else if (c == PVF[D+1][k])
+ { e = 0;
+ h = k;
+ D = D+1;
+ }
+ else
+ PVF[D][h] = c--;
+ }
+ else // => edge is 0, others are 1, and 2 (if k != del), 0 (otherwise)
+ { if (k == del)
+ m = D;
+ else
+ m = D-2;
+ if (c < PVF[m][k-1])
+ { if (k == del)
+ e = 2;
+ else
+ e = -1;
+ h = k-1;
+ D = m;
+ }
+ else if (c == PVF[D-1][k])
+ { e = 0;
+ h = k;
+ D = D-1;
+ }
+ else
+ PVF[D][h] = c--;
+ }
+ }
+
+ m = PHF[D][h];
+ PHF[D][h] = e;
+ e = m;
+ k = h;
+ }
+
+ else // mode == GREEDIEST
+
+ while (e != 3)
+ { h = k+e;
+ if (e > 1)
+ h -= 3;
+ else if (e == 0)
+ D -= 1;
+ else
+ D -= 2;
+
+ m = PHF[D][h];
+ PHF[D][h] = e;
+ e = m;
+ k = h;
+ }
k = D = 0;
e = PHF[D][k];
@@ -3232,42 +4438,27 @@ static int iter_np(char *A, int M, char *B, int N, Trace_Waves *wave)
D += 1;
else
D += 2;
+#ifdef DEBUG_SCRIPT
if (h > k)
- *wave->Stop++ = bp+c;
+ printf(" D %d(%d)\n",(c-k)-(ap-1),c+bp);
else if (h < k)
- *wave->Stop++ = ap-(c+k);
- k = h;
- e = PHF[D][h];
- }
-
-#ifdef DEBUG_SCRIPT
- k = D = 0;
- e = PHF[D][k];
- while (e != 3)
- { h = k-e;
- c = PVF[D][k];
- if (e > 1)
- h += 3;
- else if (e == 0)
- D += 1;
+ printf(" I %d(%d)\n",c+(bp-1),(c+k)-ap);
else
- D += 2;
+ printf(" %d S %d\n",(c+k)-(ap+1),c+(bp-1));
+#endif
if (h > k)
- printf("%*s D %d(%d)\n",depth,"",(c-k)-(ap-1),c+bp);
+ *wave->Stop++ = bp+c;
else if (h < k)
- printf("%*s I %d(%d)\n",depth,"",c+(bp-1),(c+k)-ap);
- else
- printf("%*s %d S %d\n",depth,"",(c+k)-(ap+1),c+(bp-1));
+ *wave->Stop++ = ap-(c+k);
k = h;
e = PHF[D][h];
}
-#endif
}
return (D + abs(del));
}
-static int middle_np(char *A, int M, char *B, int N, Trace_Waves *wave)
+static int middle_np(char *A, int M, char *B, int N, Trace_Waves *wave, int mode)
{ int **PVF = wave->PVF;
int **PHF = wave->PHF;
int D;
@@ -3378,68 +4569,152 @@ static int middle_np(char *A, int M, char *B, int N, Trace_Waves *wave)
{ int k, h, m, e, c;
int d, f;
- char *a;
d = D + abs(del);
c = N;
k = del;
- for (f = d/2; d > f; d--)
- { e = PHF[D][k];
- h = k+e;
- if (e > 1)
- h -= 3;
- else if (e == 0)
- D -= 1;
- else
- D -= 2;
- if (h < k) // => e = -1 or 2
- { a = A + k;
- if (k < 0)
- m = -k;
- else
- m = 0;
- if (PVF[D][h] <= c)
- c = PVF[D][h]-1;
- while (c >= m && a[c] == B[c])
- c -= 1;
- if (e < 1) // => edge is 2, others are 1, and 0
- { if (c <= PVF[D+2][k+1])
- { e = 4;
- h = k+1;
- D = D+2;
- }
- else if (c == PVF[D+1][k])
- { e = 0;
- h = k;
- D = D+1;
- }
- else
- PVF[D][h] = c+1;
- }
- else // => edge is 0, others are 1, and 2 (if k != del), 0 (otherwise)
- { if (k == del)
- m = D;
- else
- m = D-2;
- if (c <= PVF[m][k+1])
- { if (k == del)
- e = 4;
- else
- e = 1;
- h = k+1;
- D = m;
- }
- else if (c == PVF[D-1][k])
- { e = 0;
- h = k;
- D = D-1;
- }
- else
- PVF[D][h] = c+1;
- }
- }
- k = h;
- }
+
+ if (mode == UPPERMOST)
+
+ for (f = d/2; d > f; d--)
+ { e = PHF[D][k];
+ h = k+e;
+ if (e > 1)
+ h -= 3;
+ else if (e == 0)
+ D -= 1;
+ else
+ D -= 2;
+
+ if (h < k) // => e = -1 or 2, UPPERMOST
+ { char *a;
+
+ a = A + k;
+ if (k < 0)
+ m = -k;
+ else
+ m = 0;
+ if (PVF[D][h] <= c)
+ c = PVF[D][h]-1;
+ while (c >= m && a[c] == B[c])
+ c -= 1;
+ if (e == -1) // => edge is 2, others are 1, and 0
+ { if (c <= PVF[D+2][k+1])
+ { e = 4;
+ h = k+1;
+ D = D+2;
+ }
+ else if (c == PVF[D+1][k])
+ { e = 0;
+ h = k;
+ D = D+1;
+ }
+ else
+ PVF[D][h] = c+1;
+ }
+ else // => edge is 0, others are 1, and 2 (if k != del), 0 (otherwise)
+ { if (k == del)
+ m = D;
+ else
+ m = D-2;
+ if (c <= PVF[m][k+1])
+ { if (k == del)
+ e = 4;
+ else
+ e = 1;
+ h = k+1;
+ D = m;
+ }
+ else if (c == PVF[D-1][k])
+ { e = 0;
+ h = k;
+ D = D-1;
+ }
+ else
+ PVF[D][h] = c+1;
+ }
+ }
+
+ k = h;
+ }
+
+ else if (mode == LOWERMOST)
+
+ for (f = d/2; d > f; d--)
+ { e = PHF[D][k];
+ h = k+e;
+ if (e > 1)
+ h -= 3;
+ else if (e == 0)
+ D -= 1;
+ else
+ D -= 2;
+
+ if (h > k) // => e = 1 or 4, LOWERMOST
+ { char *a;
+
+ a = A + k;
+ if (k < 0)
+ m = -k;
+ else
+ m = 0;
+ if (PVF[D][h] < c)
+ c = PVF[D][h];
+ while (c >= m && a[c] == B[c])
+ c -= 1;
+ if (e == 1) // => edge is 2, others are 1, and 0
+ { if (c < PVF[D+2][k-1])
+ { e = 2;
+ h = k-1;
+ D = D+2;
+ }
+ else if (c == PVF[D+1][k])
+ { e = 0;
+ h = k;
+ D = D+1;
+ }
+ else
+ PVF[D][h] = c--;
+ }
+ else // => edge is 0, others are 1, and 2 (if k != del), 0 (otherwise)
+ { if (k == del)
+ m = D;
+ else
+ m = D-2;
+ if (c < PVF[m][k-1])
+ { if (k == del)
+ e = 2;
+ else
+ e = -1;
+ h = k-1;
+ D = m;
+ }
+ else if (c == PVF[D-1][k])
+ { e = 0;
+ h = k;
+ D = D-1;
+ }
+ else
+ PVF[D][h] = c--;
+ }
+ }
+
+ k = h;
+ }
+
+ else // mode == GREEDIEST
+
+ for (f = d/2; d > f; d--)
+ { e = PHF[D][k];
+ h = k+e;
+ if (e > 1)
+ h -= 3;
+ else if (e == 0)
+ D -= 1;
+ else
+ D -= 2;
+ k = h;
+ }
wave->midb = (B-wave->Babs) + PVF[D][k];
wave->mida = (A-wave->Aabs) + k + PVF[D][k];
@@ -3511,7 +4786,7 @@ int Compute_Trace_ALL(Alignment *align, Work_Data *ework)
wave.Aabs = aseq;
wave.Babs = bseq;
- D = iter_np(aseq+path->abpos,M,bseq+path->bbpos,N,&wave);
+ D = iter_np(aseq+path->abpos,M,bseq+path->bbpos,N,&wave,GREEDIEST);
if (D < 0)
EXIT(1);
path->diffs = D;
@@ -3521,7 +4796,7 @@ int Compute_Trace_ALL(Alignment *align, Work_Data *ework)
return (0);
}
-int Compute_Trace_PTS(Alignment *align, Work_Data *ework, int trace_spacing)
+int Compute_Trace_PTS(Alignment *align, Work_Data *ework, int trace_spacing, int mode)
{ _Work_Data *work = (_Work_Data *) ework;
Trace_Waves wave;
@@ -3565,8 +4840,6 @@ int Compute_Trace_PTS(Alignment *align, Work_Data *ework, int trace_spacing)
}
if (tlen <= 1)
nmax = N;
- if (points[d-1] > dmax)
- dmax = points[d-1];
s = (dmax+3)*2*((trace_spacing+nmax+3)*sizeof(int) + sizeof(int *));
@@ -3600,7 +4873,7 @@ int Compute_Trace_PTS(Alignment *align, Work_Data *ework, int trace_spacing)
for (i = 1; i < tlen; i += 2)
{ ae = ae + trace_spacing;
be = bb + points[i];
- d = iter_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave);
+ d = iter_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave,mode);
if (d < 0)
EXIT(1);
diffs += d;
@@ -3609,7 +4882,7 @@ int Compute_Trace_PTS(Alignment *align, Work_Data *ework, int trace_spacing)
}
ae = path->aepos;
be = path->bepos;
- d = iter_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave);
+ d = iter_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave,mode);
if (d < 0)
EXIT(1);
diffs += d;
@@ -3622,7 +4895,7 @@ int Compute_Trace_PTS(Alignment *align, Work_Data *ework, int trace_spacing)
return (0);
}
-int Compute_Trace_MID(Alignment *align, Work_Data *ework, int trace_spacing)
+int Compute_Trace_MID(Alignment *align, Work_Data *ework, int trace_spacing, int mode)
{ _Work_Data *work = (_Work_Data *) ework;
Trace_Waves wave;
@@ -3666,8 +4939,6 @@ int Compute_Trace_MID(Alignment *align, Work_Data *ework, int trace_spacing)
}
if (tlen <= 1)
nmax = N;
- if (points[d-1] > dmax)
- dmax = points[d-1];
s = (dmax+3)*4*((trace_spacing+nmax+3)*sizeof(int) + sizeof(int *));
@@ -3703,11 +4974,11 @@ int Compute_Trace_MID(Alignment *align, Work_Data *ework, int trace_spacing)
for (i = 1; i < tlen; i += 2)
{ ae = ae + trace_spacing;
be = bb + points[i];
- if (middle_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave))
+ if (middle_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave,mode))
EXIT(1);
af = wave.mida;
bf = wave.midb;
- d = iter_np(aseq+as,af-as,bseq+bs,bf-bs,&wave);
+ d = iter_np(aseq+as,af-as,bseq+bs,bf-bs,&wave,mode);
if (d < 0)
EXIT(1);
diffs += d;
@@ -3720,18 +4991,18 @@ int Compute_Trace_MID(Alignment *align, Work_Data *ework, int trace_spacing)
ae = path->aepos;
be = path->bepos;
- if (middle_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave))
+ if (middle_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave,mode))
EXIT(1);
af = wave.mida;
bf = wave.midb;
- d = iter_np(aseq+as,af-as,bseq+bs,bf-bs,&wave);
+ d = iter_np(aseq+as,af-as,bseq+bs,bf-bs,&wave,mode);
if (d < 0)
EXIT(1);
diffs += d;
as = af;
bs = bf;
- d += iter_np(aseq+af,ae-as,bseq+bf,be-bs,&wave);
+ d += iter_np(aseq+af,ae-as,bseq+bf,be-bs,&wave,mode);
if (d < 0)
EXIT(1);
diffs += d;
@@ -3743,3 +5014,99 @@ int Compute_Trace_MID(Alignment *align, Work_Data *ework, int trace_spacing)
return (0);
}
+
+int Compute_Trace_IRR(Alignment *align, Work_Data *ework, int mode)
+{ _Work_Data *work = (_Work_Data *) ework;
+ Trace_Waves wave;
+
+ Path *path;
+ char *aseq, *bseq;
+ uint16 *points;
+ int tlen;
+ int ab, bb;
+ int ae, be;
+ int diffs;
+
+ path = align->path;
+ aseq = align->aseq;
+ bseq = align->bseq;
+ tlen = path->tlen;
+ points = (uint16 *) path->trace;
+
+ { int64 s;
+ int d;
+ int M, N;
+ int mmax, nmax, dmax;
+ int **PVF, **PHF;
+
+ M = path->aepos-path->abpos;
+ N = path->bepos-path->bbpos;
+ if (M < N)
+ s = N*sizeof(int);
+ else
+ s = M*sizeof(int);
+ if (s > work->tramax)
+ if (enlarge_trace(work,s))
+ EXIT(1);
+
+ nmax = mmax = 0;
+ for (d = 0; d < tlen; d += 2)
+ { if (points[d] > mmax)
+ mmax = points[d];
+ if (points[d+1] > nmax)
+ nmax = points[d+1];
+ }
+ if (tlen <= 1)
+ { mmax = M;
+ nmax = N;
+ }
+ if (mmax > nmax)
+ dmax = nmax;
+ else
+ dmax = mmax;
+
+ s = (dmax+3)*2*((mmax+nmax+3)*sizeof(int) + sizeof(int *));
+
+ if (s > work->vecmax)
+ if (enlarge_vector(work,s))
+ EXIT(1);
+
+ wave.PVF = PVF = ((int **) (work->vector)) + 2;
+ wave.PHF = PHF = PVF + (dmax+3);
+
+ s = mmax+nmax+3;
+ PVF[-2] = ((int *) (PHF + (dmax+1))) + (nmax+1);
+ for (d = -1; d <= dmax; d++)
+ PVF[d] = PVF[d-1] + s;
+ PHF[-2] = PVF[dmax] + s;
+ for (d = -1; d <= dmax; d++)
+ PHF[d] = PHF[d-1] + s;
+ }
+
+ wave.Stop = (int *) (work->trace);
+ wave.Aabs = aseq;
+ wave.Babs = bseq;
+
+ { int i, d;
+
+ diffs = 0;
+ ab = path->abpos;
+ bb = path->bbpos;
+ for (i = 0; i < tlen; i += 2)
+ { ae = ab + points[i];
+ be = bb + points[i+1];
+ d = iter_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave,mode);
+ if (d < 0)
+ EXIT(1);
+ diffs += d;
+ ab = ae;
+ bb = be;
+ }
+ }
+
+ path->trace = work->trace;
+ path->tlen = wave.Stop - ((int *) path->trace);
+ path->diffs = diffs;
+
+ return (0);
+}
diff --git a/align.h b/align.h
index 110cfcc..e937b68 100644
--- a/align.h
+++ b/align.h
@@ -1,40 +1,3 @@
-/************************************************************************************\
-* *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved. *
-* *
-* Redistribution and use in source and binary forms, with or without modification, *
-* are permitted provided that the following conditions are met: *
-* *
-* · Redistributions of source code must retain the above copyright notice, this *
-* list of conditions and the following disclaimer. *
-* *
-* · Redistributions in binary form must reproduce the above copyright notice, this *
-* list of conditions and the following disclaimer in the documentation and/or *
-* other materials provided with the distribution. *
-* *
-* · The name of EWM may not be used to endorse or promote products derived from *
-* this software without specific prior written permission. *
-* *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
-* *
-* For any issues regarding this software and its use, contact EWM at: *
-* *
-* Eugene W. Myers Jr. *
-* Bautzner Str. 122e *
-* 01099 Dresden *
-* GERMANY *
-* Email: gene.myers at gmail.com *
-* *
-\************************************************************************************/
-
/*******************************************************************************************
*
* Local alignment module. Routines for finding local alignments given a seed position,
@@ -49,15 +12,15 @@
*
********************************************************************************************/
+#ifndef _A_MODULE
+
+#define _A_MODULE
+
#include "DB.h"
#define TRACE_XOVR 125 // If the trace spacing is not more than this value, then can
// and do compress traces pts to 8-bit unsigned ints
-#ifndef _A_MODULE
-
-#define _A_MODULE
-
/*** INTERACTIVE vs BATCH version
The defined constant INTERACTIVE (set in DB.h) determines whether an interactive or
@@ -231,11 +194,21 @@ void Complement_Seq(char *a, int n);
one wants to retain the bread-path and the two trace point sequences, then they must be
copied to user-allocated storage before calling the routine again. NULL is returned in
the event of an error.
+
+ Find_Extension is a variant of Local_Alignment that simply finds a local alignment that
+ either ends (if prefix is non-zero) or begins (if prefix is zero) at the point
+ (anti+diag)/2,(anti-diag)/2). All other parameters are as before. It returns a non-zero
+ value only when INTERACTIVE is on and it cannot allocate the memory it needs.
+ Only the path and trace with respect to the aread is returned. This routine is experimental
+ and may not persist in later versions of the code.
*/
Path *Local_Alignment(Alignment *align, Work_Data *work, Align_Spec *spec,
int low, int hgh, int anti, int lbord, int hbord);
+ int Find_Extension(Alignment *align, Work_Data *work, Align_Spec *spec, // experimental !!
+ int diag, int anti, int lbord, int hbord, int prefix);
+
/* Given a legitimate Alignment object, Compute_Trace_X computes an exact trace for the alignment.
If 'path.trace' is non-NULL, then it is assumed to be a sequence of pass-through points
and diff levels computed by Local_Alignment. In either case 'path.trace' is set
@@ -255,9 +228,22 @@ void Complement_Seq(char *a, int n);
return 1 if an error occurred and 0 otherwise.
*/
+#define LOWERMOST -1 // Possible modes for "mode" parameter below)
+#define GREEDIEST 0
+#define UPPERMOST 1
+
int Compute_Trace_ALL(Alignment *align, Work_Data *work);
- int Compute_Trace_PTS(Alignment *align, Work_Data *work, int trace_spacing);
- int Compute_Trace_MID(Alignment *align, Work_Data *work, int trace_spacing);
+ int Compute_Trace_PTS(Alignment *align, Work_Data *work, int trace_spacing, int mode);
+ int Compute_Trace_MID(Alignment *align, Work_Data *work, int trace_spacing, int mode);
+
+ /* Compute_Trace_IRR (IRR for IRRegular) computes a trace for the given alignment where
+ it assumes the spacing between trace points between both the A and B read varies, and
+ futher assumes that the A-spacing is given in the short integers normally occupied by
+ the differences in the alignment between the trace points. This routine is experimental
+ and may not persist in later versions of the code.
+ */
+
+ int Compute_Trace_IRR(Alignment *align, Work_Data *work, int mode); // experimental !!
/* Alignment_Cartoon prints an ASCII representation of the overlap relationhip between the
two reads of 'align' to the given 'file' indented by 'indent' space. Coord controls
diff --git a/daligner.c b/daligner.c
index f8ca04e..2914e05 100644
--- a/daligner.c
+++ b/daligner.c
@@ -1,40 +1,3 @@
-/************************************************************************************\
-* *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved. *
-* *
-* Redistribution and use in source and binary forms, with or without modification, *
-* are permitted provided that the following conditions are met: *
-* *
-* · Redistributions of source code must retain the above copyright notice, this *
-* list of conditions and the following disclaimer. *
-* *
-* · Redistributions in binary form must reproduce the above copyright notice, this *
-* list of conditions and the following disclaimer in the documentation and/or *
-* other materials provided with the distribution. *
-* *
-* · The name of EWM may not be used to endorse or promote products derived from *
-* this software without specific prior written permission. *
-* *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
-* *
-* For any issues regarding this software and its use, contact EWM at: *
-* *
-* Eugene W. Myers Jr. *
-* Bautzner Str. 122e *
-* 01099 Dresden *
-* GERMANY *
-* Email: gene.myers at gmail.com *
-* *
-\************************************************************************************/
-
/*********************************************************************************************\
*
* Find all local alignment between long, noisy DNA reads:
@@ -375,24 +338,27 @@ HITS_TRACK *Merge_Tracks(HITS_DB *block, int mtop, int64 nsize)
return (ntrack);
}
-static HITS_DB *read_DB(char *name, char **mask, int *mstat, int mtop, int kmer, int *isdam)
-{ static HITS_DB block;
- int i, status, stop;
+static int read_DB(HITS_DB *block, char *name, char **mask, int *mstat, int mtop, int kmer)
+{ int i, isdam, status, kind, stop;
- status = Open_DB(name,&block);
- if (status < 0)
+ isdam = Open_DB(name,block);
+ if (isdam < 0)
exit (1);
- *isdam = status;
for (i = 0; i < mtop; i++)
- { status = Check_Track(&block,mask[i]);
- if (status > mstat[i])
+ { status = Check_Track(block,mask[i],&kind);
+ if (status >= 0)
+ if (kind == MASK_TRACK)
+ mstat[i] = 0;
+ else
+ mstat[i] = -3;
+ else
mstat[i] = status;
- if (status == 0)
- Load_Track(&block,mask[i]);
+ if (status == 0 && kind == MASK_TRACK)
+ Load_Track(block,mask[i]);
}
- Trim_DB(&block);
+ Trim_DB(block);
stop = 0;
for (i = 0; i < mtop; i++)
@@ -400,14 +366,14 @@ static HITS_DB *read_DB(char *name, char **mask, int *mstat, int mtop, int kmer,
int64 *anno;
int j;
- status = Check_Track(&block,mask[i]);
- if (status < 0)
+ status = Check_Track(block,mask[i],&kind);
+ if (status < 0 || kind != MASK_TRACK)
continue;
stop += 1;
- track = Load_Track(&block,mask[i]);
+ track = Load_Track(block,mask[i]);
anno = (int64 *) (track->anno);
- for (j = 0; j <= block.nreads; j++)
+ for (j = 0; j <= block->nreads; j++)
anno[j] /= sizeof(int);
}
@@ -415,27 +381,27 @@ static HITS_DB *read_DB(char *name, char **mask, int *mstat, int mtop, int kmer,
{ int64 nsize;
HITS_TRACK *track;
- nsize = Merge_Size(&block,stop);
- track = Merge_Tracks(&block,stop,nsize);
+ nsize = Merge_Size(block,stop);
+ track = Merge_Tracks(block,stop,nsize);
- while (block.tracks != NULL)
- Close_Track(&block,block.tracks->name);
+ while (block->tracks != NULL)
+ Close_Track(block,block->tracks->name);
- block.tracks = track;
+ block->tracks = track;
}
- if (block.cutoff < kmer)
- { for (i = 0; i < block.nreads; i++)
- if (block.reads[i].rlen < kmer)
+ if (block->cutoff < kmer)
+ { for (i = 0; i < block->nreads; i++)
+ if (block->reads[i].rlen < kmer)
{ fprintf(stderr,"%s: Block %s contains reads < %dbp long ! Run DBsplit.\n",
Prog_Name,name,kmer);
exit (1);
}
}
- Read_All_Sequences(&block,0);
+ Read_All_Sequences(block,0);
- return (&block);
+ return (isdam);
}
static void complement(char *s, int len)
@@ -454,72 +420,107 @@ static void complement(char *s, int len)
*s = (char) (3-*s);
}
-static HITS_DB *complement_DB(HITS_DB *block)
-{ static HITS_DB cblock;
- int i, nreads;
+static HITS_DB *complement_DB(HITS_DB *block, int inplace)
+{ static HITS_DB _cblock, *cblock = &_cblock;
+ int nreads;
HITS_READ *reads;
char *seq;
- float x;
nreads = block->nreads;
reads = block->reads;
- seq = (char *) Malloc(block->reads[nreads].boff+1,"Allocating dazzler sequence block");
- if (seq == NULL)
- exit (1);
- *seq++ = 4;
- memcpy(seq,block->bases,block->reads[nreads].boff);
+ if (inplace)
+ { seq = (char *) block->bases;
+ cblock = block;
+ }
+ else
+ { seq = (char *) Malloc(block->reads[nreads].boff+1,"Allocating dazzler sequence block");
+ if (seq == NULL)
+ exit (1);
+ *seq++ = 4;
+ memcpy(seq,block->bases,block->reads[nreads].boff);
+ *cblock = *block;
+ cblock->bases = (void *) seq;
+ cblock->tracks = NULL;
+ }
- for (i = 0; i < nreads; i++)
- complement(seq+reads[i].boff,reads[i].rlen);
+ { int i;
+ float x;
- cblock = *block;
- cblock.bases = (void *) seq;
+ x = cblock->freq[0];
+ cblock->freq[0] = cblock->freq[3];
+ cblock->freq[3] = x;
- x = cblock.freq[0];
- cblock.freq[0] = cblock.freq[3];
- cblock.freq[3] = x;
+ x = cblock->freq[1];
+ cblock->freq[1] = cblock->freq[2];
+ cblock->freq[2] = x;
- x = cblock.freq[1];
- cblock.freq[1] = cblock.freq[2];
- cblock.freq[2] = x;
+ for (i = 0; i < nreads; i++)
+ complement(seq+reads[i].boff,reads[i].rlen);
+ }
- { HITS_TRACK *t, *dust;
+ { HITS_TRACK *src, *trg;
int *data, *tata;
- int p, rlen;
- int64 j, *tano;
-
- for (t = block->tracks; t != NULL; t = t->next)
- { tano = (int64 *) t->anno;
- tata = (int *) t->data;
-
- data = (int *) Malloc(sizeof(int)*tano[nreads],"Allocating dazzler .dust index");
- dust = (HITS_TRACK *) Malloc(sizeof(HITS_TRACK),"Allocating dazzler .dust track");
- if (data == NULL || dust == NULL)
- exit (1);
-
- dust->next = NULL;
- dust->name = t->name;
- dust->size = 4;
- dust->anno = (void *) tano;
- dust->data = (void *) data;
- cblock.tracks = dust;
-
- p = 0;
+ int i, x, rlen;
+ int64 *tano, *anno;
+ int64 j, k;
+
+ for (src = block->tracks; src != NULL; src = src->next)
+ { tano = (int64 *) src->anno;
+ tata = (int *) src->data;
+
+ if (inplace)
+ { data = tata;
+ anno = tano;
+ trg = src;
+ }
+ else
+ { data = (int *) Malloc(sizeof(int)*tano[nreads],
+ "Allocating dazzler interval track data");
+ anno = (int64 *) Malloc(sizeof(int64)*(nreads+1),
+ "Allocating dazzler interval track index");
+ trg = (HITS_TRACK *) Malloc(sizeof(HITS_TRACK),
+ "Allocating dazzler interval track header");
+ if (data == NULL || trg == NULL || anno == NULL)
+ exit (1);
+
+ trg->name = Strdup(src->name,"Copying track name");
+ if (trg->name == NULL)
+ exit (1);
+
+ trg->size = 4;
+ trg->anno = (void *) anno;
+ trg->data = (void *) data;
+ trg->next = cblock->tracks;
+ cblock->tracks = trg;
+ }
+
for (i = 0; i < nreads; i++)
{ rlen = reads[i].rlen;
- for (j = tano[i+1]-1; j >= tano[i]; j--)
- data[p++] = rlen - tata[j];
+ anno[i] = tano[i];
+ j = tano[i+1]-1;
+ k = tano[i];
+ while (k < j)
+ { x = tata[j];
+ data[j--] = rlen - tata[k];
+ data[k++] = rlen - x;
+ }
+ if (k == j)
+ data[k] = rlen - tata[k];
}
+ anno[nreads] = tano[nreads];
}
}
- return (&cblock);
+ return (cblock);
}
int main(int argc, char *argv[])
-{ HITS_DB ablock, bblock, cblock;
+{ HITS_DB _ablock, _bblock;
+ HITS_DB *ablock = &_ablock, *bblock = &_bblock;
char *afile, *bfile;
char *aroot, *broot;
+ void *aindex, *bindex;
+ int alen, blen;
Align_Spec *asettings;
int isdam;
int MMAX, MTOP, *MSTAT;
@@ -612,7 +613,6 @@ int main(int argc, char *argv[])
if (MASK == NULL || MSTAT == NULL)
exit (1);
}
- MSTAT[MTOP] = -2;
MASK[MTOP++] = argv[i]+2;
break;
}
@@ -641,27 +641,25 @@ int main(int argc, char *argv[])
/* Read in the reads in A */
- afile = argv[1];
- ablock = *read_DB(afile,MASK,MSTAT,MTOP,KMER_LEN,&isdam);
- cblock = *complement_DB(&ablock);
+ afile = argv[1];
+ isdam = read_DB(ablock,afile,MASK,MSTAT,MTOP,KMER_LEN);
if (isdam)
aroot = Root(afile,".dam");
else
aroot = Root(afile,".db");
- if (ablock.cutoff >= HGAP_MIN)
- HGAP_MIN = ablock.cutoff;
-
- asettings = New_Align_Spec( AVE_ERROR, SPACING, ablock.freq);
+ asettings = New_Align_Spec( AVE_ERROR, SPACING, ablock->freq);
/* Compare against reads in B in both orientations */
{ int i, j;
+ aindex = NULL;
+ broot = NULL;
for (i = 2; i < argc; i++)
{ bfile = argv[i];
if (strcmp(afile,bfile) != 0)
- { bblock = *read_DB(bfile,MASK,MSTAT,MTOP,KMER_LEN,&isdam);
+ { isdam = read_DB(bblock,bfile,MASK,MSTAT,MTOP,KMER_LEN);
if (isdam)
broot = Root(bfile,".dam");
else
@@ -674,27 +672,43 @@ int main(int argc, char *argv[])
printf("%s: Warning: -m%s option given but no track found.\n",Prog_Name,MASK[i]);
else if (MSTAT[j] == -1)
printf("%s: Warning: %s track not sync'd with relevant db.\n",Prog_Name,MASK[i]);
+ else if (MSTAT[j] == -3)
+ printf("%s: Warning: %s track is not a mask track.\n",Prog_Name,MASK[i]);
}
if (VERBOSE)
printf("\nBuilding index for %s\n",aroot);
- Build_Table(&ablock);
+ aindex = Sort_Kmers(ablock,&alen);
+ }
+
+ if (strcmp(afile,bfile) != 0)
+ { if (VERBOSE)
+ printf("\nBuilding index for %s\n",broot);
+ bindex = Sort_Kmers(bblock,&blen);
+ Match_Filter(aroot,ablock,broot,bblock,aindex,alen,bindex,blen,0,asettings);
+ bblock = complement_DB(bblock,1);
if (VERBOSE)
- printf("\nBuilding index for c(%s)\n",aroot);
- Build_Table(&cblock);
- }
-
- if (strcmp(afile,bfile) == 0)
- { Match_Filter(aroot,&ablock,aroot,&ablock,1,0,asettings);
- Match_Filter(aroot,&cblock,aroot,&ablock,1,1,asettings);
+ printf("\nBuilding index for c(%s)\n",broot);
+ bindex = Sort_Kmers(bblock,&blen);
+ Match_Filter(aroot,ablock,broot,bblock,aindex,alen,bindex,blen,1,asettings);
+
+ free(broot);
}
else
- { Match_Filter(aroot,&ablock,broot,&bblock,0,0,asettings);
- Match_Filter(aroot,&cblock,broot,&bblock,0,1,asettings);
- Close_DB(&bblock);
- free(broot);
+ { Match_Filter(aroot,ablock,aroot,ablock,aindex,alen,aindex,alen,0,asettings);
+
+ bblock = complement_DB(ablock,0);
+ if (VERBOSE)
+ printf("\nBuilding index for c(%s)\n",aroot);
+ bindex = Sort_Kmers(bblock,&blen);
+ Match_Filter(aroot,ablock,aroot,bblock,aindex,alen,bindex,blen,1,asettings);
+
+ bblock->reads = NULL; // ablock & bblock share "reads" vector, don't let Close_DB
+ // free it !
}
+
+ Close_DB(bblock);
}
}
diff --git a/filter.c b/filter.c
index db80348..f54780d 100644
--- a/filter.c
+++ b/filter.c
@@ -1,45 +1,11 @@
-/************************************************************************************\
-* *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved. *
-* *
-* Redistribution and use in source and binary forms, with or without modification, *
-* are permitted provided that the following conditions are met: *
-* *
-* · Redistributions of source code must retain the above copyright notice, this *
-* list of conditions and the following disclaimer. *
-* *
-* · Redistributions in binary form must reproduce the above copyright notice, this *
-* list of conditions and the following disclaimer in the documentation and/or *
-* other materials provided with the distribution. *
-* *
-* · The name of EWM may not be used to endorse or promote products derived from *
-* this software without specific prior written permission. *
-* *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
-* *
-* For any issues regarding this software and its use, contact EWM at: *
-* *
-* Eugene W. Myers Jr. *
-* Bautzner Str. 122e *
-* 01099 Dresden *
-* GERMANY *
-* Email: gene.myers at gmail.com *
-* *
-\************************************************************************************/
-
/*******************************************************************************************
*
* Fast local alignment filter for long, noisy reads based on "dumbing down" of my RECOMB 2005
* filter with Jens Stoye, and a "smarting up" of the k-mer matching by turning it into
- * a threaded sort and merge paradigm using a super cache coherent radix sort.
+ * a threaded sort and merge paradigm using a super cache coherent radix sort. Local
+ * alignment is accomplised with dynamically-banded O(nd) algorithm that terminates when
+ * it fails to find a e-matching patch for a significant distance, and polishes the match
+ * to the last e-prefix-positive 32-mer.
*
* Author : Gene Myers
* First : June 2013
@@ -70,6 +36,9 @@
#define PANEL_SIZE 50000 // Size to break up very long A-reads
#define PANEL_OVERLAP 10000 // Overlap of A-panels
+#define MATCH_CHUNK 100 // Max expected number of hits between two reads
+#define TRACE_CHUNK 20000 // Max expected trace points in hits between two reads
+
#undef TEST_LSORT
#undef TEST_KSORT
#undef TEST_PAIRS
@@ -77,6 +46,7 @@
#define HOW_MANY 3000 // Print first HOW_MANY items for each of the TEST options above
#undef TEST_GATHER
+#undef TEST_CONTAIN
#undef SHOW_OVERLAP // Show the cartoon
#undef SHOW_ALIGNMENT // Show the alignment
#define ALIGN_WIDTH 80 // Parameters for alignment
@@ -91,6 +61,10 @@
#define NOTHREAD
#endif
+#ifdef TEST_CONTAIN
+#define NOTHREAD
+#endif
+
typedef struct
{ uint64 p1; // The lower half
uint64 p2;
@@ -128,11 +102,6 @@ typedef struct
#endif
-static KmerPos *Asort = NULL;
-static KmerPos *Csort = NULL;
-
-static int Alen, Clen;
-
/*******************************************************************************************
*
* PARAMETER SETUP
@@ -320,7 +289,10 @@ static Double *lex_sort(int bytes[16], Double *src, Double *trg, Lex_Arg *parmx)
{ x = 0;
for (i = 0; i < NTHREADS; i++)
{ parmx[i].beg = x;
- parmx[i].end = x = LEX_zsize*(i+1);
+ x = LEX_zsize*(i+1);
+ if (x > len)
+ x = len;
+ parmx[i].end = x;
for (j = 0; j < BPOWR; j++)
parmx[i].tptr[j] = 0;
}
@@ -674,7 +646,7 @@ static void *compress_thread(void *arg)
return (NULL);
}
-static KmerPos *sort_kmers(HITS_DB *block, int *len)
+void *Sort_Kmers(HITS_DB *block, int *len)
{ THREAD threads[NTHREADS];
Tuple_Arg parmt[NTHREADS];
Comp_Arg parmf[NTHREADS];
@@ -694,7 +666,7 @@ static KmerPos *sort_kmers(HITS_DB *block, int *len)
if (NormShift == NULL && BIASED)
{ double scale;
- NormShift = (int *) Malloc(sizeof(int)*(Kmer+1),"Allocating sort_kmers bias shift");
+ NormShift = (int *) Malloc(sizeof(int)*(Kmer+1),"Allocating Sort_Kmers bias shift");
if (NormShift == NULL)
exit (1);
for (i = 0; i <= Kmer; i++)
@@ -714,12 +686,12 @@ static KmerPos *sort_kmers(HITS_DB *block, int *len)
goto no_mers;
if (( (Kshift-1)/BSHIFT + (TooFrequent < INT32_MAX) ) & 0x1)
- { trg = (KmerPos *) Malloc(sizeof(KmerPos)*(kmers+1),"Allocating sort_kmers vectors");
- src = (KmerPos *) Malloc(sizeof(KmerPos)*(kmers+1),"Allocating sort_kmers vectors");
+ { trg = (KmerPos *) Malloc(sizeof(KmerPos)*(kmers+1),"Allocating Sort_Kmers vectors");
+ src = (KmerPos *) Malloc(sizeof(KmerPos)*(kmers+1),"Allocating Sort_Kmers vectors");
}
else
- { src = (KmerPos *) Malloc(sizeof(KmerPos)*(kmers+1),"Allocating sort_kmers vectors");
- trg = (KmerPos *) Malloc(sizeof(KmerPos)*(kmers+1),"Allocating sort_kmers vectors");
+ { src = (KmerPos *) Malloc(sizeof(KmerPos)*(kmers+1),"Allocating Sort_Kmers vectors");
+ trg = (KmerPos *) Malloc(sizeof(KmerPos)*(kmers+1),"Allocating Sort_Kmers vectors");
}
if (src == NULL || trg == NULL)
exit (1);
@@ -843,9 +815,9 @@ static KmerPos *sort_kmers(HITS_DB *block, int *len)
if (MEM_LIMIT > 0 && kmers > (int64) (MEM_LIMIT/(4*sizeof(KmerPos))))
{ fprintf(stderr,"Warning: Block size too big, index occupies more than 1/4 of");
if (MEM_LIMIT == MEM_PHYSICAL)
- fprintf(stderr," physical memory\n");
+ fprintf(stderr," physical memory (%.1fGb)\n",(1.*MEM_LIMIT)/0x40000000ll);
else
- fprintf(stderr," desired memory allocation\n");
+ fprintf(stderr," desired memory allocation (%.1fGb)\n",(1.*MEM_LIMIT)/0x40000000ll);
fflush(stderr);
}
@@ -857,13 +829,6 @@ no_mers:
return (NULL);
}
-void Build_Table(HITS_DB *block)
-{ if (Asort == NULL)
- Asort = sort_kmers(block,&Alen);
- else
- Csort = sort_kmers(block,&Clen);
-}
-
/*******************************************************************************************
*
@@ -888,9 +853,12 @@ static int find_tuple(uint64 x, KmerPos *a, int n)
return (l);
}
+ // Determine what *will* be the size of the merged list and histogram of sizes for given cutoffs
+
static KmerPos *MG_alist;
static KmerPos *MG_blist;
static SeedPair *MG_hits;
+static int MG_comp;
static int MG_self;
typedef struct
@@ -936,10 +904,15 @@ static void *count_thread(void *arg)
if (IDENTITY)
do
{ ar = asort[ia].read;
- while (db == cb && bsort[ib].read < ar)
- db = bsort[++ib].code;
- while (db == cb && bsort[ib].read == ar && bsort[ib].rpos < asort[ia].rpos)
- db = bsort[++ib].code;
+ if (MG_comp)
+ while (db == cb && bsort[ib].read <= ar)
+ db = bsort[++ib].code;
+ else
+ { while (db == cb && bsort[ib].read < ar)
+ db = bsort[++ib].code;
+ while (db == cb && bsort[ib].read == ar && bsort[ib].rpos < asort[ia].rpos)
+ db = bsort[++ib].code;
+ }
ct += (ib-jb);
}
while ((da = asort[++ia].code) == ca);
@@ -997,6 +970,9 @@ static void *count_thread(void *arg)
return (NULL);
}
+ // Produce the merged list now that the list has been allocated and
+ // the appropriate cutoff determined.
+
static void *merge_thread(void *arg)
{ Merge_Arg *data = (Merge_Arg *) arg;
int64 *kptr = data->kptr;
@@ -1036,10 +1012,15 @@ static void *merge_thread(void *arg)
do
{ ar = asort[ia].read;
ap = asort[ia].rpos;
- while (db == cb && bsort[ib].read < ar)
- db = bsort[++ib].code;
- while (db == cb && bsort[ib].read == ar && bsort[ib].rpos < ap)
- db = bsort[++ib].code;
+ if (MG_comp)
+ while (db == cb && bsort[ib].read <= ar)
+ db = bsort[++ib].code;
+ else
+ { while (db == cb && bsort[ib].read < ar)
+ db = bsort[++ib].code;
+ while (db == cb && bsort[ib].read == ar && bsort[ib].rpos < ap)
+ db = bsort[++ib].code;
+ }
ct += (ib-jb);
}
while ((da = asort[++ia].code) == ca);
@@ -1061,10 +1042,15 @@ static void *merge_thread(void *arg)
for (a = ja; a < ia; a++)
{ ap = asort[a].rpos;
ar = asort[a].read;
- while (db == cb && bsort[ib].read < ar)
- db = bsort[++ib].code;
- while (db == cb && bsort[ib].read == ar && bsort[ib].rpos < ap)
- db = bsort[++ib].code;
+ if (MG_comp)
+ while (db == cb && bsort[ib].read <= ar)
+ db = bsort[++ib].code;
+ else
+ { while (db == cb && bsort[ib].read < ar)
+ db = bsort[++ib].code;
+ while (db == cb && bsort[ib].read == ar && bsort[ib].rpos < ap)
+ db = bsort[++ib].code;
+ }
if ((ct = ib-jb) > 0)
{ kptr[ap & BMASK] += ct;
for (b = jb; b < ib; b++)
@@ -1138,28 +1124,328 @@ static void *merge_thread(void *arg)
return (NULL);
}
+ // Report threads: given a segment of merged list, find all seeds and from them all alignments.
+
+static HITS_DB *MR_ablock;
+static HITS_DB *MR_bblock;
+static SeedPair *MR_hits;
+static int MR_two;
+static Align_Spec *MR_spec;
+static int MR_tspace;
+
+typedef struct
+ { uint64 max;
+ uint64 top;
+ uint16 *trace;
+ } Trace_Buffer;
+
+static int Entwine(Path *jpath, Path *kpath, Trace_Buffer *tbuf, int *where)
+{ int ac, b2, y2, ae;
+ int i, j, k;
+ int num, den, min;
+#ifdef SEE_ENTWINE
+ int strt = 1;
+ int iflare, oflare;
+#endif
+
+ uint16 *ktrace = tbuf->trace + (uint64) (kpath->trace);
+ uint16 *jtrace = tbuf->trace + (uint64) (jpath->trace);
+
+ min = 10000;
+ num = 0;
+ den = 0;
+
+#ifdef SEE_ENTWINE
+ printf("\n");
+#endif
+
+ y2 = jpath->bbpos;
+ j = jpath->abpos/MR_tspace;
+
+ b2 = kpath->bbpos;
+ k = kpath->abpos/MR_tspace;
+
+ if (j < k)
+ { ac = k*MR_tspace;
+
+ j = 1 + 2*(k-j);
+ k = 1;
+
+ for (i = 1; i < j; i += 2)
+ y2 += jtrace[i];
+ }
+ else
+ { ac = j*MR_tspace;
+
+ k = 1 + 2*(j-k);
+ j = 1;
+
+ for (i = 1; i < k; i += 2)
+ b2 += ktrace[i];
+ }
+
+ ae = jpath->aepos;
+ if (ae > kpath->aepos)
+ ae = kpath->aepos;
+
+ while (1)
+ { ac += MR_tspace;
+ if (ac >= ae)
+ break;
+ y2 += jtrace[j];
+ b2 += ktrace[k];
+ j += 2;
+ k += 2;
+
+#ifdef SEE_ENTWINE
+ printf(" @ %5d : %5d %5d = %4d\n",ac,y2,b2,abs(b2-y2));
+#endif
+
+ i = abs(y2-b2);
+ if (i <= min)
+ { min = i;
+ if (i == 0)
+ *where = ac;
+ }
+ num += i;
+ den += 1;
+#ifdef SEE_ENTWINE
+ if (strt)
+ { strt = 0;
+ iflare = i;
+ }
+ oflare = i;
+#endif
+ }
+
+#ifdef SEE_ENTWINE
+ if (den == 0)
+ printf("Nothing\n");
+ else
+ printf("MINIM = %d AVERAGE = %d IFLARE = %d OFLARE = %d\n",min,num/den,iflare,oflare);
+#endif
+
+ if (den == 0)
+ return (-1);
+ else
+ return (min);
+}
+
+
+// Produce the concatentation of path1 and path2 where they are known to meet at
+// the trace point with coordinate ap. Place this result in a big growing buffer,
+// that gets reset when fusion is called with path1 = NULL
+
+static void Fusion(Path *path1, int ap, Path *path2, Trace_Buffer *tbuf)
+{ int k, k1, k2;
+ int len, diff;
+ uint16 *trace;
+
+ k1 = 2 * ((ap/MR_tspace) - (path1->abpos/MR_tspace));
+ k2 = 2 * ((ap/MR_tspace) - (path2->abpos/MR_tspace));
+
+ len = k1+(path2->tlen-k2);
+
+ if (tbuf->top + len >= tbuf->max)
+ { tbuf->max = 1.2*(tbuf->top+len) + 1000;
+ tbuf->trace = (uint16 *) Realloc(tbuf->trace,sizeof(uint16)*tbuf->max,"Allocating paths");
+ if (tbuf->trace == NULL)
+ exit (1);
+ }
-void Diagonal_Span(Path *path, int tspace, int *mind, int *maxd)
+ trace = tbuf->trace + tbuf->top;
+ tbuf->top += len;
+
+ diff = 0;
+ len = 0;
+ if (k1 > 0)
+ { uint16 *t = tbuf->trace + (uint64) (path1->trace);
+ for (k = 0; k < k1; k += 2)
+ { trace[len++] = t[k];
+ trace[len++] = t[k+1];
+ diff += t[k];
+ }
+ }
+ if (k2 < path2->tlen)
+ { uint16 *t = tbuf->trace + (uint64) (path2->trace);
+ for (k = k2; k < path2->tlen; k += 2)
+ { trace[len++] = t[k];
+ trace[len++] = t[k+1];
+ diff += t[k];
+ }
+ }
+
+ path1->aepos = path2->aepos;
+ path1->bepos = path2->bepos;
+ path1->diffs = diff;
+ path1->trace = (void *) (trace - tbuf->trace);
+ path1->tlen = len;
+}
+
+
+static int Handle_Redundancies(Path *amatch, int novls, Path *bmatch, Trace_Buffer *tbuf)
+{ Path *jpath, *kpath;
+ int j, k, no;
+ int dist, awhen, bwhen;
+ int hasB;
+
+#ifdef TEST_CONTAIN
+ for (j = 0; j < novls; j++)
+ printf(" %3d: [%5d,%5d] x [%5d,%5d]\n",j,amatch[j].abpos,amatch[j].aepos,
+ amatch[j].bbpos,amatch[j].bepos);
+#endif
+
+ hasB = (bmatch != NULL);
+
+ no = 0;
+ for (j = 1; j < novls; j++)
+ { jpath = amatch+j;
+ for (k = no; k >= 0; k--)
+ { kpath = amatch+k;
+
+ if (jpath->abpos < kpath->abpos)
+
+ { if (kpath->abpos <= jpath->aepos && kpath->bbpos <= jpath->bepos)
+ { dist = Entwine(jpath,kpath,tbuf,&awhen);
+ if (dist == 0)
+ { if (kpath->aepos > jpath->aepos)
+ { if (hasB)
+ { if (MG_comp)
+ { dist = Entwine(bmatch+k,bmatch+j,tbuf,&bwhen);
+ if (dist != 0)
+ continue;
+ Fusion(jpath,awhen,kpath,tbuf);
+ amatch[k] = *jpath;
+ Fusion(bmatch+k,bwhen,bmatch+j,tbuf);
+#ifdef TEST_CONTAIN
+ printf(" Really 1");
+#endif
+ }
+ else
+ { dist = Entwine(bmatch+j,bmatch+k,tbuf,&bwhen);
+ if (dist != 0)
+ continue;
+ Fusion(jpath,awhen,kpath,tbuf);
+ amatch[k] = *jpath;
+ Fusion(bmatch+j,bwhen,bmatch+k,tbuf);
+ bmatch[k] = bmatch[j];
+#ifdef TEST_CONTAIN
+ printf(" Really 2");
+#endif
+ }
+ }
+ else
+ { Fusion(jpath,awhen,kpath,tbuf);
+ amatch[k] = *jpath;
+#ifdef TEST_CONTAIN
+ printf(" Really 3");
+#endif
+ }
+ }
+ else
+ { amatch[k] = *jpath;
+ if (hasB)
+ bmatch[k] = bmatch[j];
+ }
+#ifdef TEST_CONTAIN
+ printf(" Fuse! A %d %d\n",j,k);
+#endif
+ break;
+ }
+ }
+ }
+
+ else // kpath->abpos <= jpath->abpos
+
+ { if (jpath->abpos <= kpath->aepos && jpath->bbpos <= kpath->bepos)
+ { dist = Entwine(kpath,jpath,tbuf,&awhen);
+ if (dist == 0)
+ { if (kpath->abpos == jpath->abpos)
+ { if (kpath->aepos < jpath->aepos)
+ { amatch[k] = *jpath;
+ if (hasB)
+ bmatch[k] = bmatch[j];
+ }
+ }
+ else if (jpath->aepos > kpath->aepos)
+ { if (hasB)
+ { if (MG_comp)
+ { dist = Entwine(bmatch+j,bmatch+k,tbuf,&bwhen);
+ if (dist != 0)
+ continue;
+ Fusion(kpath,awhen,jpath,tbuf);
+ Fusion(bmatch+j,bwhen,bmatch+k,tbuf);
+ bmatch[k] = bmatch[j];
+#ifdef TEST_CONTAIN
+ printf(" Really 4");
+#endif
+ }
+ else
+ { dist = Entwine(bmatch+k,bmatch+j,tbuf,&bwhen);
+ if (dist != 0)
+ continue;
+ Fusion(kpath,awhen,jpath,tbuf);
+ Fusion(bmatch+k,bwhen,bmatch+j,tbuf);
+#ifdef TEST_CONTAIN
+ printf(" Really 5");
+#endif
+ }
+ }
+ else
+ { Fusion(kpath,awhen,jpath,tbuf);
+#ifdef TEST_CONTAIN
+ printf(" Really 6");
+#endif
+ }
+ }
+#ifdef TEST_CONTAIN
+ printf(" Fuse! B %d %d\n",j,k);
+#endif
+ break;
+ }
+ }
+ }
+ }
+ if (k < 0)
+ { no += 1;
+ amatch[no] = *jpath;
+ if (hasB)
+ bmatch[no] = bmatch[j];
+ }
+ }
+
+ novls = no+1;
+
+#ifdef TEST_CONTAIN
+ for (j = 0; j < novls; j++)
+ printf(" %3d: [%5d,%5d] x [%5d,%5d]\n",j,amatch[j].abpos,amatch[j].aepos,
+ amatch[j].bbpos,amatch[j].bepos);
+#endif
+
+ return (novls);
+}
+
+void Diagonal_Span(Path *path, int *mind, int *maxd)
{ uint16 *points;
int i, tlen;
int dd, low, hgh;
- points = (uint16 *) path->trace;
+ points = path->trace;
tlen = path->tlen;
- dd = path->bbpos - path->abpos;
+ dd = path->abpos - path->bbpos;
low = hgh = dd;
- dd = path->bepos - path->aepos;
+ dd = path->aepos - path->bepos;
if (dd < low)
low = dd;
else if (dd > hgh)
hgh = dd;
- dd = path->bbpos - (path->abpos/tspace)*tspace;
+ dd = (path->abpos/MR_tspace)*MR_tspace - path->bbpos;
tlen -= 2;
for (i = 1; i < tlen; i += 2)
- { dd += points[i]-tspace;
+ { dd += MR_tspace - points[i];
if (dd < low)
low = dd;
else if (dd > hgh)
@@ -1170,19 +1456,12 @@ void Diagonal_Span(Path *path, int tspace, int *mind, int *maxd)
*maxd = (hgh >> Binshift)+1;
}
-static HITS_DB *MR_ablock;
-static HITS_DB *MR_bblock;
-static SeedPair *MR_hits;
-static int MR_comp;
-static int MR_two;
-
typedef struct
{ int64 beg, end;
int *score;
int *lastp;
int *lasta;
Work_Data *work;
- Align_Spec *aspec;
FILE *ofile1;
FILE *ofile2;
int64 nfilt;
@@ -1203,7 +1482,6 @@ static void *report_thread(void *arg)
int *lastp = data->lastp;
int *lasta = data->lasta;
Work_Data *work = data->work;
- Align_Spec *aspec = data->aspec;
FILE *ofile1 = data->ofile1;
FILE *ofile2 = data->ofile2;
int afirst = MR_ablock->tfirst;
@@ -1214,12 +1492,18 @@ static void *report_thread(void *arg)
Overlap _ovla, *ovla = &_ovla;
Overlap _ovlb, *ovlb = &_ovlb;
Alignment _align, *align = &_align;
- Path *apath;
- Path *bpath = &(ovlb->path);
+ Path *apath = &(ovla->path);
+ Path *bpath;
int64 nfilt = 0;
int64 ahits = 0;
int64 bhits = 0;
- int tspace, small, tbytes;
+ int small, tbytes;
+
+ int AOmax, BOmax;
+ int novla, novlb;
+ Path *amatch, *bmatch;
+
+ Trace_Buffer _tbuf, *tbuf = &_tbuf;
Double *hitc;
int minhit;
@@ -1229,11 +1513,10 @@ static void *report_thread(void *arg)
// In ovl and align roles of A and B are reversed, as the B sequence must be the
// complemented sequence !!
- align->flags = ovla->flags = ovlb->flags = MR_comp;
- align->path = bpath;
+ align->flags = ovla->flags = ovlb->flags = MG_comp;
+ align->path = apath;
- tspace = Trace_Spacing(aspec);
- if (tspace <= TRACE_XOVR)
+ if (MR_tspace <= TRACE_XOVR)
{ small = 1;
tbytes = sizeof(uint8);
}
@@ -1242,11 +1525,21 @@ static void *report_thread(void *arg)
tbytes = sizeof(uint16);
}
+ AOmax = BOmax = MATCH_CHUNK;
+ amatch = Malloc(sizeof(Path)*AOmax,"Allocating match vector");
+ bmatch = Malloc(sizeof(Path)*BOmax,"Allocating match vector");
+
+ tbuf->max = 2*TRACE_CHUNK;
+ tbuf->trace = Malloc(sizeof(short)*tbuf->max,"Allocating trace vector");
+
+ if (amatch == NULL || bmatch == NULL || tbuf->trace == NULL)
+ exit (1);
+
fwrite(&ahits,sizeof(int64),1,ofile1);
- fwrite(&tspace,sizeof(int),1,ofile1);
+ fwrite(&MR_tspace,sizeof(int),1,ofile1);
if (MR_two)
{ fwrite(&bhits,sizeof(int64),1,ofile2);
- fwrite(&tspace,sizeof(int),1,ofile2);
+ fwrite(&MR_tspace,sizeof(int),1,ofile2);
}
minhit = (Hitmin-1)/Kmer + 1;
@@ -1262,6 +1555,7 @@ static void *report_thread(void *arg)
else
{ int ar, br;
int alen, blen;
+ int doA, doB;
int setaln, amark, amark2;
int apos, bpos, diag;
int64 lidx, sidx;
@@ -1282,7 +1576,10 @@ static void *report_thread(void *arg)
printf("%5d vs %5d : %5d x %5d\n",br+bfirst,ar+afirst,blen,alen);
#endif
setaln = 1;
+ doA = doB = 0;
amark2 = 0;
+ novla = novlb = 0;
+ tbuf->top = 0;
for (sidx = nidx; hitd[nidx].p2 == cpair; nidx = h2)
{ amark = amark2 + PANEL_SIZE;
amark2 = amark - PANEL_OVERLAP;
@@ -1321,12 +1618,15 @@ static void *report_thread(void *arg)
(score[diag] + scorp[diag] >= Hitmin || score[diag] + scorm[diag] >= Hitmin))
{ if (setaln)
{ setaln = 0;
- align->bseq = aseq + aread[ar].boff;
- align->aseq = bseq + bread[br].boff;
- align->blen = alen;
- align->alen = blen;
+ align->aseq = aseq + aread[ar].boff;
+ align->bseq = bseq + bread[br].boff;
+ align->alen = alen;
+ align->blen = blen;
ovlb->bread = ovla->aread = ar + afirst;
ovlb->aread = ovla->bread = br + bfirst;
+ doA = (alen >= HGAP_MIN);
+ doB = (SYMMETRIC && blen >= HGAP_MIN &&
+ (ar != br || !MG_self || !MG_comp));
}
#ifdef TEST_GATHER
else
@@ -1341,50 +1641,77 @@ static void *report_thread(void *arg)
#endif
nfilt += 1;
- apath = Local_Alignment(align,work,aspec,bpos-apos,bpos-apos,bpos+apos,-1,-1);
+ bpath = Local_Alignment(align,work,MR_spec,apos-bpos,apos-bpos,apos+bpos,-1,-1);
- { int low, hgh, be;
+ { int low, hgh, ae;
- Diagonal_Span(bpath,tspace,&low,&hgh);
+ Diagonal_Span(apath,&low,&hgh);
if (diag < low)
low = diag;
else if (diag > hgh)
hgh = diag;
- be = bpath->bepos;
+ ae = apath->aepos;
for (diag = low; diag <= hgh; diag++)
- if (be > lasta[diag])
- lasta[diag] = be;
+ if (ae > lasta[diag])
+ lasta[diag] = ae;
#ifdef TEST_GATHER
- printf(" %d - %d @ %d",low,hgh,bpath->bepos);
+ printf(" %d - %d @ %d",low,hgh,apath->aepos);
#endif
}
if ((apath->aepos-apath->abpos) + (apath->bepos-apath->bbpos) >= MINOVER)
- { ovla->path = *apath;
- if (small)
- { Compress_TraceTo8(ovla);
- Compress_TraceTo8(ovlb);
- }
- if (alen >= HGAP_MIN)
- { Write_Overlap(ofile1,ovla,tbytes);
- ahits += 1;
+ { if (doA)
+ { if (novla >= AOmax)
+ { AOmax = 1.2*novla + MATCH_CHUNK;
+ amatch = Realloc(amatch,sizeof(Path)*AOmax,
+ "Reallocating match vector");
+ if (amatch == NULL)
+ exit (1);
+ }
+ if (tbuf->top + apath->tlen > tbuf->max)
+ { tbuf->max = 1.2*(tbuf->top+apath->tlen) + TRACE_CHUNK;
+ tbuf->trace = Realloc(tbuf->trace,sizeof(short)*tbuf->max,
+ "Reallocating trace vector");
+ if (tbuf->trace == NULL)
+ exit (1);
+ }
+ amatch[novla] = *apath;
+ amatch[novla].trace = (void *) (tbuf->top);
+ memcpy(tbuf->trace+tbuf->top,apath->trace,sizeof(short)*apath->tlen);
+ novla += 1;
+ tbuf->top += apath->tlen;
}
- if (blen >= HGAP_MIN && SYMMETRIC)
- { Write_Overlap(ofile2,ovlb,tbytes);
- bhits += 1;
+ if (doB)
+ { if (novlb >= BOmax)
+ { BOmax = 1.2*novlb + MATCH_CHUNK;
+ bmatch = Realloc(bmatch,sizeof(Path)*BOmax,
+ "Reallocating match vector");
+ if (bmatch == NULL)
+ exit (1);
+ }
+ if (tbuf->top + bpath->tlen > tbuf->max)
+ { tbuf->max = 1.2*(tbuf->top+bpath->tlen) + TRACE_CHUNK;
+ tbuf->trace = Realloc(tbuf->trace,sizeof(short)*tbuf->max,
+ "Reallocating trace vector");
+ if (tbuf->trace == NULL)
+ exit (1);
+ }
+ bmatch[novlb] = *bpath;
+ bmatch[novlb].trace = (void *) (tbuf->top);
+ memcpy(tbuf->trace+tbuf->top,bpath->trace,sizeof(short)*bpath->tlen);
+ novlb += 1;
+ tbuf->top += bpath->tlen;
}
#ifdef TEST_GATHER
printf(" [%5d,%5d] x [%5d,%5d] = %4d",
- bpath->abpos,bpath->aepos,bpath->bbpos,bpath->bepos,bpath->diffs);
+ apath->abpos,apath->aepos,apath->bbpos,apath->bepos,apath->diffs);
#endif
#ifdef SHOW_OVERLAP
printf("\n\n %d(%d) vs %d(%d)\n\n",
- ovlb->aread,ovlb->alen,ovlb->bread,ovlb->blen);
+ ovla->aread,ovla->alen,ovla->bread,ovla->blen);
Print_ACartoon(stdout,align,ALIGN_INDENT);
#ifdef SHOW_ALIGNMENT
- if (small)
- Decompress_TraceTo16(ovlb);
Compute_Trace_ALL(align,work);
printf("\n Diff = %d\n",align->path->diffs);
Print_Alignment(stdout,align,work,
@@ -1425,8 +1752,47 @@ static void *report_thread(void *arg)
else
lasta[d] = 0;
}
+
+
+ { int i;
+
+#ifdef TEST_CONTAIN
+ if (novla > 1 || novlb > 1)
+ printf("\n%5d vs %5d:\n",ar,br);
+#endif
+
+ if (novla > 1)
+ { if (novlb > 1)
+ novla = novlb = Handle_Redundancies(amatch,novla,bmatch,tbuf);
+ else
+ novla = Handle_Redundancies(amatch,novla,NULL,tbuf);
+ }
+ else if (novlb > 1)
+ novlb = Handle_Redundancies(bmatch,novlb,NULL,tbuf);
+
+ for (i = 0; i < novla; i++)
+ { ovla->path = amatch[i];
+ ovla->path.trace = tbuf->trace + (uint64) (ovla->path.trace);
+ if (small)
+ Compress_TraceTo8(ovla);
+ Write_Overlap(ofile1,ovla,tbytes);
+ }
+ for (i = 0; i < novlb; i++)
+ { ovlb->path = bmatch[i];
+ ovlb->path.trace = tbuf->trace + (uint64) (ovlb->path.trace);
+ if (small)
+ Compress_TraceTo8(ovlb);
+ Write_Overlap(ofile2,ovlb,tbytes);
+ }
+ ahits += novla;
+ bhits += novlb;
+ }
}
+ free(tbuf->trace);
+ free(bmatch);
+ free(amatch);
+
data->nfilt = nfilt;
data->ncheck = ahits + bhits;
@@ -1453,7 +1819,8 @@ static void *report_thread(void *arg)
********************************************************************************************/
void Match_Filter(char *aname, HITS_DB *ablock, char *bname, HITS_DB *bblock,
- int self, int comp, Align_Spec *aspec)
+ void *vasort, int alen, void *vbsort, int blen,
+ int comp, Align_Spec *aspec)
{ THREAD threads[NTHREADS];
Merge_Arg parmm[NTHREADS];
Lex_Arg parmx[NTHREADS];
@@ -1466,32 +1833,14 @@ void Match_Filter(char *aname, HITS_DB *ablock, char *bname, HITS_DB *bblock,
int64 nfilt, ncheck;
KmerPos *asort, *bsort;
- int alen, blen;
-
int64 atot, btot;
+ asort = (KmerPos *) vasort;
+ bsort = (KmerPos *) vbsort;
+
atot = ablock->totlen;
btot = bblock->totlen;
- if (comp)
- { asort = Csort;
- alen = Clen;
- }
- else
- { asort = Asort;
- alen = Alen;
- }
-
- if (self)
- { bsort = Asort;
- blen = Alen;
- }
- else
- { if (VERBOSE)
- printf("\nBuilding index for %s\n",bname);
- bsort = sort_kmers(bblock,&blen);
- }
-
{ int64 powr;
int i, nbyte;
@@ -1521,7 +1870,7 @@ void Match_Filter(char *aname, HITS_DB *ablock, char *bname, HITS_DB *bblock,
if (VERBOSE)
{ if (comp)
- printf("\nComparing c(%s) to %s\n",aname,bname);
+ printf("\nComparing %s to c(%s)\n",aname,bname);
else
printf("\nComparing %s to %s\n",aname,bname);
}
@@ -1535,7 +1884,8 @@ void Match_Filter(char *aname, HITS_DB *ablock, char *bname, HITS_DB *bblock,
MG_alist = asort;
MG_blist = bsort;
- MG_self = self;
+ MG_self = (aname == bname);
+ MG_comp = comp;
parmm[0].abeg = parmm[0].bbeg = 0;
for (i = 1; i < NTHREADS; i++)
@@ -1573,10 +1923,10 @@ void Match_Filter(char *aname, HITS_DB *ablock, char *bname, HITS_DB *bblock,
for (j = 0; j < MAXGRAM; j++)
histo[j] += parmm[i].hitgram[j];
- if (self || (int64) (MEM_LIMIT/sizeof(Double)) > Alen + Clen + 2*blen)
- avail = (MEM_LIMIT/sizeof(Double) - (Alen + Clen)) / 2;
+ if (asort == bsort || (int64) (MEM_LIMIT/sizeof(Double)) > alen + 2*blen)
+ avail = (MEM_LIMIT/sizeof(Double) - alen) / 2;
else
- avail = MEM_LIMIT/sizeof(Double) - (Alen + Clen + blen);
+ avail = MEM_LIMIT/sizeof(Double) - (alen + blen);
avail *= .98;
tom = 0;
@@ -1587,29 +1937,33 @@ void Match_Filter(char *aname, HITS_DB *ablock, char *bname, HITS_DB *bblock,
}
limit = j;
- if (limit == 1)
+ if (limit <= 1)
{ fprintf(stderr,"\nError: Insufficient ");
if (MEM_LIMIT == MEM_PHYSICAL)
- fprintf(stderr," physical memory, reduce block size\n");
+ fprintf(stderr," physical memory (%.1fGb), reduce block size\n",
+ (1.*MEM_LIMIT)/0x40000000ll);
else
- fprintf(stderr," memory allocation, reduce block size or increase allocation\n");
+ { fprintf(stderr," memory allocation (%.1fGb),",(1.*MEM_LIMIT)/0x40000000ll);
+ fprintf(stderr," reduce block size or increase allocation\n");
+ }
fflush(stderr);
exit (1);
}
- else if (limit < 10)
+ if (limit < 10)
{ fprintf(stderr,"\nWarning: Sensitivity hampered by low ");
if (MEM_LIMIT == MEM_PHYSICAL)
- fprintf(stderr," physical memory, reduce block size\n");
+ fprintf(stderr," physical memory (%.1fGb), reduce block size\n",
+ (1.*MEM_LIMIT)/0x40000000ll);
else
- fprintf(stderr," memory allocation, reduce block size or increase allocation\n");
+ { fprintf(stderr," memory allocation (%.1fGb),",(1.*MEM_LIMIT)/0x40000000ll);
+ fprintf(stderr," reduce block size or increase allocation\n");
+ }
fflush(stderr);
}
- else
- { if (VERBOSE)
- { printf(" Capping mutual k-mer matches over %d (effectively -t%d)\n",
- limit,(int) sqrt(1.*limit));
- fflush(stdout);
- }
+ if (VERBOSE)
+ { printf(" Capping mutual k-mer matches over %d (effectively -t%d)\n",
+ limit,(int) sqrt(1.*limit));
+ fflush(stdout);
}
for (i = 0; i < NTHREADS; i++)
@@ -1630,26 +1984,26 @@ void Match_Filter(char *aname, HITS_DB *ablock, char *bname, HITS_DB *bblock,
if (VERBOSE)
{ printf(" Hit count = ");
Print_Number(nhits,0,stdout);
- if (self || nhits >= blen)
+ if (asort == bsort || nhits >= blen)
printf("\n Highwater of %.2fGb space\n",
- (1. * (Alen + Clen + 2*nhits)) / 67108864);
+ (1. * (alen + 2*nhits)) / 67108864);
else
printf("\n Highwater of %.2fGb space\n",
- (1. * (Alen + Clen + blen + nhits)) / 67108864);
+ (1. * (alen + blen + nhits)) / 67108864);
fflush(stdout);
}
if (nhits == 0)
goto zerowork;
- if (self)
+ if (asort == bsort)
hhit = work1 = (SeedPair *) Malloc(sizeof(SeedPair)*(nhits+1),
"Allocating dazzler hit vectors");
else
{ if (nhits >= blen)
bsort = (KmerPos *) Realloc(bsort,sizeof(SeedPair)*(nhits+1),
"Reallocating dazzler sort vectors");
- hhit = work1 = (SeedPair *) bsort;
+ hhit = work1 = (SeedPair *) bsort;
}
khit = work2 = (SeedPair *) Malloc(sizeof(SeedPair)*(nhits+1),"Allocating dazzler hit vectors");
if (hhit == NULL || khit == NULL || bsort == NULL)
@@ -1678,7 +2032,7 @@ void Match_Filter(char *aname, HITS_DB *ablock, char *bname, HITS_DB *bblock,
printf("\nSETUP SORT:\n");
for (i = 0; i < HOW_MANY && i < nhits; i++)
{ SeedPair *c = khit+i;
- printf(" %5d / %5d / %5d /%5d\n",c->aread,c->bread,c->apos,c->bpos);
+ printf(" %5d / %5d / %5d /%5d\n",c->aread,c->bread,c->apos,c->apos-c->diag);
}
#endif
}
@@ -1705,7 +2059,7 @@ void Match_Filter(char *aname, HITS_DB *ablock, char *bname, HITS_DB *bblock,
printf("\nCROSS SORT %lld:\n",nhits);
for (i = 0; i < HOW_MANY && i <= nhits; i++)
{ SeedPair *c = khit+i;
- printf(" %5d / %5d / %5d /%5d\n",c->aread,c->bread,c->apos,c->bpos);
+ printf(" %5d / %5d / %5d /%5d\n",c->aread,c->bread,c->apos,c->apos-c->diag);
}
#endif
}
@@ -1718,8 +2072,9 @@ void Match_Filter(char *aname, HITS_DB *ablock, char *bname, HITS_DB *bblock,
MR_ablock = ablock;
MR_bblock = bblock;
MR_hits = khit;
- MR_comp = comp;
- MR_two = ! self && SYMMETRIC;
+ MR_two = ! MG_self && SYMMETRIC;
+ MR_spec = aspec;
+ MR_tspace = Trace_Spacing(aspec);
parmr[0].beg = 0;
for (i = 1; i < NTHREADS; i++)
@@ -1748,13 +2103,12 @@ void Match_Filter(char *aname, HITS_DB *ablock, char *bname, HITS_DB *bblock,
parmr[i].lastp = parmr[i].score + w;
parmr[i].lasta = parmr[i].lastp + w;
parmr[i].work = New_Work_Data();
- parmr[i].aspec = aspec;
parmr[i].ofile1 =
Fopen(Catenate(aname,".",bname,Numbered_Suffix((comp?".C":".N"),i,".las")),"w");
if (parmr[i].ofile1 == NULL)
exit (1);
- if (self)
+ if (MG_self)
parmr[i].ofile2 = parmr[i].ofile1;
else if (SYMMETRIC)
{ parmr[i].ofile2 =
@@ -1796,19 +2150,18 @@ void Match_Filter(char *aname, HITS_DB *ablock, char *bname, HITS_DB *bblock,
zerowork:
{ FILE *ofile;
- int i, tspace;
+ int i;
nhits = 0;
- tspace = Trace_Spacing(aspec);
for (i = 0; i < NTHREADS; i++)
{ ofile = Fopen(Catenate(aname,".",bname,Numbered_Suffix((comp?".C":".N"),i,".las")),"w");
fwrite(&nhits,sizeof(int64),1,ofile);
- fwrite(&tspace,sizeof(int),1,ofile);
+ fwrite(&MR_tspace,sizeof(int),1,ofile);
fclose(ofile);
- if (! self)
+ if (! MG_self && SYMMETRIC)
{ ofile = Fopen(Catenate(bname,".",aname,Numbered_Suffix((comp?".C":".N"),i,".las")),"w");
fwrite(&nhits,sizeof(int64),1,ofile);
- fwrite(&tspace,sizeof(int),1,ofile);
+ fwrite(&MR_tspace,sizeof(int),1,ofile);
fclose(ofile);
}
}
diff --git a/filter.h b/filter.h
index edfbee5..ba7a34d 100644
--- a/filter.h
+++ b/filter.h
@@ -1,40 +1,3 @@
-/************************************************************************************\
-* *
-* Copyright (c) 2014, Dr. Eugene W. Myers (EWM). All rights reserved. *
-* *
-* Redistribution and use in source and binary forms, with or without modification, *
-* are permitted provided that the following conditions are met: *
-* *
-* · Redistributions of source code must retain the above copyright notice, this *
-* list of conditions and the following disclaimer. *
-* *
-* · Redistributions in binary form must reproduce the above copyright notice, this *
-* list of conditions and the following disclaimer in the documentation and/or *
-* other materials provided with the distribution. *
-* *
-* · The name of EWM may not be used to endorse or promote products derived from *
-* this software without specific prior written permission. *
-* *
-* THIS SOFTWARE IS PROVIDED BY EWM ”AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, *
-* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND *
-* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL EWM BE LIABLE *
-* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES *
-* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS *
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY *
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
-* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN *
-* IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
-* *
-* For any issues regarding this software and its use, contact EWM at: *
-* *
-* Eugene W. Myers Jr. *
-* Bautzner Str. 122e *
-* 01099 Dresden *
-* GERMANY *
-* Email: gene.myers at gmail.com *
-* *
-\************************************************************************************/
-
/*******************************************************************************************
*
* Filter interface for the dazzler.
@@ -66,9 +29,10 @@ extern uint64 MEM_PHYSICAL;
int Set_Filter_Params(int kmer, int binshift, int suppress, int hitmin);
-void Build_Table(HITS_DB *block);
+void *Sort_Kmers(HITS_DB *block, int *len);
void Match_Filter(char *aname, HITS_DB *ablock, char *bname, HITS_DB *bblock,
- int self, int comp, Align_Spec *asettings);
+ void *atable, int alen, void *btable, int blen,
+ int comp, Align_Spec *asettings);
#endif
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/daligner.git
More information about the debian-med-commit
mailing list