[med-svn] [daligner] 01/05: New upstream version 1.0+20180108
Afif Elghraoui
afif at moszumanska.debian.org
Sun Feb 4 10:28:39 UTC 2018
This is an automated email from the git hooks/post-receive script.
afif pushed a commit to branch master
in repository daligner.
commit 153de3c928622a637859f8a43c6ccd416675a9fd
Author: Afif Elghraoui <afif at debian.org>
Date: Sun Feb 4 05:22:21 2018 -0500
New upstream version 1.0+20180108
---
DB.c | 325 +++++++++++++++++++++++++++++++++++++++++++--------------
DB.h | 208 +++++++++++++++++++++++++++++-------
HPC.daligner.c | 20 ++--
LAcat.c | 16 +--
LAcheck.c | 12 +--
LAdump.c | 30 +++---
LAindex.c | 4 +-
LAmerge.c | 12 +--
LAshow.c | 17 ++-
LAsort.c | 14 +--
LAsplit.c | 18 ++--
align.c | 128 +++++++++++++++++------
align.h | 13 ++-
daligner.c | 38 +++----
filter.c | 20 ++--
filter.h | 4 +-
16 files changed, 638 insertions(+), 241 deletions(-)
diff --git a/DB.c b/DB.c
index 69060d0..9b5c85f 100644
--- a/DB.c
+++ b/DB.c
@@ -41,6 +41,24 @@ char Ebuffer[1000];
#endif
+int Count_Args(char *var)
+{ int cnt, lev;
+ char *s;
+
+ cnt = 1;
+ lev = 0;
+ for (s = var; *s != '\0'; s++)
+ if (*s == ',')
+ { if (lev == 0)
+ cnt += 1;
+ }
+ else if (*s == '(')
+ lev += 1;
+ else if (*s == ')')
+ lev -= 1;
+ return (cnt);
+}
+
void *Malloc(int64 size, char *mesg)
{ void *p;
@@ -382,7 +400,7 @@ void Number_Arrow(char *s)
********************************************************************************************/
-// Open the given database or dam, "path" into the supplied HITS_DB record "db". If the name has
+// Open the given database or dam, "path" into the supplied DAZZ_DB record "db". If the name has
// a part # in it then just the part is opened. The index array is allocated (for all or
// just the part) and read in.
// Return status of routine:
@@ -390,8 +408,8 @@ void Number_Arrow(char *s)
// 0: Open of DB proceeded without mishap
// 1: Open of DAM proceeded without mishap
-int Open_DB(char* path, HITS_DB *db)
-{ HITS_DB dbcopy;
+int Open_DB(char* path, DAZZ_DB *db)
+{ DAZZ_DB dbcopy;
char *root, *pwd, *bptr, *fptr, *cat;
int nreads;
FILE *index, *dbvis;
@@ -437,7 +455,7 @@ int Open_DB(char* path, HITS_DB *db)
if ((index = Fopen(Catenate(pwd,PATHSEP,root,".idx"),"r")) == NULL)
goto error1;
- if (fread(db,sizeof(HITS_DB),1,index) != 1)
+ if (fread(db,sizeof(DAZZ_DB),1,index) != 1)
{ EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
goto error2;
}
@@ -505,28 +523,28 @@ int Open_DB(char* path, HITS_DB *db)
nreads = ulast-ufirst;
if (part <= 0)
- { db->reads = (HITS_READ *) Malloc(sizeof(HITS_READ)*(nreads+2),"Allocating Open_DB index");
+ { db->reads = (DAZZ_READ *) Malloc(sizeof(DAZZ_READ)*(nreads+2),"Allocating Open_DB index");
if (db->reads == NULL)
goto error2;
db->reads += 1;
- if (fread(db->reads,sizeof(HITS_READ),nreads,index) != (size_t) nreads)
+ if (fread(db->reads,sizeof(DAZZ_READ),nreads,index) != (size_t) nreads)
{ EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
free(db->reads-1);
goto error2;
}
}
else
- { HITS_READ *reads;
+ { DAZZ_READ *reads;
int i, r, maxlen;
int64 totlen;
- reads = (HITS_READ *) Malloc(sizeof(HITS_READ)*(nreads+2),"Allocating Open_DB index");
+ reads = (DAZZ_READ *) Malloc(sizeof(DAZZ_READ)*(nreads+2),"Allocating Open_DB index");
if (reads == NULL)
goto error2;
reads += 1;
- fseeko(index,sizeof(HITS_READ)*ufirst,SEEK_CUR);
- if (fread(reads,sizeof(HITS_READ),nreads,index) != (size_t) nreads)
+ fseeko(index,sizeof(DAZZ_READ)*ufirst,SEEK_CUR);
+ if (fread(reads,sizeof(DAZZ_READ),nreads,index) != (size_t) nreads)
{ EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
free(reads-1);
goto error2;
@@ -580,13 +598,13 @@ error:
// of the current DB partition. Reallocate smaller memory blocks for the information kept
// for the retained reads.
-void Trim_DB(HITS_DB *db)
+void Trim_DB(DAZZ_DB *db)
{ int i, j, r;
int allflag, cutoff;
int64 totlen;
int maxlen, nreads;
- HITS_TRACK *record;
- HITS_READ *reads;
+ DAZZ_TRACK *record;
+ DAZZ_READ *reads;
if (db->trimmed) return;
@@ -603,7 +621,7 @@ void Trim_DB(HITS_DB *db)
for (record = db->tracks; record != NULL; record = record->next)
if (strcmp(record->name,". at qvs") == 0)
- { uint16 *table = ((HITS_QV *) record)->table;
+ { uint16 *table = ((DAZZ_QV *) record)->table;
j = 0;
for (i = 0; i < db->nreads; i++)
@@ -675,7 +693,7 @@ void Trim_DB(HITS_DB *db)
db->trimmed = 1;
if (j < nreads)
- { db->reads = Realloc(reads-1,sizeof(HITS_READ)*(j+2),NULL);
+ { db->reads = Realloc(reads-1,sizeof(DAZZ_READ)*(j+2),NULL);
db->reads += 1;
}
}
@@ -683,12 +701,12 @@ void Trim_DB(HITS_DB *db)
// The DB has already been trimmed, but a track over the untrimmed DB needs to be loaded.
// Trim the track by rereading the untrimmed DB index from the file system.
-static int Late_Track_Trim(HITS_DB *db, HITS_TRACK *track, int ispart)
+static int Late_Track_Trim(DAZZ_DB *db, DAZZ_TRACK *track, int ispart)
{ int i, j, r;
int allflag, cutoff;
int ureads;
char *root;
- HITS_READ read;
+ DAZZ_READ read;
FILE *indx;
if (!db->trimmed) return (0);
@@ -703,7 +721,7 @@ static int Late_Track_Trim(HITS_DB *db, HITS_TRACK *track, int ispart)
root = rindex(db->path,'/') + 2;
indx = Fopen(Catenate(db->path,"","",".idx"),"r");
- fseeko(indx,sizeof(HITS_DB) + sizeof(HITS_READ)*db->ufirst,SEEK_SET);
+ fseeko(indx,sizeof(DAZZ_DB) + sizeof(DAZZ_READ)*db->ufirst,SEEK_SET);
if (ispart)
ureads = ((int *) (db->reads))[-1];
else
@@ -725,7 +743,7 @@ static int Late_Track_Trim(HITS_DB *db, HITS_TRACK *track, int ispart)
{ anno = (char *) track->anno;
j = r = 0;
for (i = r = 0; i < ureads; i++, r += size)
- { if (fread(&read,sizeof(HITS_READ),1,indx) != 1)
+ { if (fread(&read,sizeof(DAZZ_READ),1,indx) != 1)
{ EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
fclose(indx);
EXIT(1);
@@ -744,7 +762,7 @@ static int Late_Track_Trim(HITS_DB *db, HITS_TRACK *track, int ispart)
anno4 = (int *) (track->anno);
j = anno4[0] = 0;
for (i = 0; i < ureads; i++)
- { if (fread(&read,sizeof(HITS_READ),1,indx) != 1)
+ { if (fread(&read,sizeof(DAZZ_READ),1,indx) != 1)
{ EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
fclose(indx);
EXIT(1);
@@ -764,7 +782,7 @@ static int Late_Track_Trim(HITS_DB *db, HITS_TRACK *track, int ispart)
anno8 = (int64 *) (track->anno);
j = anno8[0] = 0;
for (i = 0; i < ureads; i++)
- { if (fread(&read,sizeof(HITS_READ),1,indx) != 1)
+ { if (fread(&read,sizeof(DAZZ_READ),1,indx) != 1)
{ EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
fclose(indx);
EXIT(1);
@@ -789,8 +807,8 @@ static int Late_Track_Trim(HITS_DB *db, HITS_TRACK *track, int ispart)
// and any open file pointers. The record pointed at by db however remains (the user
// supplied it and so should free it).
-void Close_DB(HITS_DB *db)
-{ HITS_TRACK *t, *p;
+void Close_DB(DAZZ_DB *db)
+{ DAZZ_TRACK *t, *p;
if (db->loaded)
free(((char *) (db->bases)) - 1);
@@ -813,19 +831,19 @@ void Close_DB(HITS_DB *db)
// Return the size in bytes of the memory occupied by a given DB
-int64 sizeof_DB(HITS_DB *db)
+int64 sizeof_DB(DAZZ_DB *db)
{ int64 s;
- HITS_TRACK *t;
+ DAZZ_TRACK *t;
- s = sizeof(HITS_DB)
- + sizeof(HITS_READ)*(db->nreads+2)
+ s = sizeof(DAZZ_DB)
+ + sizeof(DAZZ_READ)*(db->nreads+2)
+ strlen(db->path)+1
+ (db->totlen+db->nreads+4);
t = db->tracks;
if (t != NULL && strcmp(t->name,". at qvs") == 0)
- { HITS_QV *q = (HITS_QV *) t;
- s += sizeof(HITS_QV)
+ { DAZZ_QV *q = (DAZZ_QV *) t;
+ s += sizeof(DAZZ_QV)
+ sizeof(uint16) * db->nreads
+ q->ncodes * sizeof(QVcoding)
+ 6;
@@ -833,7 +851,7 @@ int64 sizeof_DB(HITS_DB *db)
}
for (; t != NULL; t = t->next)
- { s += sizeof(HITS_TRACK)
+ { s += sizeof(DAZZ_TRACK)
+ strlen(t->name)+1
+ t->size * (db->nreads+1);
if (t->data != NULL)
@@ -854,14 +872,14 @@ int64 sizeof_DB(HITS_DB *db)
*
********************************************************************************************/
-HITS_DB *Active_DB = NULL; // Last db/qv used by "Load_QVentry"
-HITS_QV *Active_QV; // Becomes invalid after closing
+DAZZ_DB *Active_DB = NULL; // Last db/qv used by "Load_QVentry"
+DAZZ_QV *Active_QV; // Becomes invalid after closing
-int Load_QVs(HITS_DB *db)
+int Load_QVs(DAZZ_DB *db)
{ FILE *quiva, *istub, *indx;
char *root;
uint16 *table;
- HITS_QV *qvtrk;
+ DAZZ_QV *qvtrk;
QVcoding *coding, *nx;
int ncodes = 0;
@@ -956,7 +974,7 @@ int Load_QVs(HITS_DB *db)
goto error;
}
- // Carefully get the first coding scheme (its offset is most likely in a HITS_RECORD
+ // Carefully get the first coding scheme (its offset is most likely in a DAZZ_RECORD
// in .idx that is *not* in memory). Get all the other coding schemes normally and
// assign the tables # for each read in the block in "tables".
@@ -974,10 +992,10 @@ int Load_QVs(HITS_DB *db)
i = n-fbeg;
if (first < pfirst)
- { HITS_READ read;
+ { DAZZ_READ read;
- fseeko(indx,sizeof(HITS_DB) + sizeof(HITS_READ)*first,SEEK_SET);
- if (fread(&read,sizeof(HITS_READ),1,indx) != 1)
+ fseeko(indx,sizeof(DAZZ_DB) + sizeof(DAZZ_READ)*first,SEEK_SET);
+ if (fread(&read,sizeof(DAZZ_READ),1,indx) != 1)
{ EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
ncodes = i;
goto error;
@@ -1050,17 +1068,17 @@ int Load_QVs(HITS_DB *db)
}
}
- // Allocate and fill in the HITS_QV record and add it to the front of the
+ // Allocate and fill in the DAZZ_QV record and add it to the front of the
// track list
- qvtrk = (HITS_QV *) Malloc(sizeof(HITS_QV),"Allocating QV pseudo-track");
+ qvtrk = (DAZZ_QV *) Malloc(sizeof(DAZZ_QV),"Allocating QV pseudo-track");
if (qvtrk == NULL)
goto error;
qvtrk->name = Strdup(". at qvs","Allocating QV pseudo-track name");
if (qvtrk->name == NULL)
goto error;
qvtrk->next = db->tracks;
- db->tracks = (HITS_TRACK *) qvtrk;
+ db->tracks = (DAZZ_TRACK *) qvtrk;
qvtrk->ncodes = ncodes;
qvtrk->table = table;
qvtrk->coding = coding;
@@ -1091,16 +1109,16 @@ error:
// Close the QV stream, free the QV pseudo track and all associated memory
-void Close_QVs(HITS_DB *db)
-{ HITS_TRACK *track;
- HITS_QV *qvtrk;
+void Close_QVs(DAZZ_DB *db)
+{ DAZZ_TRACK *track;
+ DAZZ_QV *qvtrk;
int i;
Active_DB = NULL;
track = db->tracks;
if (track != NULL && strcmp(track->name,". at qvs") == 0)
- { qvtrk = (HITS_QV *) track;
+ { qvtrk = (DAZZ_QV *) track;
for (i = 0; i < qvtrk->ncodes; i++)
Free_QVcoding(qvtrk->coding+i);
free(qvtrk->coding);
@@ -1125,7 +1143,7 @@ void Close_QVs(HITS_DB *db)
// -1: Track is not the right size of DB either trimmed or untrimmed
// -2: Could not find the track
-int Check_Track(HITS_DB *db, char *track, int *kind)
+int Check_Track(DAZZ_DB *db, char *track, int *kind)
{ FILE *afile;
int tracklen, size, ispart;
int ureads, treads;
@@ -1181,10 +1199,10 @@ int Check_Track(HITS_DB *db, char *track, int *kind)
// If track is not already in the db's track list, then allocate all the storage for it,
// read it in from the appropriate file, add it to the track list, and return a pointer
-// to the newly created HITS_TRACK record. If the track does not exist or cannot be
+// to the newly created DAZZ_TRACK record. If the track does not exist or cannot be
// opened for some reason, then NULL is returned.
-HITS_TRACK *Load_Track(HITS_DB *db, char *track)
+DAZZ_TRACK *Load_Track(DAZZ_DB *db, char *track)
{ FILE *afile, *dfile;
int tracklen, size;
int nreads, ispart;
@@ -1192,7 +1210,7 @@ HITS_TRACK *Load_Track(HITS_DB *db, char *track)
void *anno;
void *data;
char *name;
- HITS_TRACK *record;
+ DAZZ_TRACK *record;
if (track[0] == '.')
{ EPRINTF(EPLACE,"%s: Track name, '%s', cannot begin with a .\n",Prog_Name,track);
@@ -1340,7 +1358,7 @@ HITS_TRACK *Load_Track(HITS_DB *db, char *track)
fclose(afile);
- record = (HITS_TRACK *) Malloc(sizeof(HITS_TRACK),"Allocating Track Record");
+ record = (DAZZ_TRACK *) Malloc(sizeof(DAZZ_TRACK),"Allocating Track Record");
if (record == NULL)
goto error;
record->name = Strdup(track,"Allocating Track Name");
@@ -1379,8 +1397,161 @@ error:
EXIT (NULL);
}
-void Close_Track(HITS_DB *db, char *track)
-{ HITS_TRACK *record, *prev;
+// Assumming file pointer for afile is correctly positioned at the start of a extra item,
+// and aname is the name of the .anno file, decode the value present and places it in
+// extra if extra->nelem == 0, otherwise reduce the value just read into extra according
+// according the to the directive given by 'accum'. Leave the read poinrt at the next
+// extra or end-of-file.
+// Returns:
+// 1 if at the end of file,
+// 0 if item was read and folded correctly,
+// -1 if there was a system IO or allocation error (if interactive), and
+// -2 if the new value could not be reduced into the currenct value of extra (interactive)
+
+int Read_Extra(FILE *afile, char *aname, DAZZ_EXTRA *extra)
+{ int vtype, nelem, accum, slen;
+ char *name;
+ void *value;
+
+#define EREAD(v,s,n,file,ret) \
+ { if (fread(v,s,n,file) != (size_t) n) \
+ { if (ferror(file)) \
+ fprintf(stderr,"%s: System error, read failed!\n",Prog_Name); \
+ else if (ret) \
+ return (1); \
+ else \
+ fprintf(stderr,"%s: The file %s is corrupted\n",Prog_Name,aname); \
+ EXIT(-1); \
+ } \
+ }
+
+ EREAD(&vtype,sizeof(int),1,afile,1)
+ EREAD(&nelem,sizeof(int),1,afile,0)
+ EREAD(&accum,sizeof(int),1,afile,0)
+ EREAD(&slen,sizeof(int),1,afile,0)
+
+ if (extra == NULL)
+ { if (fseeko(afile,slen+8*nelem,SEEK_CUR) < 0)
+ { fprintf(stderr,"%s: System error, read failed!\n",Prog_Name);
+ EXIT(-1);
+ }
+ return (0);
+ }
+
+ name = (char *) Malloc(slen+1,"Allocating extra name");
+ value = Malloc(8*nelem,"Allocating extra value");
+ if (name == NULL || value == NULL)
+ EXIT(-1);
+
+ EREAD(name,1,slen,afile,0);
+ EREAD(value,8,nelem,afile,0);
+ name[slen] = '\0';
+
+ if (extra->nelem == 0)
+ { extra->vtype = vtype;
+ extra->nelem = nelem;
+ extra->accum = accum;
+ extra->name = name;
+ extra->value = value;
+ return (0);
+ }
+
+ if (vtype != extra->vtype)
+ { fprintf(stderr,"%s: Type of extra %s does not agree with previous .anno block files\n",
+ Prog_Name,name);
+ goto error;
+ }
+ if (nelem != extra->nelem)
+ { fprintf(stderr,"%s: Length of extra %s does not agree with previous .anno block files\n",
+ Prog_Name,name);
+ goto error;
+ }
+ if (accum != extra->accum)
+ { fprintf(stderr,"%s: Reduction indicator of extra %s does not agree with",Prog_Name,name);
+ fprintf(stderr," previos .anno block files\n");
+ goto error;
+ }
+ if (strcmp(name,extra->name) != 0)
+ { fprintf(stderr,"%s: Expecting extra %s in .anno block file, not %s\n",
+ Prog_Name,extra->name,name);
+ goto error;
+ }
+
+ if (vtype == DB_INT)
+ { int64 *ival = (int64 *) value;
+ int64 *eval = (int64 *) (extra->value);
+ int j;
+
+ if (accum == DB_EXACT)
+ { for (j = 0; j < nelem; j++)
+ if (eval[j] != ival[j])
+ { fprintf(stderr,"%s: Value of extra %s doe not agree",Prog_Name,name);
+ fprintf(stderr," with previous .anno block files\n");
+ goto error;
+ }
+ }
+ else
+ { for (j = 0; j < nelem; j++)
+ eval[j] += ival[j];
+ }
+ }
+
+ else
+ { double *ival = (double *) value;
+ double *eval = (double *) (extra->value);
+ int j;
+
+ if (accum == DB_EXACT)
+ { for (j = 0; j < nelem; j++)
+ if (eval[j] != ival[j])
+ { fprintf(stderr,"%s: Value of extra %s doe not agree",Prog_Name,name);
+ fprintf(stderr," with previous .anoo block files\n");
+ goto error;
+ }
+ }
+ else
+ { for (j = 0; j < nelem; j++)
+ eval[j] += ival[j];
+ }
+ }
+
+ free(value);
+ free(name);
+ return (0);
+
+error:
+ free(value);
+ free(name);
+ EXIT(1);
+}
+
+// Write extra record to end of file afile and advance write pointer
+// If interactive, then return non-zero on error, if bash, then print
+// and halt if an error
+
+int Write_Extra(FILE *afile, DAZZ_EXTRA *extra)
+{ int slen;
+
+#define EWRITE(v,s,n,file) \
+ { if (fwrite(v,s,n,file) != (size_t) n) \
+ { fprintf(stderr,"%s: System error, read failed!\n",Prog_Name); \
+ EXIT(1); \
+ } \
+ }
+
+ EWRITE(&(extra->vtype),sizeof(int),1,afile)
+ FWRITE(&(extra->nelem),sizeof(int),1,afile)
+ FWRITE(&(extra->accum),sizeof(int),1,afile)
+ slen = strlen(extra->name);
+ FWRITE(&slen,sizeof(int),1,afile)
+ FWRITE(extra->name,1,slen,afile)
+ FWRITE(extra->value,8,extra->nelem,afile)
+
+ return (0);
+}
+
+void Close_Track(DAZZ_DB *db, char *track)
+{ DAZZ_TRACK *record, *prev;
prev = NULL;
for (record = db->tracks; record != NULL; record = record->next)
@@ -1410,7 +1581,7 @@ void Close_Track(HITS_DB *db, char *track)
// Allocate and return a buffer big enough for the largest read in 'db', leaving room
// for an initial delimiter character
-char *New_Read_Buffer(HITS_DB *db)
+char *New_Read_Buffer(DAZZ_DB *db)
{ char *read;
read = (char *) Malloc(db->maxlen+4,"Allocating New Read Buffer");
@@ -1425,11 +1596,11 @@ char *New_Read_Buffer(HITS_DB *db)
//
// **NB**, the byte before read will be set to a delimiter character!
-int Load_Read(HITS_DB *db, int i, char *read, int ascii)
+int Load_Read(DAZZ_DB *db, int i, char *read, int ascii)
{ FILE *bases = (FILE *) db->bases;
int64 off;
int len, clen;
- HITS_READ *r = db->reads;
+ DAZZ_READ *r = db->reads;
if (i >= db->nreads)
{ EPRINTF(EPLACE,"%s: Index out of bounds (Load_Read)\n",Prog_Name);
@@ -1472,14 +1643,14 @@ int Load_Read(HITS_DB *db, int i, char *read, int ascii)
// and as a numeric string otherwise.
//
-HITS_DB *Arrow_DB = NULL; // Last db/arw used by "Load_Arrow"
+DAZZ_DB *Arrow_DB = NULL; // Last db/arw used by "Load_Arrow"
FILE *Arrow_File = NULL; // Becomes invalid after closing
-int Load_Arrow(HITS_DB *db, int i, char *read, int ascii)
+int Load_Arrow(DAZZ_DB *db, int i, char *read, int ascii)
{ FILE *arrow;
int64 off;
int len, clen;
- HITS_READ *r = db->reads;
+ DAZZ_READ *r = db->reads;
if (i >= db->nreads)
{ EPRINTF(EPLACE,"%s: Index out of bounds (Load_Arrow)\n",Prog_Name);
@@ -1519,12 +1690,12 @@ int Load_Arrow(HITS_DB *db, int i, char *read, int ascii)
return (0);
}
-char *Load_Subread(HITS_DB *db, int i, int beg, int end, char *read, int ascii)
+char *Load_Subread(DAZZ_DB *db, int i, int beg, int end, char *read, int ascii)
{ FILE *bases = (FILE *) db->bases;
int64 off;
int len, clen;
int bbeg, bend;
- HITS_READ *r = db->reads;
+ DAZZ_READ *r = db->reads;
if (i >= db->nreads)
{ EPRINTF(EPLACE,"%s: Index out of bounds (Load_Read)\n",Prog_Name);
@@ -1578,7 +1749,7 @@ char *Load_Subread(HITS_DB *db, int i, int beg, int end, char *read, int ascii)
// Allocate and return a buffer of 5 vectors big enough for the largest read in 'db'
-char **New_QV_Buffer(HITS_DB *db)
+char **New_QV_Buffer(DAZZ_DB *db)
{ char **entry;
char *qvs;
int i;
@@ -1595,8 +1766,8 @@ char **New_QV_Buffer(HITS_DB *db)
// Load into entry the QV streams for the i'th read from db. The parameter ascii applies to
// the DELTAG stream as described for Load_Read.
-int Load_QVentry(HITS_DB *db, int i, char **entry, int ascii)
-{ HITS_READ *reads;
+int Load_QVentry(DAZZ_DB *db, int i, char **entry, int ascii)
+{ DAZZ_READ *reads;
FILE *quiva;
int rlen;
@@ -1605,7 +1776,7 @@ int Load_QVentry(HITS_DB *db, int i, char **entry, int ascii)
{ EPRINTF(EPLACE,"%s: QV's are not loaded (Load_QVentry)\n",Prog_Name);
EXIT(1);
}
- Active_QV = (HITS_QV *) db->tracks;
+ Active_QV = (DAZZ_QV *) db->tracks;
Active_DB = db;
}
if (i >= db->nreads)
@@ -1655,10 +1826,10 @@ int Load_QVentry(HITS_DB *db, int i, char **entry, int ascii)
// non-zero then the reads are converted to ACGT ascii, otherwise the reads are left
// as numeric strings over 0(A), 1(C), 2(G), and 3(T).
-int Read_All_Sequences(HITS_DB *db, int ascii)
+int Read_All_Sequences(DAZZ_DB *db, int ascii)
{ FILE *bases;
int nreads = db->nreads;
- HITS_READ *reads = db->reads;
+ DAZZ_READ *reads = db->reads;
void (*translate)(char *s);
char *seq;
@@ -1713,6 +1884,16 @@ int Read_All_Sequences(HITS_DB *db, int ascii)
return (0);
}
+// For the DB or DAM "path" = "prefix/root.[db|dam]", find all the files for that DB, i.e. all
+// those of the form "prefix/[.]root.part" and call actor with the complete path to each file
+// pointed at by path, and the suffix of the path by extension. The . proceeds the root
+// name if the defined constant HIDE_FILES is set. Always the first call is with the
+// path "prefix/root.[db|dam]" and extension "db" or "dam". There will always be calls for
+// "prefix/[.]root.idx" and "prefix/[.]root.bps". All other calls are for *tracks* and
+// so this routine gives one a way to know all the tracks associated with a given DB.
+// -1 is returned if the path could not be found, and 1 is returned if an error (reported
+// to EPLACE) occured and INTERACTIVE is defined. Otherwise a 0 is returned.
+
int List_DB_Files(char *path, void actor(char *path, char *extension))
{ int status, plen, rlen, dlen;
char *root, *pwd, *name;
@@ -1750,19 +1931,9 @@ int List_DB_Files(char *path, void actor(char *path, char *extension))
{ isdam = 1;
break;
}
- if (strcasecmp(name,Catenate("","",root,".db")) == 0)
- { strncpy(root,name,rlen);
- break;
- }
- if (strcasecmp(name,Catenate("","",root,".dam")) == 0)
- { strncpy(root,name,rlen);
- isdam = 1;
- break;
- }
}
if (dp == NULL)
- { EPRINTF(EPLACE,"%s: Cannot find %s (List_DB_Files)\n",Prog_Name,pwd);
- status = -1;
+ { status = -1;
closedir(dirp);
goto error;
}
diff --git a/DB.h b/DB.h
index dc281de..ff41e9f 100644
--- a/DB.h
+++ b/DB.h
@@ -12,9 +12,9 @@
*
********************************************************************************************/
-#ifndef _HITS_DB
+#ifndef _DAZZ_DB
-#define _HITS_DB
+#define _DAZZ_DB
#include <stdio.h>
@@ -74,11 +74,6 @@ extern char Ebuffer[];
#endif
-#define SYSTEM_ERROR \
- { EPRINTF(EPLACE,"%s: System error, read failed!\n",Prog_Name); \
- exit (2); \
- }
-
#define ARG_INIT(name) \
Prog_Name = Strdup(name,""); \
for (i = 0; i < 128; i++) \
@@ -125,6 +120,108 @@ extern char Ebuffer[];
exit (1); \
}
+
+/*******************************************************************************************
+ *
+ * GUARDED BATCH IO MACROS
+ *
+ ********************************************************************************************/
+
+ // Utilitieis
+
+int Count_Args(char *arg);
+
+#define SYSTEM_READ_ERROR \
+ { fprintf(stderr,"%s: System error, read failed!\n",Prog_Name); \
+ exit (2); \
+ }
+
+#define SYSTEM_WRITE_ERROR \
+ { fprintf(stderr,"%s: System error, write failed!\n",Prog_Name); \
+ exit (2); \
+ }
+
+#define SYSTEM_CLOSE_ERROR \
+ { fprintf(stderr,"%s: System error, file close failed!\n",Prog_Name); \
+ exit (2); \
+ }
+
+ // Output
+
+#define FWRITE(v,s,n,file) \
+ { if (fwrite(v,s,n,file) != (size_t) n) \
+ SYSTEM_WRITE_ERROR \
+ }
+
+#define FPRINTF(file,...) \
+ { if (fprintf(file,__VA_ARGS__) < 0) \
+ SYSTEM_WRITE_ERROR \
+ }
+
+#define PRINTF(...) \
+ { if (printf(__VA_ARGS__) < 0) \
+ SYSTEM_WRITE_ERROR \
+ }
+
+#define FPUTS(x,file) \
+ { if (fputs(x,file) == EOF) \
+ SYSTEM_WRITE_ERROR \
+ }
+
+ // Close
+
+#define FCLOSE(file) \
+ { if (fclose(file) != 0) \
+ SYSTEM_CLOSE_ERROR \
+ }
+
+ // Input
+
+#define FREAD(v,s,n,file) \
+ { if (fread(v,s,n,file) != (size_t) n) \
+ { if (ferror(file)) \
+ SYSTEM_READ_ERROR \
+ else \
+ { fprintf(stderr,"%s: The file %s is corrupted\n",Prog_Name,file ## _name); \
+ exit (1); \
+ } \
+ } \
+ }
+
+#define FSCANF(file,...) \
+ { if (fscanf(file,__VA_ARGS__) != Count_Args(#__VA_ARGS__)-1) \
+ { if (ferror(file)) \
+ SYSTEM_READ_ERROR \
+ else \
+ { fprintf(stderr,"%s: The file %s is corrupted\n",Prog_Name,file ## _name); \
+ exit (1); \
+ } \
+ } \
+ }
+
+#define FGETS(v,n,file) \
+ { if (fgets(v,n,file) == NULL) \
+ { if (ferror(file)) \
+ SYSTEM_READ_ERROR \
+ else \
+ { fprintf(stderr,"%s: The file %s is corrupted\n",Prog_Name,file ## _name); \
+ exit (1); \
+ } \
+ } \
+ }
+
+#define FSEEKO(file,p,d) \
+ { if (fseeko(file,p,d) < 0) \
+ SYSTEM_READ_ERROR \
+ }
+
+#define FTELLO(file) \
+ ( { int x = ftello(file); \
+ if (x < 0) \
+ SYSTEM_READ_ERROR \
+ ; x; \
+ } )
+
/*******************************************************************************************
*
* UTILITIES
@@ -193,7 +290,7 @@ typedef struct
// Offset (in bytes) of scaffold header string in '.hdr' file (DAM)
// 4 compressed shorts containing snr info if an arrow DB.
int flags; // QV of read + flags above (DB only)
- } HITS_READ;
+ } DAZZ_READ;
// A track can be of 3 types:
// data == NULL: there are nreads 'anno' records of size 'size'.
@@ -208,9 +305,31 @@ typedef struct _track
int size; // Size in bytes of anno records
void *anno; // over [0,nreads]: read i annotation: int, int64, or 'size' records
void *data; // data[anno[i] .. anno[i+1]-1] is data if data != NULL
- } HITS_TRACK;
+ } DAZZ_TRACK;
+
+// The tailing part of a .anno track file can contain meta-information produced by the
+// command that produced the track. For example, the coverage, or good/bad parameters
+// for trimming, or even say a histogram of QV values. Each item is an array of 'nelem'
+// 64-bit ints or floats ('vtype' = DB_INT or DB_REAL), has a 'name' string that
+// describes it, and an indicator as to whether the values should be equal accross all
+// block tracks, or summed accross all block tracks (by Catrack). 'value' points at the
+// array of values
+
+#define DB_INT 0
+#define DB_REAL 1
-// The information for accessing QV streams is in a HITS_QV record that is a "pseudo-track"
+#define DB_EXACT 0
+#define DB_SUM 1
+
+typedef struct
+ { int vtype; // INT64 or FLOAST64
+ int nelem; // >= 1
+ int accum; // EXACT, SUM
+ char *name;
+ void *value;
+ } DAZZ_EXTRA;
+
+// The information for accessing QV streams is in a DAZZ_QV record that is a "pseudo-track"
// named ". at qvs" and is always the first track record in the list (if present). Since normal
// track names cannot begin with a . (this is enforced), this pseudo-track is never confused
// with a normal track.
@@ -223,11 +342,11 @@ typedef struct
uint16 *table; // for i in [0,db->nreads-1]: read i should be decompressed with
// scheme coding[table[i]]
FILE *quiva; // the open file pointer to the .qvs file
- } HITS_QV;
+ } DAZZ_QV;
// The DB record holds all information about the current state of an active DB including an
-// array of HITS_READS, one per read, and a linked list of HITS_TRACKs the first of which
-// is always a HITS_QV pseudo-track (if the QVs have been loaded).
+// array of DAZZ_READS, one per read, and a linked list of DAZZ_TRACKs the first of which
+// is always a DAZZ_QV pseudo-track (if the QVs have been loaded).
typedef struct
{ int ureads; // Total number of reads in untrimmed DB
@@ -257,9 +376,9 @@ typedef struct
int loaded; // Are reads loaded in memory?
void *bases; // file pointer for bases file (to fetch reads from),
// or memory pointer to uncompressed block of all sequences.
- HITS_READ *reads; // Array [-1..nreads] of HITS_READ
- HITS_TRACK *tracks; // Linked list of loaded tracks
- } HITS_DB;
+ DAZZ_READ *reads; // Array [-1..nreads] of DAZZ_READ
+ DAZZ_TRACK *tracks; // Linked list of loaded tracks
+ } DAZZ_DB;
/*******************************************************************************************
@@ -294,7 +413,7 @@ typedef struct
// contain N-separated contigs), and .fpulse the first base of the contig in the
// fasta entry
- // Open the given database or dam, "path" into the supplied HITS_DB record "db". If the name has
+ // Open the given database or dam, "path" into the supplied DAZZ_DB record "db". If the name has
// a part # in it then just the part is opened. The index array is allocated (for all or
// just the part) and read in.
// Return status of routine:
@@ -302,34 +421,34 @@ typedef struct
// 0: Open of DB proceeded without mishap
// 1: Open of DAM proceeded without mishap
-int Open_DB(char *path, HITS_DB *db);
+int Open_DB(char *path, DAZZ_DB *db);
// Trim the DB or part thereof and all loaded tracks according to the cutoff and all settings
// of the current DB partition. Reallocate smaller memory blocks for the information kept
// for the retained reads.
-void Trim_DB(HITS_DB *db);
+void Trim_DB(DAZZ_DB *db);
// Shut down an open 'db' by freeing all associated space, including tracks and QV structures,
// and any open file pointers. The record pointed at by db however remains (the user
// supplied it and so should free it).
-void Close_DB(HITS_DB *db);
+void Close_DB(DAZZ_DB *db);
// Return the size in bytes of the given DB
-int64 sizeof_DB(HITS_DB *db);
+int64 sizeof_DB(DAZZ_DB *db);
// If QV pseudo track is not already in db's track list, then load it and set it up.
// The database must not have been trimmed yet. -1 is returned if a .qvs file is not
// present, and 1 is returned if an error (reported to EPLACE) occured and INTERACTIVE
// is defined. Otherwise a 0 is returned.
-int Load_QVs(HITS_DB *db);
+int Load_QVs(DAZZ_DB *db);
// Remove the QV pseudo track, all space associated with it, and close the .qvs file.
-void Close_QVs(HITS_DB *db);
+void Close_QVs(DAZZ_DB *db);
// Look up the file and header in the file of the indicated track. Return:
// 1: Track is for trimmed DB
@@ -344,28 +463,47 @@ void Close_QVs(HITS_DB *db);
#define CUSTOM_TRACK 0
#define MASK_TRACK 1
-int Check_Track(HITS_DB *db, char *track, int *kind);
+int Check_Track(DAZZ_DB *db, char *track, int *kind);
// If track is not already in the db's track list, then allocate all the storage for it,
// read it in from the appropriate file, add it to the track list, and return a pointer
- // to the newly created HITS_TRACK record. If the track does not exist or cannot be
+ // to the newly created DAZZ_TRACK record. If the track does not exist or cannot be
// opened for some reason, then NULL is returned if INTERACTIVE is defined. Otherwise
// the routine prints an error message to stderr and exits if an error occurs, and returns
// with NULL only if the track does not exist.
-HITS_TRACK *Load_Track(HITS_DB *db, char *track);
+DAZZ_TRACK *Load_Track(DAZZ_DB *db, char *track);
+
+ // Assumming file pointer for afile is correctly positioned at the start of a extra item,
+ // and aname is the name of the .anno file, decode the value present and places it in
+ // extra if extra->nelem == 0, otherwise reduce the value just read into extra according
+ // according the to the directive given by 'accum'. Leave the read poinrt at the next
+ // extra or end-of-file.
+ // Returns:
+ // 1 if at the end of file,
+ // 0 if item was read and folded correctly,
+ // -1 if there was a system IO or allocation error (if interactive), and
+ // -2 if the new value could not be reduced into the currenct value of extra (interactive)
+
+int Read_Extra(FILE *afile, char *aname, DAZZ_EXTRA *extra);
+
+// Write extra record to end of file afile and advance write pointer
+// If interactive, then return non-zero on error, if bash, then print
+// and halt if an error
+
+int Write_Extra(FILE *afile, DAZZ_EXTRA *extra);
// If track is on the db's track list, then it is removed and all storage associated with it
// is freed.
-void Close_Track(HITS_DB *db, char *track);
+void Close_Track(DAZZ_DB *db, char *track);
// Allocate and return a buffer big enough for the largest read in 'db'.
// **NB** free(x-1) if x is the value returned as *prefix* and suffix '\0'(4)-byte
// are needed by the alignment algorithms. If cannot allocate memory then return NULL
// if INTERACTIVE is defined, or print error to stderr and exit otherwise.
-char *New_Read_Buffer(HITS_DB *db);
+char *New_Read_Buffer(DAZZ_DB *db);
// Load into 'read' the i'th read in 'db'. As a lower case ascii string if ascii is 1, an
// upper case ascii string if ascii is 2, and a numeric string over 0(A), 1(C), 2(G), and 3(T)
@@ -373,12 +511,12 @@ char *New_Read_Buffer(HITS_DB *db);
// for traversals in either direction. A non-zero value is returned if an error occured
// and INTERACTIVE is defined.
-int Load_Read(HITS_DB *db, int i, char *read, int ascii);
+int Load_Read(DAZZ_DB *db, int i, char *read, int ascii);
// Exactly the same as Load_Read, save the arrow information is loaded, not the DNA sequence,
// and there is only a choice between numeric (0) or ascii (1);
-int Load_Arrow(HITS_DB *db, int i, char *read, int ascii);
+int Load_Arrow(DAZZ_DB *db, int i, char *read, int ascii);
// Load into 'read' the subread [beg,end] of the i'th read in 'db' and return a pointer to the
// the start of the subinterval (not necessarily = to read !!! ). As a lower case ascii
@@ -387,7 +525,7 @@ int Load_Arrow(HITS_DB *db, int i, char *read, int ascii);
// the string holding the substring so it has a delimeter for traversals in either direction.
// A NULL pointer is returned if an error occured and INTERACTIVE is defined.
-char *Load_Subread(HITS_DB *db, int i, int beg, int end, char *read, int ascii);
+char *Load_Subread(DAZZ_DB *db, int i, int beg, int end, char *read, int ascii);
// Allocate a set of 5 vectors large enough to hold the longest QV stream that will occur
// in the database. If cannot allocate memory then return NULL if INTERACTIVE is defined,
@@ -399,13 +537,13 @@ char *Load_Subread(HITS_DB *db, int i, int beg, int end, char *read, int ascii);
#define SUB_QV 3 // The substitution QVs
#define MRG_QV 4 // The merge QVs
-char **New_QV_Buffer(HITS_DB *db);
+char **New_QV_Buffer(DAZZ_DB *db);
// Load into 'entry' the 5 QV vectors for i'th read in 'db'. The deletion tag or characters
// are converted to a numeric or upper/lower case ascii string as per ascii. Return with
// a zero, except when an error occurs and INTERACTIVE is defined in which case return wtih 1.
-int Load_QVentry(HITS_DB *db, int i, char **entry, int ascii);
+int Load_QVentry(DAZZ_DB *db, int i, char **entry, int ascii);
// Allocate a block big enough for all the uncompressed sequences, read them into it,
// reset the 'off' in each read record to be its in-memory offset, and set the
@@ -415,7 +553,7 @@ int Load_QVentry(HITS_DB *db, int i, char **entry, int ascii);
// Return with a zero, except when an error occurs and INTERACTIVE is defined in which
// case return wtih 1.
-int Read_All_Sequences(HITS_DB *db, int ascii);
+int Read_All_Sequences(DAZZ_DB *db, int ascii);
// For the DB or DAM "path" = "prefix/root.[db|dam]", find all the files for that DB, i.e. all
// those of the form "prefix/[.]root.part" and call actor with the complete path to each file
@@ -429,4 +567,4 @@ int Read_All_Sequences(HITS_DB *db, int ascii);
int List_DB_Files(char *path, void actor(char *path, char *extension));
-#endif // _HITS_DB
+#endif // _DAZZ_DB
diff --git a/HPC.daligner.c b/HPC.daligner.c
index 578a9da..b700f9f 100644
--- a/HPC.daligner.c
+++ b/HPC.daligner.c
@@ -81,12 +81,12 @@ void daligner_script(int argc, char *argv[])
}
if (fscanf(dbvis,"files = %d\n",&nfiles) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
for (i = 0; i < nfiles; i++)
{ char buffer[30001];
if (fgets(buffer,30000,dbvis) == NULL)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
}
useblock = 1;
@@ -731,12 +731,12 @@ void mapper_script(int argc, char *argv[])
}
if (fscanf(dbvis,"files = %d\n",&nfiles) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
for (i = 0; i < nfiles; i++)
{ char buffer[30001];
if (fgets(buffer,30000,dbvis) == NULL)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
}
useblock1 = 1;
@@ -773,12 +773,12 @@ void mapper_script(int argc, char *argv[])
}
if (fscanf(dbvis,"files = %d\n",&nfiles) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
for (i = 0; i < nfiles; i++)
{ char buffer[30001];
if (fgets(buffer,30000,dbvis) == NULL)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
}
useblock2 = 1;
@@ -959,18 +959,18 @@ void mapper_script(int argc, char *argv[])
{ if (useblock1 || usepath2)
{ fprintf(out," && mv %s",root2);
if (useblock2)
- fprintf(out,".%d",i);
+ fprintf(out,".%d.las",i);
if (useblock1)
- fprintf(out,".%s.1 ",root1);
+ fprintf(out,".%s.1.las ",root1);
else
- fprintf(out,".%s ",root1);
+ fprintf(out,".%s.las ",root1);
if (useblock1)
{ if (usepath2)
fprintf(out,"%s/",pwd2);
fprintf(out,"%s",root2);
if (useblock2)
fprintf(out,".%d",i);
- fprintf(out,".%s",root1);
+ fprintf(out,".%s.las",root1);
}
else
fprintf(out,"%s",pwd2);
diff --git a/LAcat.c b/LAcat.c
index c0f61f8..66136cd 100644
--- a/LAcat.c
+++ b/LAcat.c
@@ -90,10 +90,10 @@ int main(int argc, char *argv[])
if ((input = fopen(name,"r")) == NULL) break;
if (fread(&povl,sizeof(int64),1,input) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
novl += povl;
if (fread(&mspace,sizeof(int),1,input) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
if (i == 0)
{ tspace = mspace;
if (tspace <= TRACE_XOVR && tspace != 0)
@@ -109,9 +109,9 @@ int main(int argc, char *argv[])
fclose(input);
}
if (fwrite(&novl,sizeof(int64),1,stdout) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
if (fwrite(&tspace,sizeof(int),1,stdout) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
}
{ int i, j;
@@ -129,9 +129,9 @@ int main(int argc, char *argv[])
if ((input = fopen(name,"r")) == NULL) break;
if (fread(&povl,sizeof(int64),1,input) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
if (fread(&mspace,sizeof(int),1,input) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
if (VERBOSE)
fprintf(stderr," Concatenating %s: %lld la\'s\n",Numbered_Suffix(root,i+1,root2),povl);
@@ -154,7 +154,7 @@ int main(int argc, char *argv[])
if (optr + ovlsize + tsize > otop)
{ if (fwrite(oblock,1,optr-oblock,stdout) != (size_t) (optr-oblock))
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
optr = oblock;
}
@@ -181,7 +181,7 @@ int main(int argc, char *argv[])
if (optr > oblock)
{ if (fwrite(oblock,1,optr-oblock,stdout) != (size_t) (optr-oblock))
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
}
}
diff --git a/LAcheck.c b/LAcheck.c
index 4c878ac..b0f8be0 100644
--- a/LAcheck.c
+++ b/LAcheck.c
@@ -23,8 +23,8 @@ static char *Usage = "[-vS] <src1:db|dam> [ <src2:db|dam> ] <align:las> ...";
#define MEMORY 1000 // How many megabytes for output buffer
int main(int argc, char *argv[])
-{ HITS_DB _db1, *db1 = &_db1;
- HITS_DB _db2, *db2 = &_db2;
+{ DAZZ_DB _db1, *db1 = &_db1;
+ DAZZ_DB _db2, *db2 = &_db2;
int VERBOSE;
int SORTED;
int ISTWO;
@@ -103,9 +103,9 @@ int main(int argc, char *argv[])
{ char *iblock;
int64 bsize, ovlsize, ptrsize;
int i, j;
- HITS_READ *reads1 = db1->reads;
+ DAZZ_READ *reads1 = db1->reads;
int nreads1 = db1->nreads;
- HITS_READ *reads2 = db2->reads;
+ DAZZ_READ *reads2 = db2->reads;
int nreads2 = db2->nreads;
// Setup IO buffers
@@ -138,9 +138,9 @@ int main(int argc, char *argv[])
goto error;
if (fread(&novl,sizeof(int64),1,input) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
if (fread(&tspace,sizeof(int),1,input) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
if (novl < 0)
{ if (VERBOSE)
fprintf(stderr," %s: Number of alignments < 0\n",root);
diff --git a/LAdump.c b/LAdump.c
index 8931e5c..09e098d 100644
--- a/LAdump.c
+++ b/LAdump.c
@@ -22,7 +22,7 @@
#include "align.h"
static char *Usage =
- "[-cdtlo] <src1:db|dam> [ <src2:db|dam> ] <align:las> [ <reads:FILE> | <reads:range> ... ]";
+ "[-cdtlo] <src1:db|dam> [<src2:db|dam>] <align:las> [<reads:FILE> | <reads:range> ...]";
#define LAST_READ_SYMBOL '$'
@@ -33,8 +33,8 @@ static int ORDER(const void *l, const void *r)
}
int main(int argc, char *argv[])
-{ HITS_DB _db1, *db1 = &_db1;
- HITS_DB _db2, *db2 = &_db2;
+{ DAZZ_DB _db1, *db1 = &_db1;
+ DAZZ_DB _db2, *db2 = &_db2;
Overlap _ovl, *ovl = &_ovl;
FILE *input;
@@ -79,16 +79,22 @@ int main(int argc, char *argv[])
if (argc <= 2)
{ fprintf(stderr,"Usage: %s %s\n",Prog_Name,Usage);
fprintf(stderr,"\n");
- fprintf(stderr," P #a #b #o #c - (#a,#b^#o) have an LA between them where #o is 'n' or 'c' and\n");
- fprintf(stderr," #c is '>' (start of best chain), '+' (start of alternate chain),\n");
- fprintf(stderr," '-' (continuation of chain), or '.' (no chains in file).\n");
+ fprintf(stderr," P #a #b #o #c -");
+ fprintf(stderr," (#a,#b^#o) have an LA between them where #o is 'n' or 'c' and\n");
+ fprintf(stderr," ");
+ fprintf(stderr," #c is '>' (start of best chain), '+' (start of alternate chain),\n");
+ fprintf(stderr," ");
+ fprintf(stderr," '-' (continuation of chain), or '.' (no chains in file).\n");
fprintf(stderr,"\n");
fprintf(stderr," -c: C #ab #ae #bb #be - #a[#ab,#ae] aligns with #b^#o[#bb,#be]\n");
fprintf(stderr," -d: D # - there are # differences in the LA\n");
- fprintf(stderr," -t: T #n - there are #n trace point intervals for the LA\n");
- fprintf(stderr," (#d #y )^#n - there are #d difference aligning the #y bp's of B with the\n");
+ fprintf(stderr," -t: T #n -");
+ fprintf(stderr," there are #n trace point intervals for the LA\n");
+ fprintf(stderr," (#d #y )^#n -");
+ fprintf(stderr," there are #d difference aligning the #y bp's of B with the\n");
fprintf(stderr," next fixed-size interval of A\n");
- fprintf(stderr," -l: L #la #lb - #la is the length of the a-read and #lb that of the b-read\n");
+ fprintf(stderr," -l: L #la #lb -");
+ fprintf(stderr," #la is the length of the a-read and #lb that of the b-read\n");
fprintf(stderr,"\n");
fprintf(stderr," -o: Output proper overlaps only\n");
@@ -274,9 +280,9 @@ int main(int argc, char *argv[])
exit (1);
if (fread(&novl,sizeof(int64),1,input) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
if (fread(&tspace,sizeof(int),1,input) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
if (tspace <= TRACE_XOVR && tspace != 0)
{ small = 1;
@@ -387,7 +393,7 @@ int main(int argc, char *argv[])
{ int j, k;
uint16 *trace;
int in, npt, idx, ar;
- HITS_READ *read1, *read2;
+ DAZZ_READ *read1, *read2;
rewind(input);
fread(&novl,sizeof(int64),1,input);
diff --git a/LAindex.c b/LAindex.c
index 897e49d..cd6303e 100644
--- a/LAindex.c
+++ b/LAindex.c
@@ -80,9 +80,9 @@ int main(int argc, char *argv[])
exit (1);
if (fread(&novl,sizeof(int64),1,input) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
if (fread(&tspace,sizeof(int),1,input) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
if (tspace <= TRACE_XOVR && tspace != 0)
tbytes = sizeof(uint8);
else
diff --git a/LAmerge.c b/LAmerge.c
index 70e42e7..23624a6 100644
--- a/LAmerge.c
+++ b/LAmerge.c
@@ -252,10 +252,10 @@ int main(int argc, char *argv[])
free(root);
if (fread(&novl,sizeof(int64),1,input) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
totl += novl;
if (fread(&mspace,sizeof(int),1,input) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
if (i == 0)
{ tspace = mspace;
if (tspace <= TRACE_XOVR && tspace != 0)
@@ -288,9 +288,9 @@ int main(int argc, char *argv[])
free(root);
if (fwrite(&totl,sizeof(int64),1,output) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
if (fwrite(&tspace,sizeof(int),1,output) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
oblock = block+fway*bsize;
optr = oblock;
@@ -353,7 +353,7 @@ int main(int argc, char *argv[])
ovl_reload(src,bsize);
if (optr + span > otop)
{ if (fwrite(oblock,1,optr-oblock,output) != (size_t) (optr-oblock))
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
optr = oblock;
}
@@ -378,7 +378,7 @@ int main(int argc, char *argv[])
if (optr > oblock)
{ if (fwrite(oblock,1,optr-oblock,output) != (size_t) (optr-oblock))
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
}
fclose(output);
diff --git a/LAshow.c b/LAshow.c
index 13c85fe..d094c01 100644
--- a/LAshow.c
+++ b/LAshow.c
@@ -35,8 +35,8 @@ static int ORDER(const void *l, const void *r)
}
int main(int argc, char *argv[])
-{ HITS_DB _db1, *db1 = &_db1;
- HITS_DB _db2, *db2 = &_db2;
+{ DAZZ_DB _db1, *db1 = &_db1;
+ DAZZ_DB _db2, *db2 = &_db2;
Overlap _ovl, *ovl = &_ovl;
Alignment _aln, *aln = &_aln;
@@ -284,9 +284,9 @@ int main(int argc, char *argv[])
exit (1);
if (fread(&novl,sizeof(int64),1,input) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
if (fread(&tspace,sizeof(int),1,input) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
if (tspace < 0)
{ fprintf(stderr,"%s: Garbage .las file, trace spacing < 0 !\n",Prog_Name);
exit (1);
@@ -399,6 +399,15 @@ int main(int argc, char *argv[])
ovl->path.trace = (void *) trace;
Read_Trace(input,ovl,tbytes);
+ if (ovl->aread >= db1->nreads)
+ { fprintf(stderr,"%s: A-read is out-of-range of DB %s\n",Prog_Name,argv[1]);
+ exit (1);
+ }
+ if (ovl->bread >= db2->nreads)
+ { fprintf(stderr,"%s: B-read is out-of-range of DB %s\n",Prog_Name,argv[1+ISTWO]);
+ exit (1);
+ }
+
// Determine if it should be displayed
ar = ovl->aread+1;
diff --git a/LAsort.c b/LAsort.c
index 72ca23e..7803b97 100644
--- a/LAsort.c
+++ b/LAsort.c
@@ -160,9 +160,9 @@ int main(int argc, char *argv[])
size = info.st_size;
if (fread(&novl,sizeof(int64),1,input) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
if (fread(&tspace,sizeof(int),1,input) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
if (tspace <= TRACE_XOVR && tspace != 0)
tbytes = sizeof(uint8);
@@ -183,9 +183,9 @@ int main(int argc, char *argv[])
exit (1);
if (fwrite(&novl,sizeof(int64),1,foutput) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
if (fwrite(&tspace,sizeof(int),1,foutput) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
free(pwd);
free(root);
@@ -203,7 +203,7 @@ int main(int argc, char *argv[])
size -= (sizeof(int64) + sizeof(int));
if (size > 0)
{ if (fread(iblock,size,1,input) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
}
fclose(input);
iend = iblock + (size - ptrsize);
@@ -261,7 +261,7 @@ int main(int argc, char *argv[])
span = ovlsize + tsize;
if (fptr + span > ftop)
{ if (fwrite(fblock,1,fptr-fblock,foutput) != (size_t) (fptr-fblock))
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
fptr = fblock;
}
memmove(fptr,((char *) w)+ptrsize,ovlsize);
@@ -274,7 +274,7 @@ int main(int argc, char *argv[])
}
if (fptr > fblock)
{ if (fwrite(fblock,1,fptr-fblock,foutput) != (size_t) (fptr-fblock))
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
}
}
diff --git a/LAsplit.c b/LAsplit.c
index aceb647..40ca712 100644
--- a/LAsplit.c
+++ b/LAsplit.c
@@ -81,19 +81,19 @@ int main(int argc, char *argv[])
free(root);
if (fscanf(dbvis,DB_NFILE,&nfiles) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
while (nfiles-- > 0)
if (fgets(buffer,2*MAX_NAME+100,dbvis) == NULL)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
parts = 0;
if (fscanf(dbvis,DB_NBLOCK,&parts) != 1)
{ fprintf(stderr,"%s: DB %s has not been partitioned\n",Prog_Name,argv[2]);
exit (1);
}
if (fscanf(dbvis,DB_PARAMS,&size,&cutoff,&all) != 3)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
if (fscanf(dbvis,DB_BDATA,&olast,&blast) != 2)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
}
else
{ dbvis = NULL;
@@ -128,9 +128,9 @@ int main(int argc, char *argv[])
*root2++ = '\0';
if (fread(&novl,sizeof(int64),1,stdin) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
if (fread(&tspace,sizeof(int),1,stdin) != 1)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
if (tspace <= TRACE_XOVR && tspace != 0)
tbytes = sizeof(uint8);
else
@@ -139,9 +139,9 @@ int main(int argc, char *argv[])
if (VERBOSE)
fprintf(stderr," Distributing %lld la\'s\n",novl);
- { int i, j;
+ { int i;
Overlap *w;
- int64 low, hgh, last;
+ int64 j, low, hgh, last;
int64 tsize, povl;
char *iptr, *itop;
char *optr, *otop;
@@ -158,7 +158,7 @@ int main(int argc, char *argv[])
low = hgh;
if (dbvis != NULL)
{ if (fscanf(dbvis,DB_BDATA,&olast,&blast) != 2)
- SYSTEM_ERROR
+ SYSTEM_READ_ERROR
last = blast-1;
hgh = 0;
}
diff --git a/align.c b/align.c
index d4fdfe8..e408eb9 100644
--- a/align.c
+++ b/align.c
@@ -161,6 +161,7 @@ static double Bias_Factor[10] = { .690, .690, .690, .690, .780,
typedef struct
{ double ave_corr;
int trace_space;
+ int reach;
float freq[4];
int ave_path;
int16 *score;
@@ -196,7 +197,7 @@ static void set_table(int bit, int prefix, int score, int max, Table_Bits *parms
/* Create an alignment specification record including path tip tables & values */
-Align_Spec *New_Align_Spec(double ave_corr, int trace_space, float *freq)
+Align_Spec *New_Align_Spec(double ave_corr, int trace_space, float *freq, int reach)
{ _Align_Spec *spec;
Table_Bits parms;
double match;
@@ -208,6 +209,7 @@ Align_Spec *New_Align_Spec(double ave_corr, int trace_space, float *freq)
spec->ave_corr = ave_corr;
spec->trace_space = trace_space;
+ spec->reach = reach;
spec->freq[0] = freq[0];
spec->freq[1] = freq[1];
spec->freq[2] = freq[2];
@@ -257,6 +259,9 @@ int Trace_Spacing(Align_Spec *espec)
float *Base_Frequencies(Align_Spec *espec)
{ return (((_Align_Spec *) espec)->freq); }
+int Overlap_If_Possible(Align_Spec *espec)
+{ return (((_Align_Spec *) espec)->reach); }
+
/****************************************************************************************\
* *
@@ -343,6 +348,7 @@ static int forward_wave(_Work_Data *work, _Align_Spec *spec, Alignment *align, P
int TRACE_SPACE = spec->trace_space;
int PATH_AVE = spec->ave_path;
+ int REACH = spec->reach;
int16 *SCORE = spec->score;
int16 *TABLE = spec->table;
@@ -874,7 +880,7 @@ static int forward_wave(_Work_Data *work, _Align_Spec *spec, Alignment *align, P
int a, b, k, h;
int d, e;
- if (morem >= 0)
+ if (morem >= 0 && REACH)
{ trimx = morea-morey;
trimy = morey;
trimd = mored;
@@ -1004,6 +1010,7 @@ static int reverse_wave(_Work_Data *work, _Align_Spec *spec, Alignment *align, P
int TRACE_SPACE = spec->trace_space;
int PATH_AVE = spec->ave_path;
+ int REACH = spec->reach;
int16 *SCORE = spec->score;
int16 *TABLE = spec->table;
@@ -1527,7 +1534,7 @@ static int reverse_wave(_Work_Data *work, _Align_Spec *spec, Alignment *align, P
int a, b, k, h;
int d, e;
- if (morem >= 0)
+ if (morem >= 0 && REACH)
{ trimx = morea-morey;
trimy = morey;
trimd = mored;
@@ -4194,7 +4201,10 @@ static int ToA[4] = { 'a', 'c', 'g', 't' };
#endif
-static int iter_np(char *A, int M, char *B, int N, Trace_Waves *wave, int mode)
+static char *TP_Align =
+ "Bad alignment between trace points (Compute_Trace), source DB likely incorrect";
+
+static int iter_np(char *A, int M, char *B, int N, Trace_Waves *wave, int mode, int dmax)
{ int **PVF = wave->PVF;
int **PHF = wave->PHF;
int D;
@@ -4226,17 +4236,21 @@ static int iter_np(char *A, int M, char *B, int N, Trace_Waves *wave, int mode)
hgh = 0;
}
- posl = -INT32_MAX;
- posh = INT32_MAX;
+ posl = -dmax;
+ posh = dmax;
if (wave->Aabs == wave->Babs)
{ if (B == A)
- { EPRINTF(EPLACE,"Error: self comparison starts on diagonal 0 (Compute_Trace)\n");
+ { EPRINTF(EPLACE,"%s: self comparison starts on diagonal 0 (Compute_Trace)\n",Prog_Name);
EXIT(-1);
}
else if (B < A)
- posl = (B-A)+1;
+ { if ((B-A)+1 > posl)
+ posl = (B-A)+1;
+ }
else
- posh = (B-A)-1;
+ { if ((B-A)-1 < posh)
+ posh = (B-A)-1;
+ }
}
F1 = PVF[-2];
@@ -4254,6 +4268,11 @@ static int iter_np(char *A, int M, char *B, int N, Trace_Waves *wave, int mode)
int am, ac, ap;
char *a;
+ if (D > dmax)
+ { EPRINTF(EPLACE,"%s: %s\n",Prog_Name,TP_Align);
+ EXIT(-1);
+ }
+
F2 = F1;
F1 = F0;
F0 = PVF[D];
@@ -4523,7 +4542,7 @@ static int iter_np(char *A, int M, char *B, int N, Trace_Waves *wave, int mode)
return (D + abs(del));
}
-static int middle_np(char *A, int M, char *B, int N, Trace_Waves *wave, int mode)
+static int middle_np(char *A, int M, char *B, int N, Trace_Waves *wave, int mode, int dmax)
{ int **PVF = wave->PVF;
int **PHF = wave->PHF;
int D;
@@ -4555,17 +4574,21 @@ static int middle_np(char *A, int M, char *B, int N, Trace_Waves *wave, int mode
hgh = 0;
}
- posl = -INT32_MAX;
- posh = INT32_MAX;
+ posl = -dmax;
+ posh = dmax;
if (wave->Aabs == wave->Babs)
{ if (B == A)
- { EPRINTF(EPLACE,"Error: self comparison starts on diagonal 0 (Compute_Trace)\n");
+ { EPRINTF(EPLACE,"%s: self comparison starts on diagonal 0 (Compute_Trace)\n",Prog_Name);
EXIT(1);
}
else if (B < A)
- posl = (B-A)+1;
+ { if ((B-A)+1 > posl)
+ posl = (B-A)+1;
+ }
else
- posh = (B-A)-1;
+ { if ((B-A)-1 < posh)
+ posh = (B-A)-1;
+ }
}
F1 = PVF[-2];
@@ -4583,6 +4606,11 @@ static int middle_np(char *A, int M, char *B, int N, Trace_Waves *wave, int mode
int am, ac, ap;
char *a;
+ if (D > dmax)
+ { EPRINTF(EPLACE,"%s: %s\n",Prog_Name,TP_Align);
+ EXIT(-1);
+ }
+
F2 = F1;
F1 = F0;
F0 = PVF[D];
@@ -4795,14 +4823,20 @@ static int middle_np(char *A, int M, char *B, int N, Trace_Waves *wave, int mode
* *
\****************************************************************************************/
+static char *TP_Error = "Trace point out of bounds (Compute_Trace), source DB likely incorrect";
+
int Compute_Trace_ALL(Alignment *align, Work_Data *ework)
{ _Work_Data *work = (_Work_Data *) ework;
Trace_Waves wave;
Path *path;
char *aseq, *bseq;
+ int alen, blen;
int M, N, D;
+ int dmax;
+ alen = align->alen;
+ blen = align->blen;
path = align->path;
aseq = align->aseq;
bseq = align->bseq;
@@ -4812,7 +4846,6 @@ int Compute_Trace_ALL(Alignment *align, Work_Data *ework)
{ int64 s;
int d;
- int dmax;
int **PVF, **PHF;
if (M < N)
@@ -4851,7 +4884,11 @@ int Compute_Trace_ALL(Alignment *align, Work_Data *ework)
wave.Aabs = aseq;
wave.Babs = bseq;
- D = iter_np(aseq+path->abpos,M,bseq+path->bbpos,N,&wave,GREEDIEST);
+ if (path->aepos > alen || path->bepos > blen)
+ { EPRINTF(EPLACE,"%s: %s\n",Prog_Name,TP_Error);
+ EXIT(1);
+ }
+ D = iter_np(aseq+path->abpos,M,bseq+path->bbpos,N,&wave,GREEDIEST,dmax);
if (D < 0)
EXIT(1);
path->diffs = D;
@@ -4867,12 +4904,15 @@ int Compute_Trace_PTS(Alignment *align, Work_Data *ework, int trace_spacing, int
Path *path;
char *aseq, *bseq;
+ int alen, blen;
uint16 *points;
int tlen;
int ab, bb;
int ae, be;
- int diffs;
+ int diffs, dmax;
+ alen = align->alen;
+ blen = align->blen;
path = align->path;
aseq = align->aseq;
bseq = align->bseq;
@@ -4882,7 +4922,7 @@ int Compute_Trace_PTS(Alignment *align, Work_Data *ework, int trace_spacing, int
{ int64 s;
int d;
int M, N;
- int dmax, nmax;
+ int nmax;
int **PVF, **PHF;
M = path->aepos-path->abpos;
@@ -4938,7 +4978,11 @@ int Compute_Trace_PTS(Alignment *align, Work_Data *ework, int trace_spacing, int
for (i = 1; i < tlen; i += 2)
{ ae = ae + trace_spacing;
be = bb + points[i];
- d = iter_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave,mode);
+ if (ae > alen || be > blen)
+ { EPRINTF(EPLACE,"%s: %s\n",Prog_Name,TP_Error);
+ EXIT(1);
+ }
+ d = iter_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave,mode,dmax);
if (d < 0)
EXIT(1);
diffs += d;
@@ -4947,7 +4991,11 @@ int Compute_Trace_PTS(Alignment *align, Work_Data *ework, int trace_spacing, int
}
ae = path->aepos;
be = path->bepos;
- d = iter_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave,mode);
+ if (ae > alen || be > blen)
+ { EPRINTF(EPLACE,"%s: %s\n",Prog_Name,TP_Error);
+ EXIT(1);
+ }
+ d = iter_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave,mode,dmax);
if (d < 0)
EXIT(1);
diffs += d;
@@ -4966,12 +5014,15 @@ int Compute_Trace_MID(Alignment *align, Work_Data *ework, int trace_spacing, int
Path *path;
char *aseq, *bseq;
+ int alen, blen;
uint16 *points;
int tlen;
int ab, bb;
int ae, be;
- int diffs;
+ int diffs, dmax;
+ alen = align->alen;
+ blen = align->blen;
path = align->path;
aseq = align->aseq;
bseq = align->bseq;
@@ -4981,7 +5032,7 @@ int Compute_Trace_MID(Alignment *align, Work_Data *ework, int trace_spacing, int
{ int64 s;
int d;
int M, N;
- int dmax, nmax;
+ int nmax;
int **PVF, **PHF;
M = path->aepos-path->abpos;
@@ -5039,11 +5090,15 @@ int Compute_Trace_MID(Alignment *align, Work_Data *ework, int trace_spacing, int
for (i = 1; i < tlen; i += 2)
{ ae = ae + trace_spacing;
be = bb + points[i];
- if (middle_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave,mode))
+ if (ae > alen || be > blen)
+ { EPRINTF(EPLACE,"%s: %s\n",Prog_Name,TP_Error);
+ EXIT(1);
+ }
+ if (middle_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave,mode,dmax))
EXIT(1);
af = wave.mida;
bf = wave.midb;
- d = iter_np(aseq+as,af-as,bseq+bs,bf-bs,&wave,mode);
+ d = iter_np(aseq+as,af-as,bseq+bs,bf-bs,&wave,mode,dmax);
if (d < 0)
EXIT(1);
diffs += d;
@@ -5056,18 +5111,22 @@ int Compute_Trace_MID(Alignment *align, Work_Data *ework, int trace_spacing, int
ae = path->aepos;
be = path->bepos;
- if (middle_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave,mode))
+ if (ae > alen || be > blen)
+ { EPRINTF(EPLACE,"%s: %s\n",Prog_Name,TP_Error);
+ EXIT(1);
+ }
+ if (middle_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave,mode,dmax))
EXIT(1);
af = wave.mida;
bf = wave.midb;
- d = iter_np(aseq+as,af-as,bseq+bs,bf-bs,&wave,mode);
+ d = iter_np(aseq+as,af-as,bseq+bs,bf-bs,&wave,mode,dmax);
if (d < 0)
EXIT(1);
diffs += d;
as = af;
bs = bf;
- d += iter_np(aseq+af,ae-as,bseq+bf,be-bs,&wave,mode);
+ d += iter_np(aseq+af,ae-as,bseq+bf,be-bs,&wave,mode,dmax);
if (d < 0)
EXIT(1);
diffs += d;
@@ -5086,12 +5145,15 @@ int Compute_Trace_IRR(Alignment *align, Work_Data *ework, int mode)
Path *path;
char *aseq, *bseq;
+ int alen, blen;
uint16 *points;
int tlen;
int ab, bb;
int ae, be;
- int diffs;
+ int diffs, dmax;
+ alen = align->alen;
+ blen = align->blen;
path = align->path;
aseq = align->aseq;
bseq = align->bseq;
@@ -5101,7 +5163,7 @@ int Compute_Trace_IRR(Alignment *align, Work_Data *ework, int mode)
{ int64 s;
int d;
int M, N;
- int mmax, nmax, dmax;
+ int mmax, nmax;
int **PVF, **PHF;
M = path->aepos-path->abpos;
@@ -5160,7 +5222,11 @@ int Compute_Trace_IRR(Alignment *align, Work_Data *ework, int mode)
for (i = 0; i < tlen; i += 2)
{ ae = ab + points[i];
be = bb + points[i+1];
- d = iter_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave,mode);
+ if (ae > alen || be > blen)
+ { EPRINTF(EPLACE,"%s: %s\n",Prog_Name,TP_Error);
+ EXIT(1);
+ }
+ d = iter_np(aseq+ab,ae-ab,bseq+bb,be-bb,&wave,mode,dmax);
if (d < 0)
EXIT(1);
diffs += d;
diff --git a/align.h b/align.h
index daa9151..d5f49a9 100644
--- a/align.h
+++ b/align.h
@@ -138,6 +138,10 @@ typedef struct
#define CHAIN_NEXT(x) ((x) & NEXT_FLAG)
#define BEST_CHAIN(x) ((x) & BEST_FLAG)
+#define ELIM_FLAG 0x20 // This LA should be ignored
+
+#define ELIM(x) ((x) & ELIM_FLAG)
+
typedef struct
{ Path *path;
uint32 flags; /* Pipeline status and complementation flags */
@@ -173,7 +177,9 @@ void Complement_Seq(char *a, int n);
description of 'trace' for Paths above)
freq[4]: a 4-element vector where afreq[0] = frequency of A, f(A), freq[1] = f(C),
freq[2] = f(G), and freq[3] = f(T). This vector is part of the header
- of every HITS database (see db.h).
+ of every DAZZ database (see db.h).
+ reach: a boolean, if set alignment extend to the boundary when reasonable, otherwise
+ the terminate only at suffix-positive points.
If an alignment cannot reach the boundary of the d.p. matrix with this condition (i.e.
overlap), then the last/first 30 columns of the alignment are guaranteed to be
@@ -187,13 +193,14 @@ void Complement_Seq(char *a, int n);
typedef void Align_Spec;
- Align_Spec *New_Align_Spec(double ave_corr, int trace_space, float *freq);
+ Align_Spec *New_Align_Spec(double ave_corr, int trace_space, float *freq, int reach);
void Free_Align_Spec(Align_Spec *spec);
int Trace_Spacing (Align_Spec *spec);
double Average_Correlation(Align_Spec *spec);
float *Base_Frequencies (Align_Spec *spec);
+ int Overlap_If_Possible(Align_Spec *spec);
/* Local_Alignment finds the longest significant local alignment between the sequences in
'align' subject to:
@@ -303,7 +310,7 @@ void Complement_Seq(char *a, int n);
/*** OVERLAP ABSTRACTION:
Externally, between modules an Alignment is modeled by an "Overlap" record, which
- (a) replaces the pointers to the two sequences with their ID's in the HITS data bases,
+ (a) replaces the pointers to the two sequences with their ID's in the DAZZ data bases,
(b) does not contain the length of the 2 sequences (must fetch from DB), and
(c) contains its path as a subrecord rather than as a pointer (indeed, typically the
corresponding Alignment record points at the Overlap's path sub-record). The trace pointer
diff --git a/daligner.c b/daligner.c
index ef47d6b..4037fa1 100644
--- a/daligner.c
+++ b/daligner.c
@@ -179,13 +179,13 @@ static void reheap(int s, Event **heap, int hsize)
heap[c] = hs;
}
-static int64 merge_size(HITS_DB *block, int mtop)
+static int64 merge_size(DAZZ_DB *block, int mtop)
{ Event ev[mtop+1];
Event *heap[mtop+2];
int r, mhalf;
int64 nsize;
- { HITS_TRACK *track;
+ { DAZZ_TRACK *track;
int i;
track = block->tracks;
@@ -204,7 +204,7 @@ static int64 merge_size(HITS_DB *block, int mtop)
nsize = 0;
for (r = 0; r < block->nreads; r++)
{ int i, level, hsize;
- HITS_TRACK *track;
+ DAZZ_TRACK *track;
track = block->tracks;
for (i = 0; i < mtop; i++)
@@ -251,15 +251,15 @@ static int64 merge_size(HITS_DB *block, int mtop)
return (nsize);
}
-static HITS_TRACK *merge_tracks(HITS_DB *block, int mtop, int64 nsize)
-{ HITS_TRACK *ntrack;
+static DAZZ_TRACK *merge_tracks(DAZZ_DB *block, int mtop, int64 nsize)
+{ DAZZ_TRACK *ntrack;
Event ev[mtop+1];
Event *heap[mtop+2];
int r, mhalf;
int64 *anno;
int *data;
- ntrack = (HITS_TRACK *) Malloc(sizeof(HITS_TRACK),"Allocating merged track");
+ ntrack = (DAZZ_TRACK *) Malloc(sizeof(DAZZ_TRACK),"Allocating merged track");
if (ntrack == NULL)
exit (1);
ntrack->name = Strdup("merge","Allocating merged track");
@@ -270,7 +270,7 @@ static HITS_TRACK *merge_tracks(HITS_DB *block, int mtop, int64 nsize)
if (anno == NULL || data == NULL || ntrack->name == NULL)
exit (1);
- { HITS_TRACK *track;
+ { DAZZ_TRACK *track;
int i;
track = block->tracks;
@@ -289,7 +289,7 @@ static HITS_TRACK *merge_tracks(HITS_DB *block, int mtop, int64 nsize)
nsize = 0;
for (r = 0; r < block->nreads; r++)
{ int i, level, hsize;
- HITS_TRACK *track;
+ DAZZ_TRACK *track;
anno[r] = nsize;
@@ -339,7 +339,7 @@ static HITS_TRACK *merge_tracks(HITS_DB *block, int mtop, int64 nsize)
return (ntrack);
}
-static int read_DB(HITS_DB *block, char *name, char **mask, int *mstat, int mtop, int kmer)
+static int read_DB(DAZZ_DB *block, char *name, char **mask, int *mstat, int mtop, int kmer)
{ int i, isdam, status, kind, stop;
isdam = Open_DB(name,block);
@@ -367,7 +367,7 @@ static int read_DB(HITS_DB *block, char *name, char **mask, int *mstat, int mtop
stop = 0;
for (i = 0; i < mtop; i++)
- { HITS_TRACK *track;
+ { DAZZ_TRACK *track;
int64 *anno;
int j;
@@ -384,7 +384,7 @@ static int read_DB(HITS_DB *block, char *name, char **mask, int *mstat, int mtop
if (stop > 1)
{ int64 nsize;
- HITS_TRACK *track;
+ DAZZ_TRACK *track;
nsize = merge_size(block,stop);
track = merge_tracks(block,stop,nsize);
@@ -425,10 +425,10 @@ static void complement(char *s, int len)
*s = (char) (3-*s);
}
-static HITS_DB *complement_DB(HITS_DB *block, int inplace)
-{ static HITS_DB _cblock, *cblock = &_cblock;
+static DAZZ_DB *complement_DB(DAZZ_DB *block, int inplace)
+{ static DAZZ_DB _cblock, *cblock = &_cblock;
int nreads;
- HITS_READ *reads;
+ DAZZ_READ *reads;
char *seq;
nreads = block->nreads;
@@ -463,7 +463,7 @@ static HITS_DB *complement_DB(HITS_DB *block, int inplace)
complement(seq+reads[i].boff,reads[i].rlen);
}
- { HITS_TRACK *src, *trg;
+ { DAZZ_TRACK *src, *trg;
int *data, *tata;
int i, x, rlen;
int64 *tano, *anno;
@@ -483,7 +483,7 @@ static HITS_DB *complement_DB(HITS_DB *block, int inplace)
"Allocating dazzler interval track data");
anno = (int64 *) Malloc(sizeof(int64)*(nreads+1),
"Allocating dazzler interval track index");
- trg = (HITS_TRACK *) Malloc(sizeof(HITS_TRACK),
+ trg = (DAZZ_TRACK *) Malloc(sizeof(DAZZ_TRACK),
"Allocating dazzler interval track header");
if (data == NULL || trg == NULL || anno == NULL)
exit (1);
@@ -536,8 +536,8 @@ static char *CommandBuffer(char *aname, char *bname)
}
int main(int argc, char *argv[])
-{ HITS_DB _ablock, _bblock;
- HITS_DB *ablock = &_ablock, *bblock = &_bblock;
+{ DAZZ_DB _ablock, _bblock;
+ DAZZ_DB *ablock = &_ablock, *bblock = &_bblock;
char *afile, *bfile;
char *aroot, *broot;
void *aindex, *bindex;
@@ -691,7 +691,7 @@ int main(int argc, char *argv[])
else
aroot = Root(afile,".db");
- asettings = New_Align_Spec( AVE_ERROR, SPACING, ablock->freq);
+ asettings = New_Align_Spec( AVE_ERROR, SPACING, ablock->freq, 1);
/* Compare against reads in B in both orientations */
diff --git a/filter.c b/filter.c
index 97c4868..de360b1 100644
--- a/filter.c
+++ b/filter.c
@@ -383,9 +383,9 @@ static int *NormShift = NULL;
static int LogNorm, LogThresh;
static int LogBase[4];
-static HITS_DB *TA_block;
+static DAZZ_DB *TA_block;
static KmerPos *TA_list;
-static HITS_TRACK *TA_track;
+static DAZZ_TRACK *TA_track;
typedef struct
{ int tnum;
@@ -410,7 +410,7 @@ static void *tuple_thread(void *arg)
n -= Kmer*i;
if (TA_track != NULL)
- { HITS_READ *reads = TA_block->reads;
+ { DAZZ_READ *reads = TA_block->reads;
int64 *anno1 = ((int64 *) (TA_track->anno)) + 1;
int *point = (int *) (TA_track->data);
int64 a, b, f;
@@ -493,7 +493,7 @@ static void *biased_tuple_thread(void *arg)
n -= Kmer*i;
if (TA_track != NULL)
- { HITS_READ *reads = TA_block->reads;
+ { DAZZ_READ *reads = TA_block->reads;
int64 *anno1 = ((int64 *) (TA_track->anno)) + 1;
int *point = (int *) (TA_track->data);
int64 j, b, f;
@@ -658,7 +658,7 @@ static void *compress_thread(void *arg)
return (NULL);
}
-void *Sort_Kmers(HITS_DB *block, int *len)
+void *Sort_Kmers(DAZZ_DB *block, int *len)
{ THREAD threads[NTHREADS];
Tuple_Arg parmt[NTHREADS];
Comp_Arg parmf[NTHREADS];
@@ -1200,8 +1200,8 @@ static void *merge_thread(void *arg)
// Report threads: given a segment of merged list, find all seeds and from them all alignments.
-static HITS_DB *MR_ablock;
-static HITS_DB *MR_bblock;
+static DAZZ_DB *MR_ablock;
+static DAZZ_DB *MR_bblock;
static SeedPair *MR_hits;
static int MR_two;
static Align_Spec *MR_spec;
@@ -1569,8 +1569,8 @@ static void *report_thread(void *arg)
Double *hitd = (Double *) MR_hits;
char *aseq = (char *) (MR_ablock->bases);
char *bseq = (char *) (MR_bblock->bases);
- HITS_READ *aread = MR_ablock->reads;
- HITS_READ *bread = MR_bblock->reads;
+ DAZZ_READ *aread = MR_ablock->reads;
+ DAZZ_READ *bread = MR_bblock->reads;
int *score = data->score;
int *scorp = data->score + 1;
int *scorm = data->score - 1;
@@ -1943,7 +1943,7 @@ static char *NameBuffer(char *aname, char *bname)
return (cat);
}
-void Match_Filter(char *aname, HITS_DB *ablock, char *bname, HITS_DB *bblock,
+void Match_Filter(char *aname, DAZZ_DB *ablock, char *bname, DAZZ_DB *bblock,
void *vasort, int alen, void *vbsort, int blen,
int comp, Align_Spec *aspec)
{ THREAD threads[NTHREADS];
diff --git a/filter.h b/filter.h
index 48e3222..36c6e69 100644
--- a/filter.h
+++ b/filter.h
@@ -27,9 +27,9 @@ extern uint64 MEM_PHYSICAL;
int Set_Filter_Params(int kmer, int binshift, int suppress, int hitmin, int nthreads);
-void *Sort_Kmers(HITS_DB *block, int *len);
+void *Sort_Kmers(DAZZ_DB *block, int *len);
-void Match_Filter(char *aname, HITS_DB *ablock, char *bname, HITS_DB *bblock,
+void Match_Filter(char *aname, DAZZ_DB *ablock, char *bname, DAZZ_DB *bblock,
void *atable, int alen, void *btable, int blen,
int comp, Align_Spec *asettings);
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/daligner.git
More information about the debian-med-commit
mailing list