[med-svn] [daligner] 01/03: Imported Upstream version 1.0+20161119
Afif Elghraoui
afif at moszumanska.debian.org
Thu Jan 19 06:20:36 UTC 2017
This is an automated email from the git hooks/post-receive script.
afif pushed a commit to branch master
in repository daligner.
commit 297c59fbf22871b2231b605692a7ed6b8af20951
Author: Afif Elghraoui <afif at debian.org>
Date: Wed Jan 18 22:05:50 2017 -0800
Imported Upstream version 1.0+20161119
---
DB.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
DB.h | 14 ++++++++-
LAdump.c | 25 ++++++----------
LAmerge.c | 2 +-
QV.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
QV.h | 3 ++
align.c | 17 ++++++-----
align.h | 4 +--
filter.c | 10 +++++--
9 files changed, 230 insertions(+), 38 deletions(-)
diff --git a/DB.c b/DB.c
index 46046b7..3afff14 100644
--- a/DB.c
+++ b/DB.c
@@ -314,6 +314,14 @@ void Upper_Read(char *s)
*s = '\0';
}
+void Letter_Arrow(char *s)
+{ static char letter[4] = { '1', '2', '3', '4' };
+
+ for ( ; *s != 4; s++)
+ *s = letter[(int) *s];
+ *s = '\0';
+}
+
// Convert read in ascii representation to [0-3] representation (end with 4)
void Number_Read(char *s)
@@ -341,6 +349,31 @@ void Number_Read(char *s)
*s = 4;
}
+void Number_Arrow(char *s)
+{ static char arrow[128] =
+ { 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 0, 1, 2, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ };
+
+ for ( ; *s != '\0'; s++)
+ *s = arrow[(int) *s];
+ *s = 4;
+}
+
/*******************************************************************************************
*
@@ -426,7 +459,7 @@ int Open_DB(char* path, HITS_DB *db)
if (fscanf(dbvis,DB_NBLOCK,&nblocks) != 1)
if (part == 0)
{ cutoff = 0;
- all = 1;
+ all = DB_ALL;
}
else
{ EPRINTF(EPLACE,"%s: DB %s has not yet been partitioned, cannot request a block !\n",
@@ -466,7 +499,7 @@ int Open_DB(char* path, HITS_DB *db)
db->tracks = NULL;
db->part = part;
db->cutoff = cutoff;
- db->all = all;
+ db->allarr |= all;
db->ufirst = ufirst;
db->tfirst = tfirst;
@@ -478,7 +511,7 @@ int Open_DB(char* path, HITS_DB *db)
db->reads += 1;
if (fread(db->reads,sizeof(HITS_READ),nreads,index) != (size_t) nreads)
{ EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
- free(db->reads);
+ free(db->reads-1);
goto error2;
}
}
@@ -495,7 +528,7 @@ int Open_DB(char* path, HITS_DB *db)
fseeko(index,sizeof(HITS_READ)*ufirst,SEEK_CUR);
if (fread(reads,sizeof(HITS_READ),nreads,index) != (size_t) nreads)
{ EPRINTF(EPLACE,"%s: Index file (.idx) of %s is junk\n",Prog_Name,root);
- free(reads);
+ free(reads-1);
goto error2;
}
@@ -557,10 +590,10 @@ void Trim_DB(HITS_DB *db)
if (db->trimmed) return;
- if (db->cutoff <= 0 && db->all) return;
+ if (db->cutoff <= 0 && (db->allarr & DB_ALL) != 0) return;
cutoff = db->cutoff;
- if (db->all)
+ if ((db->allarr & DB_ALL) != 0)
allflag = 0;
else
allflag = DB_BEST;
@@ -660,10 +693,10 @@ static int Late_Track_Trim(HITS_DB *db, HITS_TRACK *track, int ispart)
if (!db->trimmed) return (0);
- if (db->cutoff <= 0 && db->all) return (0);
+ if (db->cutoff <= 0 && (db->allarr & DB_ALL) != 0) return (0);
cutoff = db->cutoff;
- if (db->all)
+ if ((db->allarr & DB_ALL) != 0)
allflag = 0;
else
allflag = DB_BEST;
@@ -1435,6 +1468,56 @@ int Load_Read(HITS_DB *db, int i, char *read, int ascii)
return (0);
}
+// Load into 'read' the i'th arrow in 'db'. As an ASCII string if ascii is 1,
+// and as a numeric string otherwise.
+//
+
+HITS_DB *Arrow_DB = NULL; // Last db/arw used by "Load_Arrow"
+FILE *Arrow_File = NULL; // Becomes invalid after closing
+
+int Load_Arrow(HITS_DB *db, int i, char *read, int ascii)
+{ FILE *arrow;
+ int64 off;
+ int len, clen;
+ HITS_READ *r = db->reads;
+
+ if (i >= db->nreads)
+ { EPRINTF(EPLACE,"%s: Index out of bounds (Load_Arrow)\n",Prog_Name);
+ EXIT(1);
+ }
+ if (Arrow_DB != db)
+ { fclose(Arrow_File);
+ arrow = Fopen(Catenate(db->path,"","",".arw"),"r");
+ if (arrow == NULL)
+ EXIT(1);
+ Arrow_File = arrow;
+ Arrow_DB = db;
+ }
+ else
+ arrow = Arrow_File;
+
+ off = r[i].boff;
+ len = r[i].rlen;
+
+ if (ftello(arrow) != off)
+ fseeko(arrow,off,SEEK_SET);
+ clen = COMPRESSED_LEN(len);
+ if (clen > 0)
+ { if (fread(read,clen,1,arrow) != 1)
+ { EPRINTF(EPLACE,"%s: Failed read of .bps file (Load_Arrow)\n",Prog_Name);
+ EXIT(1);
+ }
+ }
+ Uncompress_Read(len,read);
+ if (ascii == 1)
+ { Letter_Arrow(read);
+ read[-1] = '\0';
+ }
+ else
+ read[-1] = 4;
+ return (0);
+}
+
char *Load_Subread(HITS_DB *db, int i, int beg, int end, char *read, int ascii)
{ FILE *bases = (FILE *) db->bases;
int64 off;
diff --git a/DB.h b/DB.h
index 983bee7..dc281de 100644
--- a/DB.h
+++ b/DB.h
@@ -164,6 +164,9 @@ void Lower_Read(char *s); // Convert read from numbers to lowercase letters
void Upper_Read(char *s); // Convert read from numbers to uppercase letters (0-3 to ACGT)
void Number_Read(char *s); // Convert read from letters to numbers
+void Letter_Arrow(char *s); // Convert arrow pw's from numbers to uppercase letters (0-3 to 1234)
+void Number_Arrow(char *s); // Convert arrow pw string from letters to numbers
+
/*******************************************************************************************
*
@@ -175,6 +178,9 @@ void Number_Read(char *s); // Convert read from letters to numbers
#define DB_CSS 0x0400 // This is the second or later of a group of reads from a given insert
#define DB_BEST 0x0800 // This is the longest read of a given insert (may be the only 1)
+#define DB_ARROW 0x2 // DB is an arrow DB
+#define DB_ALL 0x1 // all wells are in the trimmed DB
+
// Fields have different interpretations if a .db versus a .dam
typedef struct
@@ -185,6 +191,7 @@ typedef struct
// uncompressed bases in memory block
int64 coff; // Offset (in bytes) of compressed quiva streams in '.qvs' file (DB),
// Offset (in bytes) of scaffold header string in '.hdr' file (DAM)
+ // 4 compressed shorts containing snr info if an arrow DB.
int flags; // QV of read + flags above (DB only)
} HITS_READ;
@@ -226,7 +233,7 @@ typedef struct
{ int ureads; // Total number of reads in untrimmed DB
int treads; // Total number of reads in trimmed DB
int cutoff; // Minimum read length in block (-1 if not yet set)
- int all; // Consider multiple reads from a given well
+ int allarr; // DB_ALL | DB_ARROW
float freq[4]; // frequency of A, C, G, T, respectively
// Set with respect to "active" part of DB (all vs block, untrimmed vs trimmed)
@@ -368,6 +375,11 @@ char *New_Read_Buffer(HITS_DB *db);
int Load_Read(HITS_DB *db, int i, char *read, int ascii);
+ // Exactly the same as Load_Read, save the arrow information is loaded, not the DNA sequence,
+ // and there is only a choice between numeric (0) or ascii (1);
+
+int Load_Arrow(HITS_DB *db, int i, char *read, int ascii);
+
// Load into 'read' the subread [beg,end] of the i'th read in 'db' and return a pointer to the
// the start of the subinterval (not necessarily = to read !!! ). As a lower case ascii
// string if ascii is 1, an upper case ascii string if ascii is 2, and a numeric string
diff --git a/LAdump.c b/LAdump.c
index 070c3c2..716e873 100644
--- a/LAdump.c
+++ b/LAdump.c
@@ -40,6 +40,7 @@ int main(int argc, char *argv[])
FILE *input;
int64 novl;
int tspace, tbytes, small;
+ int tmax;
int reps, *pts;
int input_pts;
@@ -279,7 +280,7 @@ int main(int argc, char *argv[])
{ int j, al, tlen;
int in, npt, idx, ar;
- int64 novls, odeg, omax, sdeg, smax, ttot, tmax;
+ int64 novls, odeg, omax, sdeg, smax, ttot;
in = 0;
npt = pts[0];
@@ -359,28 +360,24 @@ int main(int argc, char *argv[])
printf("+ P %lld\n",novls);
printf("%% P %lld\n",omax);
- printf("+ T %lld\n",ttot);
- printf("%% T %lld\n",smax);
- printf("@ T %lld\n",tmax);
+ if (DOTRACE)
+ { printf("+ T %lld\n",ttot);
+ printf("%% T %lld\n",smax);
+ printf("@ T %lld\n",tmax);
+ }
}
// Read the file and display selected records
{ int j;
uint16 *trace;
- int tmax;
int in, npt, idx, ar;
int64 verse;
rewind(input);
- fread(&verse,sizeof(int64),1,input);
+ fread(&novl,sizeof(int64),1,input);
fread(&tspace,sizeof(int),1,input);
- if (verse < 0)
- { for (j = 0; j < 5; j++)
- fread(&verse,sizeof(int64),1,input);
- }
- tmax = 1000;
trace = (uint16 *) Malloc(sizeof(uint16)*tmax,"Allocating trace vector");
if (trace == NULL)
exit (1);
@@ -396,12 +393,6 @@ int main(int argc, char *argv[])
// Read it in
{ Read_Overlap(input,ovl);
- if (ovl->path.tlen > tmax)
- { tmax = ((int) 1.2*ovl->path.tlen) + 100;
- trace = (uint16 *) Realloc(trace,sizeof(uint16)*tmax,"Allocating trace vector");
- if (trace == NULL)
- exit (1);
- }
ovl->path.trace = (void *) trace;
Read_Trace(input,ovl,tbytes);
diff --git a/LAmerge.c b/LAmerge.c
index 985a5f6..6a768fd 100644
--- a/LAmerge.c
+++ b/LAmerge.c
@@ -168,7 +168,7 @@ static void ovl_reload(IO_block *in, int64 bsize)
remains = in->top - in->ptr;
if (remains > 0)
- memcpy(in->block, in->ptr, remains);
+ memmove(in->block, in->ptr, remains);
in->ptr = in->block;
in->top = in->block + remains;
in->top += fread(in->top,1,bsize-remains,in->stream);
diff --git a/QV.c b/QV.c
index cdb6a63..d7d7263 100644
--- a/QV.c
+++ b/QV.c
@@ -863,6 +863,62 @@ static int delChar, subChar;
// Referred by: QVcoding_Scan, Create_QVcoding
+void QVcoding_Scan1(int rlen, char *delQV, char *delTag, char *insQV, char *mergeQV, char *subQV)
+{
+ if (rlen == 0) // Initialization call
+ { int i;
+
+ // Zero histograms
+
+ bzero(delHist,sizeof(uint64)*256);
+ bzero(mrgHist,sizeof(uint64)*256);
+ bzero(insHist,sizeof(uint64)*256);
+ bzero(subHist,sizeof(uint64)*256);
+
+ for (i = 0; i < 256; i++)
+ delRun[i] = subRun[i] = 1;
+
+ totChar = 0;
+ delChar = -1;
+ subChar = -1;
+ return;
+ }
+
+ // Add streams to accumulating histograms and figure out the run chars
+ // for the deletion and substition streams
+
+ Histogram_Seqs(delHist,(uint8 *) delQV,rlen);
+ Histogram_Seqs(insHist,(uint8 *) insQV,rlen);
+ Histogram_Seqs(mrgHist,(uint8 *) mergeQV,rlen);
+ Histogram_Seqs(subHist,(uint8 *) subQV,rlen);
+
+ if (delChar < 0)
+ { int k;
+
+ for (k = 0; k < rlen; k++)
+ if (delTag[k] == 'n' || delTag[k] == 'N')
+ { delChar = delQV[k];
+ break;
+ }
+ }
+ if (delChar >= 0)
+ Histogram_Runs( delRun,(uint8 *) delQV,rlen,delChar);
+ totChar += rlen;
+ if (subChar < 0)
+ { if (totChar >= 100000)
+ { int k;
+
+ subChar = 0;
+ for (k = 1; k < 256; k++)
+ if (subHist[k] > subHist[subChar])
+ subChar = k;
+ }
+ }
+ if (subChar >= 0)
+ Histogram_Runs( subRun,(uint8 *) subQV,rlen,subChar);
+ return;
+}
+
int QVcoding_Scan(FILE *input, int num, FILE *temp)
{ char *slash;
int rlen;
@@ -1284,6 +1340,44 @@ void Free_QVcoding(QVcoding *coding)
*
********************************************************************************************/
+void Compress_Next_QVentry1(int rlen, char *del, char *tag, char *ins, char *mrg, char *sub,
+ FILE *output, QVcoding *coding, int lossy)
+{ int clen;
+
+ if (coding->delChar < 0)
+ { Encode(coding->delScheme, output, (uint8 *) del, rlen);
+ clen = rlen;
+ }
+ else
+ { Encode_Run(coding->delScheme, coding->dRunScheme, output,
+ (uint8 *) del, rlen, coding->delChar);
+ clen = Pack_Tag(tag,del,rlen,coding->delChar);
+ }
+ Number_Read(tag);
+ Compress_Read(clen,tag);
+ fwrite(tag,1,COMPRESSED_LEN(clen),output);
+
+ if (lossy)
+ { uint8 *insert = (uint8 *) ins;
+ uint8 *merge = (uint8 *) mrg;
+ int k;
+
+ for (k = 0; k < rlen; k++)
+ { insert[k] = (uint8) ((insert[k] >> 1) << 1);
+ merge[k] = (uint8) (( merge[k] >> 2) << 2);
+ }
+ }
+
+ Encode(coding->insScheme, output, (uint8 *) ins, rlen);
+ Encode(coding->mrgScheme, output, (uint8 *) mrg, rlen);
+ if (coding->subChar < 0)
+ Encode(coding->subScheme, output, (uint8 *) sub, rlen);
+ else
+ Encode_Run(coding->subScheme, coding->sRunScheme, output,
+ (uint8 *) sub, rlen, coding->subChar);
+ return;
+}
+
int Compress_Next_QVentry(FILE *input, FILE *output, QVcoding *coding, int lossy)
{ int rlen, clen;
diff --git a/QV.h b/QV.h
index 532b2f4..e5c9485 100644
--- a/QV.h
+++ b/QV.h
@@ -58,6 +58,7 @@ int Get_QV_Line();
// If there is an error then -1 is returned, otherwise the number of entries read.
int QVcoding_Scan(FILE *input, int num, FILE *temp);
+void QVcoding_Scan1(int rlen, char *del, char *tag, char *ins, char *mrg, char *sub);
// Given QVcoding_Scan has been called at least once, create an encoding scheme based on
// the accumulated statistics and return a pointer to it. The returned encoding object
@@ -83,6 +84,8 @@ void Free_QVcoding(QVcoding *coding);
// occurred, and the sequence length otherwise.
int Compress_Next_QVentry(FILE *input, FILE *output, QVcoding *coding, int lossy);
+void Compress_Next_QVentry1(int rlen, char *del, char *tag, char *ins, char *mrg, char *sub,
+ FILE *output, QVcoding *coding, int lossy);
// Assuming the input is position just beyond the compressed encoding of an entry header,
// read the set of compressed encodings for the ensuing 5 QV vectors, decompress them,
diff --git a/align.c b/align.c
index eb877d4..449b833 100644
--- a/align.c
+++ b/align.c
@@ -2983,8 +2983,8 @@ int Find_Extension(Alignment *align, Work_Data *ework, Align_Spec *espec,
if (prefix)
{ if (reverse_extend(work,spec,align,diag,anti,minp,maxp))
EXIT(1);
- apath->aepos = (anti-diag)/2;
- apath->bepos = (anti+diag)/2;
+ apath->aepos = (anti+diag)/2;
+ apath->bepos = (anti-diag)/2;
#ifdef DEBUG_PASSES
printf("E1 (%d,%d) => (%d,%d) %d\n",
(anti+diag)/2,(anti-diag)/2,apath->abpos,apath->bbpos,apath->diffs);
@@ -2993,8 +2993,8 @@ int Find_Extension(Alignment *align, Work_Data *ework, Align_Spec *espec,
else
{ if (forward_extend(work,spec,align,diag,anti,minp,maxp))
EXIT(1);
- apath->abpos = (anti-diag)/2;
- apath->bbpos = (anti+diag)/2;
+ apath->abpos = (anti+diag)/2;
+ apath->bbpos = (anti-diag)/2;
#ifdef DEBUG_PASSES
printf("F1 (%d,%d) => (%d,%d) %d\n",
(anti+diag)/2,(anti-diag)/2,apath->aepos,apath->bepos,apath->diffs);
@@ -3044,10 +3044,13 @@ int Read_Trace(FILE *input, Overlap *ovl, int tbytes)
return (0);
}
-void Write_Overlap(FILE *output, Overlap *ovl, int tbytes)
-{ fwrite( ((char *) ovl) + PtrSize, OvlIOSize, 1, output);
+int Write_Overlap(FILE *output, Overlap *ovl, int tbytes)
+{ if (fwrite( ((char *) ovl) + PtrSize, OvlIOSize, 1, output) != 1)
+ return (1);
if (ovl->path.trace != NULL)
- fwrite(ovl->path.trace,tbytes,ovl->path.tlen,output);
+ if (fwrite(ovl->path.trace,tbytes,ovl->path.tlen,output) != (size_t) ovl->path.tlen)
+ return (1);
+ return (0);
}
void Compress_TraceTo8(Overlap *ovl)
diff --git a/align.h b/align.h
index c3b31ae..daa9151 100644
--- a/align.h
+++ b/align.h
@@ -325,7 +325,7 @@ typedef struct {
accommodate the trace where each value take 'tbytes' bytes (1 if uint8 or 2 if uint16).
Write_Overlap write 'ovl' to stream 'output' followed by its trace vector (if any) that
- occupies 'tbytes' bytes per value.
+ occupies 'tbytes' bytes per value. It returns non-zero if there was an error writing.
Print_Overlap prints an ASCII version of the contents of 'ovl' to stream 'output'
where the trace occupes 'tbytes' per value and the print out is indented from the left
@@ -343,7 +343,7 @@ typedef struct {
int Read_Overlap(FILE *input, Overlap *ovl);
int Read_Trace(FILE *innput, Overlap *ovl, int tbytes);
- void Write_Overlap(FILE *output, Overlap *ovl, int tbytes);
+ int Write_Overlap(FILE *output, Overlap *ovl, int tbytes);
void Print_Overlap(FILE *output, Overlap *ovl, int tbytes, int indent);
void Compress_TraceTo8(Overlap *ovl);
diff --git a/filter.c b/filter.c
index a9c5083..90ca7c3 100644
--- a/filter.c
+++ b/filter.c
@@ -1842,14 +1842,20 @@ static void *report_thread(void *arg)
ovla->path.trace = tbuf->trace + (uint64) (ovla->path.trace);
if (small)
Compress_TraceTo8(ovla);
- Write_Overlap(ofile1,ovla,tbytes);
+ if (Write_Overlap(ofile1,ovla,tbytes))
+ { fprintf(stderr,"%s: Cannot write to /tmp, too small?\n",Prog_Name);
+ exit (1);
+ }
}
for (i = 0; i < novlb; i++)
{ ovlb->path = bmatch[i];
ovlb->path.trace = tbuf->trace + (uint64) (ovlb->path.trace);
if (small)
Compress_TraceTo8(ovlb);
- Write_Overlap(ofile2,ovlb,tbytes);
+ if (Write_Overlap(ofile2,ovlb,tbytes))
+ { fprintf(stderr,"%s: Cannot write to /tmp, too small?\n",Prog_Name);
+ exit (1);
+ }
}
ahits += novla;
bhits += novlb;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/daligner.git
More information about the debian-med-commit
mailing list