[med-svn] [Git][med-team/sra-sdk][master] 6 commits: New upstream version 2.8.2-5+dfsg
Andreas Tille
gitlab at salsa.debian.org
Sun Feb 18 14:56:54 UTC 2018
Andreas Tille pushed to branch master at Debian Med / sra-sdk
Commits:
f9830f55 by Andreas Tille at 2018-02-18T08:58:20+01:00
New upstream version 2.8.2-5+dfsg
- - - - -
0c939402 by Andreas Tille at 2018-02-18T08:58:29+01:00
Update upstream source from tag 'upstream/2.8.2-5+dfsg'
Update to upstream version '2.8.2-5+dfsg'
with Debian dir 3e1180b842babd4ed10a1842cd8b72bd973eafbd
- - - - -
d1161b9a by Andreas Tille at 2018-02-18T08:59:47+01:00
Standards-Version: 4.1.3
- - - - -
f6f9c913 by Andreas Tille at 2018-02-18T15:44:03+01:00
New upstream version
- - - - -
ead4ba92 by Andreas Tille at 2018-02-18T15:53:56+01:00
Spelling
- - - - -
de21e465 by Andreas Tille at 2018-02-18T15:55:12+01:00
Realise that build fails with d/compat=11 and upload as is to unstable
- - - - -
13 changed files:
- debian/changelog
- debian/control
- debian/patches/absolute_vschema_path_in_test.patch
- + test/sra-sort/md-created.sh
- tools/sra-sort/csra-tbl.c
- tools/sra-sort/tbl-pair.c
- tools/sra-sort/tbl-pair.h
- tools/sra-sort/xcheck-ref-align.c
- tools/sra-sort/xcheck.h
- tools/sra-stat/Makefile
- + tools/sra-stat/assembly-statistics.c
- tools/sra-stat/sra-stat.c
- tools/sra-stat/sra-stat.h
Changes:
=====================================
debian/changelog
=====================================
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,10 @@
+sra-sdk (2.8.2-5+dfsg-1) unstable; urgency=medium
+
+ * New upstream version
+ * Standards-Version: 4.1.3
+
+ -- Andreas Tille <tille at debian.org> Sun, 18 Feb 2018 15:44:10 +0100
+
sra-sdk (2.8.2-3+dfsg-1) unstable; urgency=medium
[ Steffen Moeller ]
=====================================
debian/control
=====================================
--- a/debian/control
+++ b/debian/control
@@ -11,7 +11,7 @@ Build-Depends: debhelper (>= 10),
libhdf5-dev,
libmagic-dev,
libxml2-dev
-Standards-Version: 4.1.1
+Standards-Version: 4.1.3
Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/sra-sdk.git
Vcs-Git: https://anonscm.debian.org/git/debian-med/sra-sdk.git
Homepage: https://github.com/ncbi/sra-tools/
=====================================
debian/patches/absolute_vschema_path_in_test.patch
=====================================
--- a/debian/patches/absolute_vschema_path_in_test.patch
+++ b/debian/patches/absolute_vschema_path_in_test.patch
@@ -1,6 +1,6 @@
Author: Andreas Tille <tille at debian.org>
Last-Update: Mon, 06 Jun 2016 11:24:46 +0200
-Description: We need to set absolute pathes also in the test
+Description: We need to set absolute paths also in the test
(test fails anyway for different reasons and is excluded later)
--- a/test/vcf-loader/test-vcf-loader.cpp
=====================================
test/sra-sort/md-created.sh
=====================================
--- /dev/null
+++ b/test/sra-sort/md-created.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+TMP=/export/home/TMP
+if [ ! -d "$TMP" ] ; then
+ echo $TMP is not found: skipping the test
+ exit 0
+fi
+
+if [ "$PANFS_PAN1" == "" ] ; then
+ echo PANFS_PAN1 is not set: exiting
+ exit 1
+fi
+
+I=`whoami`
+DSTDIR=$TMP/$I/sra-sort-md
+DST=$DSTDIR/sorted
+
+rm -fr $DSTDIR
+
+SORT=sra-sort
+which $SORT > /dev/null 2>&1
+if [ "$?" != "0" ] ; then
+ echo "sra-sort not found: add it to your PATH"
+ exit 10
+fi
+
+OPT="--tempdir $DSTDIR --mmapdir $DSTDIR --map-file-bsize 80000000000 --max-ref-idx-ids 4000000000 --max-idx-ids 4000000000 --max-large-idx-ids 800000000"
+SRC=$PANFS_PAN1/sra-test/TEST-DATA/SRR5318091-sra-sort-md
+CMD="$SORT -f -v -L6 $OPT $SRC $DST"
+echo $ $CMD
+
+$CMD
+EXIT=$?
+if [ $EXIT -ne 0 ] ; then
+ echo "sra-sort failed with $EXIT"
+ rm -r $DSTDIR
+ exit $EXIT
+fi
+
+if [ ! -d "$DST/md" ] ; then
+ echo Failure: md was not created in $DST:
+ echo $ ls $DST
+ ls $DST
+ rm -r $DSTDIR
+ exit 20
+else
+ echo Success: md was created in $DST:
+ echo $ ls $DST
+ ls $DST
+fi
+
+rm -r $DSTDIR
=====================================
tools/sra-sort/csra-tbl.c
=====================================
--- a/tools/sra-sort/csra-tbl.c
+++ b/tools/sra-sort/csra-tbl.c
@@ -446,6 +446,10 @@ void cSRATblPairPostCopyAlign ( cSRATblPair *self, const ctx_t *ctx )
cSRAPair *csra = self -> csra;
+ struct KThread ** pt = NULL;
+ assert ( self );
+ pt = & self -> dad . thread;
+
RowSetIteratorRelease ( self -> rsi, ctx );
self -> rsi = NULL;
@@ -458,10 +462,10 @@ void cSRATblPairPostCopyAlign ( cSRATblPair *self, const ctx_t *ctx )
switch ( self -> align_idx )
{
case 1:
- CrossCheckRefAlignTbl ( ctx, csra -> reference -> dtbl, csra -> prim_align -> dtbl, "PRIMARY_ALIGNMENT" );
+ CrossCheckRefAlignTbl ( ctx, csra -> reference -> dtbl, csra -> prim_align -> dtbl, "PRIMARY_ALIGNMENT", pt );
break;
case 2:
- CrossCheckRefAlignTbl ( ctx, csra -> reference -> dtbl, csra -> sec_align -> dtbl, "SECONDARY_ALIGNMENT" );
+ CrossCheckRefAlignTbl ( ctx, csra -> reference -> dtbl, csra -> sec_align -> dtbl, "SECONDARY_ALIGNMENT", pt );
#if SEQUENCE_BEFORE_SECONDARY
cSRATblPairWhackMappingIdx ( self, ctx );
=====================================
tools/sra-sort/tbl-pair.c
=====================================
--- a/tools/sra-sort/tbl-pair.c
+++ b/tools/sra-sort/tbl-pair.c
@@ -47,6 +47,7 @@ typedef struct TablePair StdTblPair;
#include <klib/text.h>
#include <klib/namelist.h>
#include <klib/rc.h>
+#include <kproc/thread.h> /* KThreadWait */
#include <string.h>
@@ -1050,6 +1051,43 @@ void TablePairDestroy ( TablePair *self, const ctx_t *ctx )
VTableRelease ( self -> stbl );
MemFree ( ctx, ( void* ) self -> full_spec, self -> full_spec_size + 1 );
+
+ if ( self -> thread != NULL ) {
+ rc_t rc = 0;
+ rc_t status = 0;
+ STATUS ( 2, "waiting for background thread 0x%p to finish...",
+ self -> thread );
+
+ rc = KThreadWait ( self -> thread, & status );
+ if ( rc != 0 )
+ ERROR ( rc, "failed to wait for background thread 0x%p",
+ self -> thread );
+ else if ( status == 0 )
+ STATUS ( 2, "...background thread 0x%p succeed", self -> thread );
+ else {
+ ERROR ( status, "background thread 0x%p failed", self -> thread );
+ rc = status;
+ }
+
+ {
+ rc_t r2 = KThreadRelease ( self -> thread );
+ if ( r2 != 0 ) {
+ ERROR ( r2, "failed to release background thread 0x%p",
+ self -> thread );
+ if ( rc == 0 )
+ rc = r2;
+ }
+ }
+
+ if ( rc != 0 && ctx != NULL ) {
+ ctx_t * mctx = NULL;
+ for ( mctx = ( ctx_t * ) ctx; mctx != NULL && mctx -> rc == 0;
+ mctx = ( ctx_t* ) mctx -> caller )
+ {
+ mctx -> rc = rc;
+ }
+ }
+ }
memset ( self, 0, sizeof * self );
}
=====================================
tools/sra-sort/tbl-pair.h
=====================================
--- a/tools/sra-sort/tbl-pair.h
+++ b/tools/sra-sort/tbl-pair.h
@@ -45,6 +45,7 @@
*/
struct VTable;
struct DbPair;
+struct KThread;
struct ColumnReader;
struct ColumnWriter;
struct ColumnPair;
@@ -109,6 +110,9 @@ struct TablePair
/* true if already exploded */
bool exploded;
+ /* Thread launched by TablePairPostCopy [ to do consistency-check ] */
+ struct KThread * thread;
+
uint8_t align [ 2 ];
};
=====================================
tools/sra-sort/xcheck-ref-align.c
=====================================
--- a/tools/sra-sort/xcheck-ref-align.c
+++ b/tools/sra-sort/xcheck-ref-align.c
@@ -341,11 +341,12 @@ rc_t CC CrossCheckRefAlignTblRun ( const KThread *self, void *data )
ctx_t thread_ctx = { & pb -> caps, NULL, & ctx_info };
const ctx_t *ctx = & thread_ctx;
- STATUS ( 2, "running consistency-check on background thread" );
+ STATUS ( 2, "running consistency-check on background thread 0x%p", self );
CrossCheckRefAlignTblInt ( ctx, pb -> ref_tbl, pb -> align_tbl, pb -> align_name );
- STATUS ( 2, "finished consistency-check on background thread" );
+ STATUS ( 2, "finished consistency-check on background thread 0x%p: %s",
+ self, ctx -> rc ? "failure" : "success ");
VTableRelease ( pb -> align_tbl );
VTableRelease ( pb -> ref_tbl );
@@ -357,7 +358,8 @@ rc_t CC CrossCheckRefAlignTblRun ( const KThread *self, void *data )
#endif
void CrossCheckRefAlignTbl ( const ctx_t *ctx,
- const VTable *ref_tbl, const VTable *align_tbl, const char *align_name )
+ const VTable *ref_tbl, const VTable *align_tbl, const char *align_name,
+ KThread ** pt )
{
FUNC_ENTRY ( ctx );
@@ -368,6 +370,9 @@ void CrossCheckRefAlignTbl ( const ctx_t *ctx,
STATUS ( 2, "consistency-check on join indices between REFERENCE and %s tables", align_name );
+ assert ( pt );
+ * pt = NULL;
+
#if USE_BGTHREAD
name_len = strlen ( align_name );
TRY ( pb = MemAlloc ( ctx, sizeof * pb + name_len, false ) )
@@ -391,6 +396,7 @@ void CrossCheckRefAlignTbl ( const ctx_t *ctx,
rc = KThreadMake ( & t, CrossCheckRefAlignTblRun, pb );
if ( rc == 0 )
{
+ * pt = t;
return;
}
=====================================
tools/sra-sort/xcheck.h
=====================================
--- a/tools/sra-sort/xcheck.h
+++ b/tools/sra-sort/xcheck.h
@@ -35,6 +35,7 @@
/*--------------------------------------------------------------------------
* forwards
*/
+struct KThread;
struct VTable;
@@ -44,6 +45,6 @@ struct VTable;
* runs a cross-check of REFERENCE.<name>_IDS against <name>.REF_ID
*/
void CrossCheckRefAlignTbl ( const ctx_t *ctx,
- struct VTable const *ref_tbl, struct VTable const *align_tbl, const char *align_name );
+ struct VTable const *ref_tbl, struct VTable const *align_tbl, const char *align_name, struct KThread ** pt );
#endif
=====================================
tools/sra-stat/Makefile
=====================================
--- a/tools/sra-stat/Makefile
+++ b/tools/sra-stat/Makefile
@@ -62,6 +62,7 @@ clean: stdclean
# sra statistics
#
SRASTAT_SRC = \
+ assembly-statistics \
sra \
sra-stat \
=====================================
tools/sra-stat/assembly-statistics.c
=====================================
--- /dev/null
+++ b/tools/sra-stat/assembly-statistics.c
@@ -0,0 +1,288 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+#include "sra-stat.h" /* Ctx */
+
+#include <insdc/sra.h> /* INSDC_coord_len */
+
+#include <kdb/table.h> /* KTable */
+
+#include <klib/container.h> /* BSTNode */
+#include <klib/debug.h> /* DBGMSG */
+#include <klib/log.h> /* LOGERR */
+#include <klib/out.h> /* OUTMSG */
+#include <klib/rc.h>
+
+#include <vdb/blob.h> /* VBlob */
+#include <vdb/cursor.h> /* VCursor */
+#include <vdb/database.h> /* VDatabase */
+#include <vdb/table.h> /* VTable */
+#include <vdb/vdb-priv.h> /* VTableOpenKTableRead */
+
+typedef struct {
+ BSTNode n;
+ uint64_t length;
+} Contig;
+
+static void CC ContigWhack ( BSTNode * n, void * data ) {
+ free ( n );
+}
+
+static
+int64_t CC ContigSort ( const BSTNode * item, const BSTNode * n )
+{
+ const Contig * contigl = ( const Contig * ) item;
+ const Contig * contigr = ( const Contig * ) n;
+
+ assert ( contigl && contigr);
+
+ return contigl -> length < contigr -> length;
+}
+
+typedef struct {
+ uint64_t assemblyLength;
+ uint64_t contigLength;
+
+ uint64_t count;
+ uint64_t length;
+
+ BSTree bt;
+
+ uint64_t l50;
+ uint64_t n50;
+ uint64_t l90;
+ uint64_t n90;
+} Contigs;
+
+static void CC ContigNext ( BSTNode * n, void * data ) {
+ const Contig * contig = ( const Contig * ) n;
+ Contigs * nl = ( Contigs * ) data;
+
+ assert ( contig && nl );
+
+ ++ nl -> count;
+ nl -> length += contig -> length;
+
+ DBGMSG ( DBG_APP, DBG_COND_1, ( "Contig %lu/%lu: %lu. Total: %lu/%lu\n",
+ nl -> count, nl -> contigLength, contig -> length,
+ nl -> length, nl -> assemblyLength ) );
+
+ if ( nl -> l50 == 0 && nl -> length * 2 >= nl -> assemblyLength ) {
+ nl -> n50 = contig -> length;
+ nl -> l50 = nl -> count;
+ DBGMSG ( DBG_APP, DBG_COND_1, ( "L50: %lu, N50: %lu (%lu>=%lu/2)\n",
+ nl -> l50, nl -> n50, nl -> length, nl -> assemblyLength ) );
+ }
+
+ if ( nl -> l90 == 0 &&
+ .9 * nl -> assemblyLength <= nl -> length )
+ {
+ nl -> n90 = contig -> length;
+ nl -> l90 = nl -> count;
+ DBGMSG ( DBG_APP, DBG_COND_1, ( "L90: %lu, N90: %lu (%lu*.9>=%lu)\n",
+ nl -> l90, nl -> n90, nl -> length, nl -> assemblyLength ) );
+ }
+}
+
+static void ContigsInit ( Contigs * self ) {
+ assert ( self );
+
+ memset ( self, 0, sizeof * self );
+}
+
+static rc_t ContigsAdd ( Contigs * self, uint32_t length ) {
+ Contig * contig = ( Contig * ) calloc ( 1, sizeof * contig );
+
+ assert ( self );
+
+ if ( contig == NULL )
+ return RC ( rcExe, rcStorage, rcAllocating, rcMemory, rcExhausted );
+
+ self -> assemblyLength += length;
+ ++ self -> contigLength;
+
+ contig -> length = length;
+ return BSTreeInsert ( & self -> bt, ( BSTNode * ) contig, ContigSort );
+}
+
+static void ContigsCalculateStatistics ( Contigs * self ) {
+ assert ( self );
+
+ BSTreeForEach ( & self -> bt, false, ContigNext, self );
+}
+
+static void ContigsFini ( Contigs * self ) {
+ assert ( self );
+
+ BSTreeWhack ( & self -> bt, ContigWhack, NULL );
+}
+
+/* Calculate N50, L50 statistics:
+see https://en.wikipedia.org/wiki/N50,_L50,_and_related_statistics */
+rc_t CC CalculateNL ( const VDatabase * db, Ctx * ctx ) {
+ rc_t rc = 0;
+
+ const VTable * tbl = NULL;
+ const KTable * ktbl = NULL;
+ const KIndex * idx = NULL;
+ const VCursor * cursor = NULL;
+
+ uint32_t CMP_READ;
+ uint32_t READ_LEN;
+
+ Contigs contigs;
+
+ int64_t start = 1;
+ uint64_t count = 0;
+
+ /* Statictics is calculated just for VDatabase-s */
+ if ( db == NULL ) {
+ DBGMSG ( DBG_APP, DBG_COND_1,
+ ( "CalculateAssemblyStatistics skipped: not a database\n" ) );
+ return 0;
+ }
+
+ rc = VDatabaseOpenTableRead ( db, & tbl, "REFERENCE" );
+ /* Statictics is calculated just for VDatabase-s with REFERENCE */
+ if ( rc != 0 && GetRCState ( rc ) == rcNotFound ) {
+ DBGMSG ( DBG_APP, DBG_COND_1,
+ ( "CalculateAssemblyStatistics skipped: no REFERENCE table\n" ) );
+ return 0;
+ }
+
+ ContigsInit ( & contigs );
+
+ rc = VTableOpenKTableRead ( tbl, & ktbl );
+ DISP_RC ( rc, "while calling "
+ "VTableOpenKTableRead(VDatabaseOpenTableRead(REFERENCE))");
+ if ( rc == 0 ) {
+ rc = KTableOpenIndexRead ( ktbl, & idx, "i_name" );
+ DISP_RC ( rc, "while calling KTableOpenIndexRead"
+ "(VTableOpenKTableRead(VDatabaseOpenTableRead(REFERENCE),i_name)");
+ }
+ if ( rc == 0 ) {
+ rc = VTableCreateCursorRead ( tbl, & cursor );
+ DISP_RC ( rc, "while calling VTableCreateCursorRead(REFERENCE)");
+ }
+ if ( rc == 0 ) {
+ rc = VCursorAddColumn ( cursor, & CMP_READ, "CMP_READ" );
+ DISP_RC ( rc, "while calling VCursorAddColumn(REFERENCE,CMP_READ)");
+ }
+ if ( rc == 0 ) {
+ rc = VCursorAddColumn ( cursor, & READ_LEN, "READ_LEN" );
+ DISP_RC ( rc, "while calling VCursorAddColumn(REFERENCE,READ_LEN)");
+ }
+ if ( rc == 0 ) {
+ rc = VCursorOpen ( cursor );
+ DISP_RC ( rc, "while calling VCursorOpen(REFERENCE)");
+ }
+
+ for ( start = 1; rc == 0; start += count ) {
+ uint32_t row_len = 0;
+ const VBlob * blob = NULL;
+ size_t key_size = 0;
+ char key [ 4096 ];
+ rc = KIndexProjectText ( idx, start, & start, & count,
+ key, sizeof key, & key_size );
+ if ( rc == SILENT_RC
+ ( rcDB, rcIndex, rcProjecting, rcId, rcNotFound ) )
+ {
+ rc = 0;
+ break; /* no more references */
+ }
+ DISP_RC ( rc, "while calling KIndexProjectText(KTableOpenIndexRead"
+ "(VTableOpenKTableRead(VDatabaseOpenTableRead(REFERENCE),i_name)");
+ if ( rc == 0 ) {
+ rc = VCursorGetBlobDirect ( cursor, & blob, start, CMP_READ );
+ DISP_RC ( rc, "while calling VCursorGetBlobDirect(CMP_READ)" );
+ }
+ if ( rc == 0 ) {
+ uint32_t elem_bits = 0;
+ const void * base = NULL;
+ uint32_t boff = 0;
+ rc = VBlobCellData ( blob, start,
+ & elem_bits, & base, & boff, & row_len );
+ DISP_RC ( rc, "while calling VBlobCellData(CMP_READ)" );
+ }
+ if ( rc == 0 ) {
+ if ( row_len == 0 ) {
+ /* When CMP_READ is not empty - local reference.
+ We calculate statistics just for local references */
+ DBGMSG ( DBG_APP, DBG_COND_1, ( "CalculateAssemblyStatistics: "
+ "%s skipped: not a local reference\n", key ) );
+ }
+ else {
+ uint64_t length = 0;
+ INSDC_coord_len buffer = 0;
+ uint32_t row_len = 0;
+ rc = VCursorReadDirect ( cursor, start,
+ READ_LEN, 8, & buffer, sizeof buffer, & row_len );
+ DISP_RC ( rc, "while calling VCursorReadDirect(READ_LEN,id)" );
+ if ( rc == 0 )
+ length = buffer;
+ if ( rc == 0 && count > 1 ) {
+ INSDC_coord_len buffer = 0;
+ uint32_t row_len = 0;
+ rc = VCursorReadDirect ( cursor, start + count - 1,
+ READ_LEN, 8, & buffer, sizeof buffer, & row_len );
+ DISP_RC ( rc,
+ "while calling VCursorReadDirect(READ_LEN,id+count)" );
+ if ( rc == 0 )
+ length = length * ( count - 1) + buffer;
+ }
+ if ( rc == 0 )
+ rc = ContigsAdd ( & contigs, length );
+ }
+ }
+ RELEASE ( VBlob, blob );
+ }
+
+ if ( rc == 0 )
+ ContigsCalculateStatistics ( & contigs );
+
+ RELEASE ( VCursor, cursor );
+ RELEASE ( KIndex, idx );
+ RELEASE ( KTable, ktbl );
+ RELEASE ( VTable, tbl );
+
+ if ( rc == 0 ) {
+ assert ( ctx );
+ assert ( contigs . assemblyLength == contigs . length );
+ assert ( contigs . contigLength == contigs . count );
+ if ( contigs . n90 > 0 ) {
+ ctx -> l50 = contigs . l50;
+ ctx -> n50 = contigs . n50;
+ ctx -> l90 = contigs . l90;
+ ctx -> n90 = contigs . n90;
+ ctx -> l = contigs . contigLength;
+ ctx -> n = contigs . assemblyLength;
+ }
+ }
+
+ ContigsFini ( & contigs );
+
+ return rc;
+}
=====================================
tools/sra-stat/sra-stat.c
=====================================
--- a/tools/sra-stat/sra-stat.c
+++ b/tools/sra-stat/sra-stat.c
@@ -20,7 +20,7 @@
*
* Please cite the author in any work or product based on this material.
*
-* ===========================================================================
+* ==============================================================================
*
*/
@@ -54,6 +54,7 @@
#include <sra/sraschema.h> /* VDBManagerMakeSRASchema */
+#include <vdb/blob.h> /* VBlobCellData */
#include <vdb/cursor.h> /* VCursor */
#include <vdb/database.h> /* VDatabaseRelease */
#include <vdb/dependencies.h> /* VDBDependencies */
@@ -69,15 +70,12 @@
#include <os-native.h> /* strtok_r on Windows */
#include <assert.h>
+#include <ctype.h> /* isprint */
#include <math.h> /* sqrt */
#include <stdlib.h>
#include <string.h>
#include <time.h>
-/* #include <stdio.h> */ /* stderr */
-
-#define DISP_RC(rc, msg) (void)((rc == 0) ? 0 : LOGERR(klogInt, rc, msg))
-
#define DISP_RC2(rc, name, msg) (void)((rc == 0) ? 0 : \
PLOGERR(klogInt, (klogInt, rc, \
"$(name): $(msg)", "name=%s,msg=%s", name, msg)))
@@ -253,6 +251,79 @@ typedef struct Statistics2 {
double average;
double diff_sq_sum;
} Statistics2;
+typedef enum {
+ ebtUNDEFINED,
+ ebtREAD,
+ ebtCSREAD,
+ ebtRAW_READ,
+} EBasesType;
+typedef struct {
+ uint64_t cnt[5];
+ EBasesType basesType;
+
+ const VCursor * cursSEQUENCE;
+ uint32_t idxSEQUENCE;
+ uint32_t idxSEQ_READ_LEN;
+ uint32_t idxSEQ_READ_TYPE;
+ uint64_t startSEQUENCE;
+ uint64_t stopSEQUENCE;
+
+ const VCursor * cursALIGNMENT;
+ uint32_t idxALIGNMENT;
+ uint32_t idxALN_READ_LEN;
+ uint32_t idxALN_READ_TYPE;
+ uint64_t startALIGNMENT;
+ uint64_t stopALIGNMENT;
+
+ bool finalized;
+
+/* accessing PRIMARY_ALIGNMENT/RD_FILTER terribly slows down the reading
+ uint32_t idxALN_RD_FILTER;
+ so we count filtered reads, as well
+ uint32_t idxSEQ_RD_FILTER; */
+} Bases;
+typedef struct SraStatsTotal {
+ uint64_t spot_count;
+ uint64_t spot_count_mates;
+ uint64_t BIO_BASE_COUNT; /* bio_len */
+ uint64_t bio_len_mates;
+ uint64_t BASE_COUNT; /* total_len */
+ uint64_t bad_spot_count;
+ uint64_t bad_bio_len;
+ uint64_t filtered_spot_count;
+ uint64_t filtered_bio_len;
+ uint64_t total_cmp_len; /* CMP_READ : compressed */
+
+ bool variable_nreads;
+ uint32_t nreads; /* if (nreads == 0) then (nreads is variable) */
+ Statistics * stats ; /* nreads elements */
+ Statistics2* stats2; /* nreads elements */
+
+ Bases bases_count;
+} SraStatsTotal;
+typedef struct srastat_parms {
+ const char* table_path;
+
+ bool xml; /* output format (txt or xml) */
+ bool printMeta;
+ bool quick; /* quick mode: stats from meta */
+ bool skip_members; /* not to print spot_group statistics */
+ bool progress; /* show progress */
+ bool skip_alignment; /* not to print alignment info */
+ bool print_arcinfo;
+ bool statistics; /* calculate average and stdev */
+ bool test; /* test stdev */
+
+ const XMLLogger *logger;
+
+ int64_t start, stop;
+
+ bool hasSPOT_GROUP;
+ bool variableReadLength;
+
+ SraStatsTotal total; /* is used in srastat_print */
+} srastat_parms;
+
static
void Statistics2Init(Statistics2* self, double sum, int64_t count)
{
@@ -281,7 +352,7 @@ static void Statistics2Print(const Statistics2* selfs,
look the same */
uint64_t spot_count)
{
- int i = 0;
+ uint32_t i = 0;
if (nreads) {
assert(selfs && indent);
@@ -311,29 +382,80 @@ static bool columnUndefined(rc_t rc) {
|| rc == SILENT_RC(rcVDB, rcCursor, rcUpdating, rcColumn, rcNotFound );
}
-typedef struct {
- uint64_t cnt[5];
- bool CS_NATIVE;
- const VCursor *curs;
- uint32_t idx;
-
- bool finalized;
-} Bases;
-
-static rc_t BasesInit(Bases *self, const VTable *vtbl) {
+static rc_t BasesInit(Bases *self, const Ctx *ctx, const VTable *vtbl,
+ const srastat_parms *pb)
+{
rc_t rc = 0;
assert(self);
memset(self, 0, sizeof *self);
- if (rc == 0) {
+ assert(ctx && pb);
+
+ if (ctx->db != NULL && pb -> start == 0 && pb -> stop == 0) {
+ const VTable * tbl = NULL;
+ rc_t r2 = VDatabaseOpenTableRead(ctx->db, &tbl, "PRIMARY_ALIGNMENT");
+ if (r2 == 0) {
+ const VCursor *curs = NULL;
+ rc = VTableCreateCachedCursorRead ( tbl, & curs,
+ DEFAULT_CURSOR_CAPACITY);
+ DISP_RC(rc,
+ "Cannot VTableCreateCachedCursorRead(PRIMARY_ALIGNMENT)");
+ if (rc == 0) {
+ rc = VCursorPermitPostOpenAdd(curs);
+ DISP_RC(rc, "Cannot VCursorPermitPostOpenAdd");
+ }
+ if (rc == 0) {
+ rc = VCursorOpen(curs);
+ DISP_RC(rc, "Cannot VCursorOpen");
+ }
+ if (rc == 0) {
+ const char name [] = "(INSDC:4na:bin)RAW_READ";
+ rc = VCursorAddColumn(curs, &self->idxALIGNMENT, name);
+ DISP_RC2(rc, "Cannot VCursorAddColumn", name);
+ }
+ if (rc == 0) {
+ const char name [] = "READ_LEN";
+ rc = VCursorAddColumn(curs, &self->idxALN_READ_LEN, name);
+ if (rc != 0)
+ PLOGERR(klogInt, (klogInt, rc, "Cannot VCursorAddColumn"
+ "(PRIMARY_ALIGNMENT:$(name))", "name=%s", name));
+ }
+ if (rc == 0) {
+ const char name [] = "READ_TYPE";
+ rc = VCursorAddColumn(curs, &self->idxALN_READ_TYPE, name);
+ if (rc != 0)
+ PLOGERR(klogInt, (klogInt, rc, "Cannot VCursorAddColumn"
+ "(PRIMARY_ALIGNMENT:$(name))", "name=%s", name));
+ }
+ if (rc == 0) {
+ int64_t first = 0;
+ uint64_t count = 0;
+ rc = VCursorIdRange(curs, 0, &first, &count);
+ if ( rc == 0 ) {
+ self->startALIGNMENT = first;
+ self->stopALIGNMENT = first + count;
+ }
+ }
+ if (rc != 0)
+ VCursorRelease ( curs );
+ else
+ self -> cursALIGNMENT = curs;
+ self->basesType = ebtRAW_READ;
+ }
+ RELEASE(VTable, tbl);
+ }
+
+ if (rc == 0 && self->cursSEQUENCE == NULL
+ && self->basesType != ebtRAW_READ)
+ {
const char name[] = "CS_NATIVE";
const void *base = NULL;
const VCursor *curs = NULL;
uint32_t idx = 0;
- self->CS_NATIVE = true;
+ self->basesType = ebtCSREAD;
rc = VTableCreateCachedCursorRead(vtbl, &curs, DEFAULT_CURSOR_CAPACITY);
DISP_RC(rc, "Cannot VTableCreateCachedCursorRead");
@@ -352,7 +474,7 @@ static rc_t BasesInit(Bases *self, const VTable *vtbl) {
rc = VCursorAddColumn(curs, &idx, "%s", name);
if (rc != 0) {
if (columnUndefined(rc)) {
- self->CS_NATIVE = false;
+ self->basesType = ebtREAD;
rc = 0;
}
else {
@@ -377,7 +499,8 @@ static rc_t BasesInit(Bases *self, const VTable *vtbl) {
}
if (rc == 0) {
- self->CS_NATIVE = *((bool*)base);
+ bool CS_NATIVE = *((bool*)base);
+ self -> basesType = CS_NATIVE ? ebtCSREAD : ebtREAD;
}
}
@@ -386,28 +509,64 @@ static rc_t BasesInit(Bases *self, const VTable *vtbl) {
RELEASE(VCursor, curs);
}
- {
- const char *name = self->CS_NATIVE ? "CSREAD" : "READ";
- const char *datatype
- = self->CS_NATIVE ? "INSDC:x2cs:bin" : "INSDC:x2na:bin";
- rc = VTableCreateCachedCursorRead(vtbl, &self->curs, DEFAULT_CURSOR_CAPACITY);
+ if (self->cursSEQUENCE == NULL && rc == 0) {
+ const char *name = self->basesType == ebtCSREAD
+ ? "(INSDC:x2cs:bin)CSREAD" :
+ self->basesType == ebtREAD
+ ? "(INSDC:x2na:bin)READ" : "(INSDC:x2na:bin)CMP_READ";
+ rc = VTableCreateCachedCursorRead(vtbl, &self->cursSEQUENCE,
+ DEFAULT_CURSOR_CAPACITY);
DISP_RC(rc, "Cannot VTableCreateCachedCursorRead");
if (rc == 0) {
- rc = VCursorAddColumn(self->curs,
- &self->idx, "(%s)%s", datatype, name);
- if (rc != 0) {
+ rc = VCursorAddColumn(self->cursSEQUENCE, &self->idxSEQUENCE, name);
+ if (rc != 0)
PLOGERR(klogInt, (klogInt, rc,
- "Cannot VCursorAddColumn(($(type)),$(name)",
- "type=%s,name=%s", datatype, name));
- }
+ "Cannot VCursorAddColumn($(name))", "name=%s", name));
}
if (rc == 0) {
- rc = VCursorOpen(self->curs);
- if (rc != 0) {
+ const char name [] = "READ_LEN";
+ rc = VCursorAddColumn(self->cursSEQUENCE,
+ &self->idxSEQ_READ_LEN, name);
+ if (rc != 0)
+ PLOGERR(klogInt, (klogInt, rc,
+ "Cannot VCursorAddColumn($(name))", "name=%s", name));
+ }
+ if (rc == 0) {
+ const char name [] = "READ_TYPE";
+ rc = VCursorAddColumn(self->cursSEQUENCE,
+ &self->idxSEQ_READ_TYPE, name);
+ if (rc != 0)
+ PLOGERR(klogInt, (klogInt, rc,
+ "Cannot VCursorAddColumn($(name))", "name=%s", name));
+ }
+ if (rc == 0) {
+ rc = VCursorOpen(self->cursSEQUENCE);
+ if (rc != 0)
PLOGERR(klogInt, (klogInt, rc,
- "Cannot VCursorOpen(($(type)),$(name)))",
- "type=%s,name=%s", datatype, name));
+ "Cannot VCursorOpen($(name))", "name=%s", name));
+ }
+ }
+
+ if ( rc == 0 ) {
+ int64_t first = 0;
+ uint64_t count = 0;
+ rc = VCursorIdRange(self->cursSEQUENCE, 0, &first, &count);
+ if ( rc == 0 ) {
+ if (pb->start > 0) {
+ self->startSEQUENCE = pb->start;
+ if (self->startSEQUENCE < first)
+ self->startSEQUENCE = first;
+ }
+ else
+ self->startSEQUENCE = first;
+
+ if (pb->stop > 0) {
+ self->stopSEQUENCE = pb->stop;
+ if ( ( uint64_t ) self->stopSEQUENCE > first + count)
+ self->stopSEQUENCE = first + count;
}
+ else
+ self->stopSEQUENCE = first + count;
}
}
@@ -417,7 +576,7 @@ static rc_t BasesInit(Bases *self, const VTable *vtbl) {
static void BasesFinalize(Bases *self) {
assert(self);
- if (self->curs == NULL) {
+ if (self->cursSEQUENCE == NULL) {
LOGMSG(klogInfo, "Bases statistics will not be printed : "
"READ cursor was not opened during BasesFinalize()");
return;
@@ -431,66 +590,174 @@ static rc_t BasesRelease(Bases *self) {
assert(self);
- RELEASE(VCursor , self->curs);
+ RELEASE(VCursor, self->cursSEQUENCE);
+ RELEASE(VCursor, self->cursALIGNMENT);
return rc;
}
-static void BasesAdd(Bases *self, int64_t spotid) {
+static rc_t BasesAdd(Bases *self, int64_t spotid, bool alignment) {
rc_t rc = 0;
const void *base = NULL;
bitsz_t row_bits = ~0;
bitsz_t i = ~0;
const unsigned char *bases = NULL;
+ bitsz_t boff = 0;
+
+ const VCursor * c = NULL;
+ int64_t row_id = 0;
+
+ uint32_t dREAD_LEN [MAX_NREADS];
+ uint8_t dREAD_TYPE [MAX_NREADS] = { 1 };
+ int nreads = 0;
+
+ int read = 0;
+ uint32_t nxtRdStart = 0;
assert(self);
- if (self->curs == NULL) {
- return;
+ if (self->cursSEQUENCE == NULL) {
+ return 0;
}
- {
+ if ( alignment ) {
+ c = self -> cursALIGNMENT;
+ row_id = self -> idxALIGNMENT;
+ }
+ else {
+ c = self -> cursSEQUENCE;
+ row_id = self -> idxSEQUENCE;
+ }
+ if (rc == 0) {
+ rc = VCursorColumnRead(c, spotid,
+ alignment ? self->idxALN_READ_LEN : self->idxSEQ_READ_LEN,
+ &base, &boff, &row_bits);
+ DISP_RC_Read(rc, "READ_LEN", spotid, "while calling VCursorColumnRead");
+ if (rc == 0) {
+ if (boff & 7)
+ rc = RC(rcExe, rcColumn, rcReading, rcOffset, rcInvalid);
+ else if (row_bits & 7)
+ rc = RC(rcExe, rcColumn, rcReading, rcSize, rcInvalid);
+ else if ((row_bits >> 3) > sizeof(dREAD_LEN))
+ rc = RC(rcExe, rcColumn, rcReading, rcBuffer, rcInsufficient);
+ DISP_RC_Read(rc, "READ_LEN", spotid,
+ "after calling VCursorColumnRead");
+ }
+ if (rc == 0) {
+ nreads = (row_bits >> 3) / sizeof(*dREAD_LEN);
+ memmove(dREAD_LEN, ((const char*)base) + (boff >> 3),
+ ( size_t ) row_bits >> 3);
+ }
+ }
+
+ if (rc == 0) {
+ rc = VCursorColumnRead(c, spotid,
+ alignment ? self->idxALN_READ_TYPE : self->idxSEQ_READ_TYPE,
+ &base, &boff, &row_bits);
+ DISP_RC_Read(rc, "READ_TYPE", spotid,
+ "while calling VCursorColumnRead");
+ if (rc == 0) {
+ if (boff & 7)
+ rc = RC(rcExe, rcColumn, rcReading, rcOffset, rcInvalid);
+ else if (row_bits & 7)
+ rc = RC(rcExe, rcColumn, rcReading, rcSize, rcInvalid);
+ else if ((row_bits >> 3) > sizeof(dREAD_TYPE))
+ rc = RC(rcExe, rcColumn, rcReading,
+ rcBuffer, rcInsufficient);
+ else if (((row_bits >> 3) / sizeof(*dREAD_TYPE)) != nreads)
+ rc = RC(rcExe, rcColumn, rcReading, rcData, rcIncorrect);
+ DISP_RC_Read(rc, "READ_TYPE", spotid,
+ "after calling VCursorColumnRead");
+ if (rc == 0)
+ memmove(dREAD_TYPE,
+ ((const char*)base) + (boff >> 3),
+ ( size_t ) row_bits >> 3);
+ }
+ }
+
+ if ( rc == 0 ) {
uint32_t elem_bits = 0, elem_off = 0, elem_cnt = 0;
- rc = VCursorCellDataDirect(self->curs, spotid, self->idx,
- &elem_bits, &base, &elem_off, &elem_cnt);
+ rc = VCursorCellDataDirect(c, spotid, row_id,
+ &elem_bits, &base, &elem_off, &elem_cnt);
if (rc != 0) {
+ const char * name = self->basesType == ebtCSREAD ? "CSREAD"
+ : self->basesType == ebtREAD ? "READ" : "RAW_READ";
PLOGERR(klogInt, (klogErr, rc,
- "while VCursorCellDataDirect(READ, $(type))",
- "type=%s", self->CS_NATIVE ? "CS_NATIVE" : "not CS_NATIVE"));
+ "while VCursorCellDataDirect($(name))",
+ "name=%s", name));
BasesRelease(self);
- return;
+ return rc;
}
row_bits = elem_cnt * elem_bits;
}
+ if ( rc != 0 )
+ return rc;
+
if ((row_bits % 8) != 0) {
+ const char * name = self->basesType == ebtCSREAD ? "CSREAD"
+ : self->basesType == ebtREAD ? "READ" : "RAW_READ";
rc = RC(rcExe, rcColumn, rcReading, rcData, rcInvalid);
PLOGERR(klogInt, (klogErr, rc, "Invalid row_bits '$(row_bits) "
- "while VCursorCellDataDirect(READ, $(type), spotid=$(spotid))",
- "row_bits=%lu,type=%s,spotid=%lu",
- row_bits, self->CS_NATIVE ? "CS_NATIVE" : "not CS_NATIVE", spotid));
+ "while VCursorCellDataDirect($(name), spotid=$(spotid))",
+ "row_bits=%lu,name=%s,spotid=%lu", row_bits, name, spotid));
BasesRelease(self);
- return;
+ return rc;
}
row_bits /= 8;
bases = base;
+
for (i = 0; i < row_bits; ++i) {
unsigned char base = bases[i];
+ if ( i == nxtRdStart) {
+ if ( read > nreads )
+ return RC(rcExe, rcNumeral, rcComparing, rcData, rcInvalid);
+ nxtRdStart += dREAD_LEN [ read ++ ];
+ if ( ( (dREAD_TYPE[read-1] & SRA_READ_TYPE_BIOLOGICAL) == 0 ) )
+ /* skip non-biological reads */
+ {
+ i += dREAD_LEN [ read - 1 ] - 1;
+ continue;
+ }
+ }
+
+ if ( alignment ) {
+ const unsigned char x [16]
+ = { 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, };
+ /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ A C G T N */
+ if (base > 15) {
+ rc = RC(rcExe, rcColumn, rcReading, rcData, rcInvalid);
+ PLOGERR(klogInt, (klogErr, rc, "Invalid RAW_READ column "
+ "value '$(base)' while VCursorCellDataDirect"
+ "(spotid=$(spotid), index=$(i))",
+ "base=%d,spotid=%lu,i=%lu", base, spotid, i));
+ BasesRelease(self);
+ return rc;
+ }
+ assert ( base < sizeof x / sizeof x [ 0 ] );
+ base = x [ base ];
+ }
+
if (base > 4) {
+ const char * name = self->basesType == ebtCSREAD ? "CSREAD"
+ : self->basesType == ebtREAD ? "READ" : "RAW_READ";
rc = RC(rcExe, rcColumn, rcReading, rcData, rcInvalid);
PLOGERR(klogInt, (klogErr, rc,
- "Invalid READ column value '$(base) while VCursorCellDataDirect"
- "($(type), spotid=$(spotid), index=$(i))",
- "base=%d,type=%s,spotid=%lu,index=%lu",
- base, self->CS_NATIVE ? "CS_NATIVE" : "not CS_NATIVE",
- spotid, i));
+ "Invalid READ column value '$(base)' while VCursorCellDataDirect"
+ "($(name), spotid=$(spotid), index=$(i))",
+ "base=%d,name=%s,spotid=%lu,i=%lu",
+ base, name, spotid, i));
BasesRelease(self);
- return;
+ return rc;
}
+
++self->cnt[base];
}
+
+ return 0;
}
static rc_t BasesPrint(const Bases *self,
@@ -510,10 +777,10 @@ static rc_t BasesPrint(const Bases *self,
return rc;
}
- name = self->CS_NATIVE ? "0123." : "ACGTN";
+ name = self->basesType == ebtCSREAD ? "0123." : "ACGTN";
- OUTMSG(("%s<%s cs_native=\"%s\" count=\"%lu\">\n",
- indent, tag, self->CS_NATIVE ? "true" : "false", base_count));
+ OUTMSG(("%s<%s cs_native=\"%s\" count=\"%lu\">\n", indent,
+ tag, self->basesType == ebtCSREAD ? "true" : "false", base_count));
for (i = 0; i < 5; ++i) {
OUTMSG(("%s <Base value=\"%c\" count=\"%lu\"/>\n",
@@ -526,31 +793,13 @@ static rc_t BasesPrint(const Bases *self,
self->cnt[3] + self->cnt[4] != base_count)
{
rc = RC(rcExe, rcNumeral, rcComparing, rcData, rcInvalid);
- LOGERR(klogErr, rc, "stored base count did not match observed base count");
+ LOGERR(klogErr, rc,
+ "stored base count did not match observed base count");
}
return rc;
}
-typedef struct {
- uint64_t spot_count;
- uint64_t spot_count_mates;
- uint64_t BIO_BASE_COUNT; /* bio_len */
- uint64_t bio_len_mates;
- uint64_t BASE_COUNT; /* total_len */
- uint64_t bad_spot_count;
- uint64_t bad_bio_len;
- uint64_t filtered_spot_count;
- uint64_t filtered_bio_len;
- uint64_t total_cmp_len; /* CMP_READ : compressed */
-
- bool variable_nreads;
- uint32_t nreads; /* if (nreads == 0) then (nreads is variable) */
- Statistics * stats ; /* nreads elements */
- Statistics2* stats2; /* nreads elements */
-
- Bases bases_count;
-} SraStatsTotal;
static
rc_t SraStatsTotalMakeStatistics(SraStatsTotal* self, uint32_t nreads)
{
@@ -592,7 +841,7 @@ void SraStatsTotalStatistics2Init(SraStatsTotal* self,
uint64_t sum = sums[i];
Statistics2* stats = self->stats2 + i;
assert(stats);
- Statistics2Init(stats, sum, count[i]);
+ Statistics2Init(stats, ( double ) sum, count[i]);
}
}
@@ -652,7 +901,7 @@ static
void SraStatsTotalAdd(SraStatsTotal* self,
uint32_t* values, uint32_t nreads)
{
- int i = ~0;
+ uint32_t i = 0;
assert(self && values);
@@ -677,7 +926,7 @@ void SraStatsTotalAdd(SraStatsTotal* self,
static
void SraStatsTotalAdd2(SraStatsTotal* self, uint32_t* values) {
- int i = 0;
+ uint32_t i = 0;
assert(self && values);
@@ -709,7 +958,7 @@ void print_double_or_int(const char* name, double val, bool variable) {
static void StatisticsPrint(const Statistics* selfs,
uint32_t nreads, const char* indent)
{
- int i = ~0;
+ uint32_t i = 0;
if (nreads) {
assert(selfs && indent);
@@ -733,7 +982,7 @@ static rc_t StatisticsDiff(const Statistics* ss,
{
rc_t rc = 0;
- int i = ~0;
+ uint32_t i = 0;
assert(ss && ss2);
@@ -802,28 +1051,6 @@ static rc_t SraStatsTotalPrintStatistics(
return rc;
}
-typedef struct srastat_parms {
- const char* table_path;
-
- bool xml; /* output format (txt or xml) */
- bool printMeta;
- bool quick; /* quick mode: stats from meta */
- bool skip_members; /* not to print spot_group statistics */
- bool progress; /* show progress */
- bool skip_alignment; /* not to print alignment info */
- bool print_arcinfo;
- bool statistics; /* calculate average and stdev */
- bool test; /* test stdev */
-
- const XMLLogger *logger;
-
- int64_t start, stop;
-
- bool hasSPOT_GROUP;
- bool variableReadLength;
-
- SraStatsTotal total; /* is used in srastat_print */
-} srastat_parms;
typedef struct SraStatsMeta {
bool found;
uint64_t BASE_COUNT;
@@ -856,7 +1083,7 @@ typedef struct SraMeta {
Formatter formatter;
Loader loader;
} SraMeta;
-typedef struct ArcInfo_struct {
+typedef struct ArcInfo {
KTime_t timestamp;
struct {
const char* tag;
@@ -868,15 +1095,6 @@ typedef struct Quality {
uint32_t value;
uint64_t count;
} Quality;
-typedef struct QualityStats {
- Quality* QUALITY;
- size_t allocated;
- size_t used;
-} QualityStats;
-typedef enum EMetaState {
- eMSNotFound,
- eMSFound
-} EMetaState;
typedef struct Count {
EMetaState state;
uint64_t value;
@@ -888,39 +1106,21 @@ typedef struct Counts {
Count BASE_COUNT;
Count SPOT_COUNT;
} Counts;
-typedef struct TableCounts {
- EMetaState state;
- Counts* count;
- size_t allocated;
- size_t used;
-} TableCounts;
-typedef struct Ctx {
- const BSTree* tr;
- const MetaDataStats* meta;
- const SraMeta* info;
- const SraSizeStats* sizes;
- const ArcInfo* arc_info;
- srastat_parms* pb;
- SraStatsTotal* total;
- const VDatabase* db;
- QualityStats quality;
- TableCounts tables;
-} Ctx;
typedef rc_t (CC * RG_callback)(const BAM_HEADER_RG* rg, const void* data);
static
rc_t CC meta_RG_callback(const BAM_HEADER_RG* rg, const void* data)
{
rc_t rc = 0;
- const MetaDataStats* meta = data;
+ const MetaDataStats* meta_stats = data;
- int i = 0;
+ uint32_t i = 0;
bool found = false;
- assert(rg && meta && rg->ID);
+ assert(rg && meta_stats && rg->ID);
- for (i = 0; i < meta->spotGroupN && rc == 0; ++i) {
- SraStatsMeta* ss = &meta->spotGroup[i];
+ for (i = 0; i < meta_stats->spotGroupN && rc == 0; ++i) {
+ SraStatsMeta* ss = &meta_stats->spotGroup[i];
const char* spot_group = ss->spot_group;
assert(spot_group);
@@ -987,7 +1187,7 @@ static rc_t QualityParse(Quality* self,
{
rc_t rc = 0;
const char start[] = "PHRED_";
- int i = strlen(start);
+ size_t i = strlen(start);
assert(self && name);
@@ -1282,7 +1482,7 @@ rc_t TableCountsRead(TableCounts* self, const VDatabase* db)
memset(self, 0, sizeof *self);
if (db == NULL)
- { return 0; }
+ return 0;
rc = VDatabaseListTbl(db, &names);
DISP_RC(rc, "while calling VDatabaseListTbl");
@@ -2134,20 +2334,20 @@ static rc_t printXmlEscaped ( const char * c ) {
}
static
-void srastatmeta_print(const MetaDataStats* meta, srastat_parms *pb)
+void srastatmeta_print(const MetaDataStats* meta_stats, srastat_parms *pb)
{
uint32_t i = 0;
- assert(pb && meta);
+ assert(pb && meta_stats);
- if (meta->spotGroupN) {
- for (i = 0; i < meta->spotGroupN; ++i) {
- const SraStatsMeta* ss = &meta->spotGroup[i];
+ if (meta_stats->spotGroupN) {
+ for (i = 0; i < meta_stats->spotGroupN; ++i) {
+ const SraStatsMeta* ss = &meta_stats->spotGroup[i];
if (!pb->skip_members) {
const char* spot_group = "";
if (strcmp("default", ss->spot_group) != 0)
{ spot_group = ss->spot_group; }
if ((spot_group == NULL || spot_group[0] == '\0')
- && meta->spotGroupN > 1)
+ && meta_stats->spotGroupN > 1)
{
/* skip an empty "extra" 'default' spot_group */
if (ss->BASE_COUNT == 0 && ss->BIO_BASE_COUNT == 0 &&
@@ -2191,13 +2391,13 @@ void srastatmeta_print(const MetaDataStats* meta, srastat_parms *pb)
if (!pb->xml && !(pb->quick && pb->skip_members)) {
const char* spot_group = "";
OUTMSG(("%s|%s|%ld:%ld:%ld|:|:|:\n",
- pb->table_path, spot_group, meta->table.spot_count,
- meta->table.BASE_COUNT, meta->table.BIO_BASE_COUNT));
+ pb->table_path, spot_group, meta_stats->table.spot_count,
+ meta_stats->table.BASE_COUNT, meta_stats->table.BIO_BASE_COUNT));
}
- pb->total.spot_count = meta->table.spot_count;
- pb->total.BASE_COUNT = meta->table.BASE_COUNT;
- pb->total.BIO_BASE_COUNT = meta->table.BIO_BASE_COUNT;
- pb->total.total_cmp_len = meta->table.CMP_BASE_COUNT;
+ pb->total.spot_count = meta_stats->table.spot_count;
+ pb->total.BASE_COUNT = meta_stats->table.BASE_COUNT;
+ pb->total.BIO_BASE_COUNT = meta_stats->table.BIO_BASE_COUNT;
+ pb->total.total_cmp_len = meta_stats->table.CMP_BASE_COUNT;
}
}
@@ -2246,7 +2446,7 @@ rc_t process_align_info(const char* indent, const Ctx* ctx)
{
rc_t rc = 0;
uint32_t count = 0;
- int i = 0;
+ uint32_t i = 0;
const VDBDependencies* dep = NULL;
assert(indent && ctx);
@@ -2285,11 +2485,20 @@ rc_t process_align_info(const char* indent, const Ctx* ctx)
DISP_RC(rc, "while calling VDBDependenciesName");
break;
}
+
rc = VDBDependenciesPath(dep, &path, i);
if (rc != 0) {
DISP_RC(rc, "while calling VDBDependenciesPath");
break;
}
+ if ( path == NULL ) {
+ rc = VDBDependenciesPathRemote(dep, &path, i);
+ if (rc != 0) {
+ DISP_RC(rc, "while calling VDBDependenciesPath");
+ break;
+ }
+ }
+
rc = VDBDependenciesLocal(dep, &local, i);
if (rc != 0) {
DISP_RC(rc, "while calling VDBDependenciesLocal");
@@ -2318,6 +2527,181 @@ rc_t process_align_info(const char* indent, const Ctx* ctx)
return rc;
}
+/******************************************************************************/
+
+static
+void printChangeValue ( const char * value,
+ size_t size )
+{
+ assert ( value );
+
+ switch ( size ) {
+ case 1:
+ OUTMSG ( ( "%hu", ( ( const uint8_t * ) value ) [ 0 ] ) );
+ break;
+ case 2:
+ OUTMSG ( ( "%u", ( ( const uint16_t * ) value ) [ 0 ] ) );
+ break;
+ case 4:
+ OUTMSG ( ( "%u", ( ( const uint32_t * ) value ) [ 0 ] ) );
+ break;
+ case 8:
+ OUTMSG ( ( "%lu", ( ( const uint64_t * ) value ) [ 0 ] ) );
+ break;
+ default: {
+ size_t i = 0;
+ for ( i = 0; i < size; ++ i ) {
+ char c = value [ i ];
+ if ( isprint ( c ) )
+ OUTMSG ( ( "%c", c ) );
+ else
+ OUTMSG ( ( "\\x%02X", ( ( const uint8_t* ) value ) [ i ] ));
+ }
+ break;
+ }
+ }
+}
+
+static
+rc_t KMDataNodePrintAttr ( const KMDataNode * self,
+ const KNamelist * attrs,
+ uint32_t idx )
+{
+ rc_t rc = 0;
+
+ const char * attr = NULL;
+ char value [ 512 ] = "";
+ size_t size = 0;
+
+ rc = KNamelistGet ( attrs, idx, & attr );
+
+ if ( rc == 0 )
+ rc = KMDataNodeReadAttr ( self, attr, value, sizeof value, & size );
+
+ if ( rc == 0 )
+ rc = OUTMSG ( ( " %s=\"%s\"", attr, value ) );
+
+ if ( rc != 0 )
+ OUTMSG ( ( " CANNOT_READ_ATTRIBULE=\"%R\"", rc ) );
+
+ return rc;
+}
+
+static
+rc_t KMDataNodePrintChange ( const KMDataNode * self,
+ const char * name,
+ const char * indent )
+{
+ rc_t rc = 0;
+
+ const KMDataNode * node = NULL;
+
+ rc = KMDataNodeOpenNodeRead ( self, & node, name );
+ DISP_RC2 ( rc, name,
+ "while calling KMDataNodeOpenNodeRead" );
+
+ if ( rc == 0 ) {
+ const char tag [] = "Change";
+ char value [] = "sra-stat: ERROR WHILE READING CHANGES CHILD VALUE";
+ size_t size = 0;
+
+ KNamelist * names = NULL;
+ uint32_t count = 0;
+
+ KMDataNodeRead ( node, 0, value, sizeof value, & size, NULL );
+ if ( size == 0 )
+ size = sizeof value;
+
+ rc = KMDataNodeListAttr ( node, & names );
+ DISP_RC2 ( rc, name,
+ "while calling KMDataNodeListAttr" );
+
+ if ( rc == 0 )
+ rc = KNamelistCount ( names, & count );
+
+ if ( rc == 0 ) {
+ uint32_t i = 0;
+
+ OUTMSG ( ( " %s" "<%s", indent, tag ) );
+ for ( i = 0; i < count; ++i )
+ KMDataNodePrintAttr ( node, names, i );
+ OUTMSG ( ( ">" ) );
+
+ printChangeValue ( value, size );
+
+ OUTMSG ( ( "</%s>\n", tag ) );
+ }
+
+ RELEASE ( KNamelist, names );
+ }
+
+ RELEASE ( KMDataNode, node );
+
+ return rc;
+}
+
+static rc_t CtxPrintCHANGES ( const Ctx * self, const char * indent ) {
+ rc_t rc = 0;
+
+ const KMetadata * meta = NULL;
+ const KMDataNode * parent = NULL;
+ bool toReleaseMeta = false;
+
+ const char meta_tag [] = "CHANGES";
+ const char tag [] = "Changes";
+
+ assert ( self && indent );
+
+ meta = self -> meta;
+ if ( meta == NULL ) {
+ rc = VDatabaseOpenMetadataRead ( self -> db, & meta );
+ if ( rc != 0 )
+ return rc;
+ toReleaseMeta = true;
+ }
+
+ KMetadataOpenNodeRead ( meta, & parent, meta_tag );
+
+ if ( parent != NULL ) {
+ KNamelist * names = NULL;
+ rc = KMDataNodeListChild ( parent, & names );
+ DISP_RC2 ( rc, meta_tag, "while calling KMDataNodeListChild" );
+
+ OUTMSG ( ( "%s" "<%s>" "\n", indent, tag ) );
+
+ if ( rc == 0 ) {
+ uint32_t count = 0;
+ rc = KNamelistCount ( names, & count );
+ DISP_RC2 ( rc, meta_tag, "while calling KNamelistCount" );
+
+ if ( rc == 0 ) {
+ uint32_t i = 0;
+ for ( i = 0; i < count && rc == 0; ++i ) {
+ const char * child = NULL;
+ rc = KNamelistGet ( names, i, & child );
+ if ( rc != 0 )
+ PLOGERR ( klogInt, ( klogInt, rc,
+ "while calling $(n)::KNamelistGet($(idx))",
+ "n=%s,idx=%i", meta_tag, i ) );
+ else
+ rc = KMDataNodePrintChange ( parent, child, indent );
+ }
+ }
+ }
+
+ OUTMSG ( ( "%s" "</%s>" "\n", indent, tag ) );
+
+ RELEASE ( KNamelist, names );
+ }
+
+ if ( toReleaseMeta )
+ RELEASE ( KMetadata, meta );
+
+ RELEASE ( KMDataNode, parent );
+
+ return rc;
+}
+
static
rc_t print_results(const Ctx* ctx)
{
@@ -2326,23 +2710,23 @@ rc_t print_results(const Ctx* ctx)
bool mismatch = false;
assert(ctx && ctx->pb
- && ctx->tr && ctx->sizes && ctx->info && ctx->meta && ctx->total);
+ && ctx->tr && ctx->sizes && ctx->info && ctx->meta_stats && ctx->total);
if (ctx->pb->quick) {
- if ( ! ctx->meta -> found )
+ if ( ! ctx->meta_stats -> found )
LOGMSG(klogWarn, "Statistics metadata not found");
else
{
- ctx->pb->hasSPOT_GROUP = ctx->meta->spotGroupN > 1 ||
- (ctx->meta->spotGroupN == 1
- && strcmp("default", ctx->meta->spotGroup[0].spot_group) != 0);
+ ctx->pb->hasSPOT_GROUP = ctx->meta_stats->spotGroupN > 1 ||
+ (ctx->meta_stats->spotGroupN == 1
+ && strcmp("default", ctx->meta_stats->spotGroup[0].spot_group) != 0);
}
}
- if (ctx->meta->found && ! ctx->pb->quick) {
+ if (ctx->meta_stats->found && ! ctx->pb->quick) {
/* bool mismatch = false; */
SraStats* ss = (SraStats*)BSTreeFind(ctx->tr, "", srastats_cmp);
- const SraStatsMeta* m = &ctx->meta->table;
+ const SraStatsMeta* m = &ctx->meta_stats->table;
if (ctx->total->BASE_COUNT != m->BASE_COUNT)
{ mismatch = true; }
if (ctx->total->BIO_BASE_COUNT != m->BIO_BASE_COUNT)
@@ -2352,11 +2736,11 @@ rc_t print_results(const Ctx* ctx)
if (ctx->total->total_cmp_len != m->CMP_BASE_COUNT)
{ mismatch = true; }
if (ss != NULL) {
- const SraStatsMeta* m = &ctx->meta->table;
+ const SraStatsMeta* m = &ctx->meta_stats->table;
uint32_t i = 0;
- for (i = 0; i < ctx->meta->spotGroupN && !mismatch; ++i) {
+ for (i = 0; i < ctx->meta_stats->spotGroupN && !mismatch; ++i) {
const char* spot_group = "";
- m = &ctx->meta->spotGroup[i];
+ m = &ctx->meta_stats->spotGroup[i];
assert(m);
if (strcmp("default", m->spot_group) != 0)
{ spot_group = m->spot_group; }
@@ -2379,18 +2763,18 @@ rc_t print_results(const Ctx* ctx)
if (ctx->pb->xml) {
OUTMSG(("<Run accession=\"%s\"", ctx->pb->table_path));
- if (!ctx->pb->quick || ! ctx->meta->found) {
+ if (!ctx->pb->quick || ! ctx->meta_stats->found) {
OUTMSG((" read_length=\"%s\"",
ctx->pb->variableReadLength ? "variable" : "fixed"));
}
if (ctx->pb->quick) {
OUTMSG((" spot_count=\"%ld\" base_count=\"%ld\"",
- ctx->meta->table.spot_count, ctx->meta->table.BASE_COUNT));
+ ctx->meta_stats->table.spot_count, ctx->meta_stats->table.BASE_COUNT));
OUTMSG((" base_count_bio=\"%ld\"",
- ctx->meta->table.BIO_BASE_COUNT));
- if (ctx->meta->table.CMP_BASE_COUNT > 0) {
+ ctx->meta_stats->table.BIO_BASE_COUNT));
+ if (ctx->meta_stats->table.CMP_BASE_COUNT > 0) {
OUTMSG((" cmp_base_count=\"%ld\"",
- ctx->meta->table.CMP_BASE_COUNT));
+ ctx->meta_stats->table.CMP_BASE_COUNT));
}
}
else {
@@ -2413,8 +2797,8 @@ rc_t print_results(const Ctx* ctx)
else if (ctx->pb->skip_members) {
if (ctx->pb->quick) {
OUTMSG(("%s||%ld:%ld:%ld|:|:|:\n",
- ctx->pb->table_path, ctx->meta->table.spot_count,
- ctx->meta->table.BASE_COUNT, ctx->meta->table.BIO_BASE_COUNT));
+ ctx->pb->table_path, ctx->meta_stats->table.spot_count,
+ ctx->meta_stats->table.BASE_COUNT, ctx->meta_stats->table.BIO_BASE_COUNT));
}
else {
OUTMSG(("%s||%ld:%ld:%ld|%ld:%ld|%ld:%ld|%ld:%ld\n",
@@ -2427,14 +2811,14 @@ rc_t print_results(const Ctx* ctx)
}
}
- if (ctx->pb->quick && ctx->meta->found) {
+ if (ctx->pb->quick && ctx->meta_stats->found) {
memset(&ctx->pb->total, 0, sizeof ctx->pb->total);
- rc = parse_bam_header(ctx->db, meta_RG_callback, ctx->meta);
- srastatmeta_print(ctx->meta, ctx->pb);
- if (ctx->pb->total.spot_count != ctx->meta->table.spot_count ||
- ctx->pb->total.BIO_BASE_COUNT != ctx->meta->table.BIO_BASE_COUNT ||
- ctx->pb->total.BASE_COUNT != ctx->meta->table.BASE_COUNT ||
- ctx->pb->total.total_cmp_len != ctx->meta->table.CMP_BASE_COUNT)
+ rc = parse_bam_header(ctx->db, meta_RG_callback, ctx->meta_stats);
+ srastatmeta_print(ctx->meta_stats, ctx->pb);
+ if (ctx->pb->total.spot_count != ctx->meta_stats->table.spot_count ||
+ ctx->pb->total.BIO_BASE_COUNT != ctx->meta_stats->table.BIO_BASE_COUNT ||
+ ctx->pb->total.BASE_COUNT != ctx->meta_stats->table.BASE_COUNT ||
+ ctx->pb->total.total_cmp_len != ctx->meta_stats->table.CMP_BASE_COUNT)
{
rc = RC(rcExe, rcData, rcValidating, rcData, rcUnequal);
}
@@ -2444,8 +2828,8 @@ rc_t print_results(const Ctx* ctx)
memset(&ctx->pb->total, 0, sizeof ctx->pb->total);
rc = parse_bam_header(ctx->db, tree_RG_callback, ctx->tr);
BSTreeForEach(ctx->tr, false, srastat_print, ctx->pb);
- if (ctx->meta->found) {
- const SraStatsMeta* m = &ctx->meta->table;
+ if (ctx->meta_stats->found) {
+ const SraStatsMeta* m = &ctx->meta_stats->table;
if (ctx->pb->total.BASE_COUNT != m->BASE_COUNT
|| ctx->pb->total.BIO_BASE_COUNT != m->BIO_BASE_COUNT
|| ctx->pb->total.spot_count != m->spot_count)
@@ -2520,7 +2904,7 @@ rc_t print_results(const Ctx* ctx)
}
if (rc == 0 && !ctx->pb->quick) {
rc2 = BasesPrint(&ctx->total->bases_count,
- ctx->total->BASE_COUNT, " ");
+ ctx->total->BIO_BASE_COUNT, " ");
}
if (rc == 0 && !ctx->pb->skip_alignment) {
rc = process_align_info(" ", ctx);
@@ -2540,7 +2924,7 @@ rc_t print_results(const Ctx* ctx)
}
else {
size_t k = strftime(b, sizeof(b), "%Y-%m-%dT%H:%M:%S", tm);
- OUTMSG((" <Archive timestamp=\"%.*s\"", k, b));
+ OUTMSG((" <Archive timestamp=\"%.*s\"", ( int ) k, b));
for(i = 0; i < sizeof(a->i) / sizeof(a->i[0]); i++) {
OUTMSG((" size.%s=\"%lu\" md5.%s=\"%s\"",
a->i[i].tag, a->i[i].size, a->i[i].tag, a->i[i].md5));
@@ -2552,6 +2936,14 @@ rc_t print_results(const Ctx* ctx)
{ rc = QualityStatsPrint(&ctx->quality, " "); }
if (rc == 0)
{ rc = TableCountsPrint(&ctx->tables, " "); }
+ if ( rc == 0 )
+ rc = CtxPrintCHANGES ( ctx, " " );
+ if ( rc == 0 && ctx -> n90 > 0 )
+ OUTMSG ( (" <AssemblyStatistics "
+ "n50=\"%lu\" l50=\"%lu\" n90=\"%lu\" l90=\"%lu\" "
+ "n=\"%lu\" l=\"%lu\"/>\n",
+ ctx -> n50, ctx -> l50, ctx -> n90, ctx -> l90,\
+ ctx -> n, ctx -> l ) );
OUTMSG(("</Run>\n"));
}
if (mismatch && ctx->pb->start == 0 && ctx->pb->stop == 0) {
@@ -2581,7 +2973,7 @@ int64_t CC srastats_sort ( const BSTNode *item, const BSTNode *n )
}
static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
- SraStatsTotal* total, const VTable *vtbl)
+ SraStatsTotal* total, const Ctx * ctx, const VTable *vtbl)
{
rc_t rc = 0;
@@ -2684,18 +3076,6 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
rc = VCursorIdRange(curs, 0, &first, &count);
DISP_RC(rc, "VCursorIdRange() failed");
if (rc == 0) {
- rc = BasesInit(&total->bases_count, vtbl);
- }
- if (rc == 0) {
- const KLoadProgressbar *pr = NULL;
- bool bad_read_filter = false;
- bool fixedNReads = true;
- bool fixedReadLength = true;
-
- uint32_t g_dREAD_LEN[MAX_NREADS];
-
- memset(g_dREAD_LEN, 0, sizeof g_dREAD_LEN);
-
if (pb->start > 0) {
start = pb->start;
if (start < first) {
@@ -2708,13 +3088,26 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
if (pb->stop > 0) {
stop = pb->stop;
- if (stop > first + count) {
+ if ( ( uint64_t ) stop > first + count) {
stop = first + count;
}
}
else {
stop = first + count;
}
+ }
+ if (rc == 0) {
+ rc = BasesInit(&total->bases_count, ctx, vtbl, pb);
+ }
+ if (rc == 0) {
+ const KLoadProgressbar *pr = NULL;
+ bool bad_read_filter = false;
+ bool fixedNReads = true;
+ bool fixedReadLength = true;
+
+ uint32_t g_dREAD_LEN[MAX_NREADS];
+
+ memset(g_dREAD_LEN, 0, sizeof g_dREAD_LEN);
for (spotid = start; spotid < stop && rc == 0; ++spotid) {
SraStats* ss;
@@ -2733,7 +3126,13 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
}
if (rc == 0 && pb->progress && pr == NULL) {
- rc = KLoadProgressbar_Make(&pr, stop + 1 - start);
+ uint64_t b = total->bases_count.stopSEQUENCE + 1
+ - total->bases_count.startSEQUENCE;
+ if ( total->bases_count.stopALIGNMENT > 0 )
+ b += total->bases_count.stopALIGNMENT + 1
+ - total->bases_count.startALIGNMENT;
+ rc = KLoadProgressbar_Make(&pr,
+ stop + 1 - start + b);
if (rc != 0) {
DISP_RC(rc, "cannot initialize progress bar");
rc = 0;
@@ -2755,11 +3154,11 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
rc = RC(rcExe, rcColumn, rcReading,
rcOffset, rcInvalid);
}
- if (row_bits & 7) {
+ else if (row_bits & 7) {
rc = RC(rcExe, rcColumn, rcReading,
rcSize, rcInvalid);
}
- if ((row_bits >> 3) > sizeof(dREAD_LEN)) {
+ else if ((row_bits >> 3) > sizeof(dREAD_LEN)) {
rc = RC(rcExe, rcColumn, rcReading,
rcBuffer, rcInsufficient);
}
@@ -2768,9 +3167,10 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
}
if (rc == 0) {
int i, bio_len, bio_count, bad_cnt, filt_cnt;
- memmove(dREAD_LEN,
- ((const char*)base) + (boff>>3), row_bits >> 3);
- nreads = (row_bits >> 3) / sizeof(*dREAD_LEN);
+ memmove(dREAD_LEN, ((const char*)base) + (boff>>3),
+ ( size_t ) row_bits >> 3);
+ nreads
+ = (int) ((row_bits >> 3) / sizeof(*dREAD_LEN));
if (spotid == start) {
g_nreads = nreads;
if (pb->statistics) {
@@ -2792,15 +3192,15 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
rc = RC(rcExe, rcColumn, rcReading,
rcOffset, rcInvalid);
}
- if (row_bits & 7) {
+ else if (row_bits & 7) {
rc = RC(rcExe, rcColumn, rcReading,
rcSize, rcInvalid);
}
- if ((row_bits >> 3) > sizeof(dREAD_TYPE)) {
+ else if ((row_bits >> 3)
+ > sizeof(dREAD_TYPE))
rc = RC(rcExe, rcColumn, rcReading,
rcBuffer, rcInsufficient);
- }
- if ((row_bits >> 3) != nreads) {
+ else if ((row_bits >> 3) != nreads) {
rc = RC(rcExe, rcColumn, rcReading,
rcData, rcIncorrect);
}
@@ -2811,7 +3211,7 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
if (rc == 0) {
memmove(dREAD_TYPE,
((const char*)base) + (boff >> 3),
- row_bits >> 3);
+ ( size_t ) row_bits >> 3);
if (idxSPOT_GROUP != 0) {
rc = VCursorColumnRead(curs, spotid,
idxSPOT_GROUP, &base, &boff, &row_bits);
@@ -2824,11 +3224,11 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
rcReading,
rcOffset, rcInvalid);
}
- if (row_bits & 7) {
+ else if (row_bits & 7) {
rc = RC(rcExe, rcColumn,
rcReading,
rcSize, rcInvalid); }
- if ((row_bits >> 3)
+ else if ((row_bits >> 3)
> sizeof(dSPOT_GROUP))
{
rc = RC(rcExe, rcColumn,
@@ -2839,10 +3239,10 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
"after calling VCursorColumnRead"
);
if (rc == 0) {
- int n = row_bits >> 3;
+ bitsz_t n = row_bits >> 3;
memmove(dSPOT_GROUP,
((const char*)base)+(boff>>3),
- row_bits>>3);
+ ( size_t ) row_bits>>3);
dSPOT_GROUP[n]='\0';
if (n > 1 ||
(n == 1 && dSPOT_GROUP[0]))
@@ -2868,15 +3268,15 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
DISP_RC_Read(rc, RD_FILTER, spotid,
"while calling VCursorColumnRead");
if (rc == 0) {
- int size = row_bits >> 3;
+ bitsz_t size = row_bits >> 3;
if (boff & 7) {
rc = RC(rcExe, rcColumn, rcReading,
rcOffset, rcInvalid); }
- if (row_bits & 7) {
+ else if (row_bits & 7) {
rc = RC(rcExe, rcColumn, rcReading,
rcSize, rcInvalid);
}
- if (size > sizeof dRD_FILTER) {
+ else if (size > sizeof dRD_FILTER) {
rc = RC(rcExe, rcColumn, rcReading,
rcBuffer, rcInsufficient);
}
@@ -2885,7 +3285,7 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
if (rc == 0) {
memmove(dRD_FILTER,
((const char*)base) + (boff>>3),
- size);
+ ( size_t ) size);
if (size < nreads) {
/* RD_FILTER is expected to have nreads elements */
if (size == 1) {
@@ -2936,7 +3336,7 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
if (boff & 7) {
rc = RC(rcExe, rcColumn, rcReading,
rcOffset, rcInvalid); }
- if (row_bits & 7) {
+ else if (row_bits & 7) {
rc = RC(rcExe, rcColumn, rcReading,
rcSize, rcInvalid);
}
@@ -2977,8 +3377,6 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
ss->total_cmp_len += cmp_len;
total->total_cmp_len += cmp_len;
- BasesAdd(&total->bases_count, spotid);
-
if (pb->statistics) {
SraStatsTotalAdd(total, dREAD_LEN, nreads);
}
@@ -3075,6 +3473,32 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
} /* for (spotid = start; spotid <= stop && rc == 0;
++spotid) */
+ for (spotid = total->bases_count.startALIGNMENT;
+ !pb->quick &&
+ spotid < total->bases_count.stopALIGNMENT && rc == 0;
+ ++spotid)
+ {
+ rc = BasesAdd(&total->bases_count, spotid, true);
+ if ( rc == 0 && pb->progress )
+ KLoadProgressbar_Process ( pr, 1, false );
+ rc = Quitting();
+ if (rc != 0)
+ LOGMSG(klogWarn, "Interrupted");
+ }
+
+ for (spotid = total->bases_count.startSEQUENCE;
+ !pb->quick &&
+ spotid < total->bases_count.stopSEQUENCE && rc == 0;
+ ++spotid)
+ {
+ rc = BasesAdd(&total->bases_count, spotid, false);
+ if ( rc == 0 && pb->progress )
+ KLoadProgressbar_Process ( pr, 1, false );
+ rc = Quitting();
+ if (rc != 0)
+ LOGMSG(klogWarn, "Interrupted");
+ }
+
if (rc == 0) {
BasesFinalize(&total->bases_count);
pb->variableReadLength = !fixedReadLength;
@@ -3150,7 +3574,8 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
rcBuffer, rcInsufficient);
}
if (rc == 0) {
- memmove(dREAD_LEN, ((const char*)base) + (boff>>3), row_bits>>3);
+ memmove(dREAD_LEN, ((const char*)base) + (boff>>3),
+ ( size_t ) row_bits>>3);
}
for (i = 0; i < g_nreads; ++i) {
diff_sq[i] +=
@@ -3183,13 +3608,16 @@ rc_t run(srastat_parms* pb)
assert(pb && pb->table_path);
rc = VDBManagerMakeRead(&vmgr, NULL);
- if (rc != 0) {
+ if (rc != 0)
LOGERR(klogInt, rc, "failed to open VDBManager");
- }
else {
SraSizeStats sizes;
ArcInfo arc_info;
SraMeta info;
+
+ const VDatabase * db = NULL; /* sra-srat argument is a DB */
+ const VTable * tbl = NULL; /* sra-srat argument is a table:
+ don't release it*/
const VTable* vtbl = NULL;
VSchema *schema = NULL;
@@ -3201,11 +3629,12 @@ rc_t run(srastat_parms* pb)
if (rc == 0) {
rc = VDBManagerOpenTableRead(vmgr, &vtbl,
schema, "%s", pb->table_path);
- if (rc != 0 && GetRCObject(rc) == (enum RCObject)rcTable
- && GetRCState (rc) == rcIncorrect)
+ if (rc == 0 )
+ tbl = vtbl; /* sra-srat argument is a table */
+ else if ( GetRCObject(rc) == (enum RCObject)rcTable
+ && GetRCState (rc) == rcIncorrect)
{
const char altname[] = "SEQUENCE";
- const VDatabase *db = NULL;
rc_t rc2 = VDBManagerOpenDBRead(vmgr,
&db, schema, pb->table_path);
if (rc2 == 0) {
@@ -3214,7 +3643,6 @@ rc_t run(srastat_parms* pb)
rc = 0;
}
}
- VDatabaseRelease ( db );
}
if (rc != 0) {
PLOGERR(klogInt, (klogInt, rc,
@@ -3226,13 +3654,15 @@ rc_t run(srastat_parms* pb)
SraStatsTotal total;
const KTable* ktbl = NULL;
const KMetadata* meta = NULL;
- const VDatabase* db = NULL;
BSTree tr;
Ctx ctx;
BSTreeInit(&tr);
+
memset(&ctx, 0, sizeof ctx);
+ ctx . db = db;
+ ctx . tbl = tbl;
memset(&total, 0, sizeof total);
@@ -3243,11 +3673,6 @@ rc_t run(srastat_parms* pb)
DISP_RC(rc, "While calling KTableOpenMetadataRead");
}
if (rc == 0) {
- rc = VTableOpenParentRead(vtbl, &db);
- DISP_RC2(rc, pb->table_path,
- "while calling VTableOpenParentRead");
- }
- if (rc == 0) {
rc = get_stats_meta(meta, &stats, pb->quick);
if (rc == 0) {
if (pb->quick && !stats.found) {
@@ -3265,7 +3690,7 @@ rc_t run(srastat_parms* pb)
rc = get_load_info(meta, &info);
}
if (rc == 0 && !pb->quick) {
- rc = sra_stat(pb, &tr, &total, vtbl);
+ rc = sra_stat(pb, &tr, &total, &ctx, vtbl);
}
if (rc == 0 && pb->print_arcinfo ) {
rc = get_arc_info(pb->table_path, &arc_info, vmgr, vtbl);
@@ -3282,10 +3707,13 @@ rc_t run(srastat_parms* pb)
TableCountsSort(&ctx.tables);
}
}
+ if ( rc == 0 )
+ rc = CalculateNL ( db, & ctx );
if (rc == 0) {
- ctx.db = db;
+ if ( db == NULL )
+ ctx . meta = meta;
ctx.info = &info;
- ctx.meta = &stats;
+ ctx.meta_stats = &stats;
ctx.pb = pb;
ctx.sizes = &sizes;
ctx.total = &total;
@@ -3295,10 +3723,9 @@ rc_t run(srastat_parms* pb)
}
BSTreeWhack(&tr, bst_whack_free, NULL);
SraStatsTotalFree(&total);
- RELEASE(VDatabase, db);
RELEASE(KTable, ktbl);
{
- int i;
+ uint32_t i;
for (i = 0; i < stats.spotGroupN; ++i) {
SraStatsMetaDestroy(&stats.spotGroup[i]);
}
@@ -3311,6 +3738,7 @@ rc_t run(srastat_parms* pb)
CtxRelease(&ctx);
RELEASE(KMetadata, meta);
}
+ RELEASE(VDatabase, db);
RELEASE(VSchema, schema);
RELEASE(VTable, vtbl);
}
=====================================
tools/sra-stat/sra-stat.h
=====================================
--- a/tools/sra-stat/sra-stat.h
+++ b/tools/sra-stat/sra-stat.h
@@ -33,10 +33,61 @@ struct KFile;
struct VCursor;
struct VTable;
+typedef enum EMetaState {
+ eMSNotFound,
+ eMSFound
+} EMetaState;
+
+typedef struct QualityStats {
+ struct Quality* QUALITY;
+ size_t allocated;
+ size_t used;
+} QualityStats;
+
+typedef struct TableCounts {
+ EMetaState state;
+ struct Counts* count;
+ size_t allocated;
+ size_t used;
+} TableCounts;
+
+typedef struct Ctx {
+ const struct BSTree* tr;
+ const struct MetaDataStats* meta_stats;
+ const struct SraMeta* info;
+ const struct SraSizeStats* sizes;
+ const struct ArcInfo* arc_info;
+ struct srastat_parms* pb;
+ struct SraStatsTotal* total;
+
+ const struct VDatabase * db; /* sra-srat argument is a DB */
+ const struct VTable * tbl; /* sra-srat argument is a table */
+
+ const struct KMetadata* meta; /* from Table (when running on table) */
+ QualityStats quality;
+ TableCounts tables;
+
+ uint64_t n;
+ uint64_t l;
+ uint64_t n50;
+ uint64_t l50;
+ uint64_t n90;
+ uint64_t l90;
+} Ctx;
+
rc_t CC VCursorColumnRead(const struct VCursor *self, int64_t id,
uint32_t idx, const void **base, bitsz_t *offset, bitsz_t *size);
rc_t CC VTableMakeSingleFileArchive(const struct VTable *self,
const struct KFile **sfa, bool lightweight);
+rc_t CC CalculateNL ( const struct VDatabase * db, Ctx * ctx );
+
+#define RELEASE(type, obj) do { rc_t rc2 = type##Release(obj); \
+ if (rc2 && !rc) { rc = rc2; } obj = NULL; } while (false)
+
+
+#define DISP_RC(rc, msg) (void)((rc == 0) ? 0 : LOGERR(klogInt, rc, msg))
+
+
#endif /* _h_sra_stat_tools_ */
View it on GitLab: https://salsa.debian.org/med-team/sra-sdk/compare/8717fcc9b25b5fd443131a29f2de215eea42955f...de21e4654473da150b5a90678d3b3c7b853fa1c3
---
View it on GitLab: https://salsa.debian.org/med-team/sra-sdk/compare/8717fcc9b25b5fd443131a29f2de215eea42955f...de21e4654473da150b5a90678d3b3c7b853fa1c3
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.alioth.debian.org/pipermail/debian-med-commit/attachments/20180218/201d1482/attachment-0001.html>
More information about the debian-med-commit
mailing list