[med-svn] [Git][med-team/sra-sdk][master] 6 commits: New upstream version 2.8.2-5+dfsg

Andreas Tille gitlab at salsa.debian.org
Sun Feb 18 14:56:54 UTC 2018


Andreas Tille pushed to branch master at Debian Med / sra-sdk


Commits:
f9830f55 by Andreas Tille at 2018-02-18T08:58:20+01:00
New upstream version 2.8.2-5+dfsg
- - - - -
0c939402 by Andreas Tille at 2018-02-18T08:58:29+01:00
Update upstream source from tag 'upstream/2.8.2-5+dfsg'

Update to upstream version '2.8.2-5+dfsg'
with Debian dir 3e1180b842babd4ed10a1842cd8b72bd973eafbd
- - - - -
d1161b9a by Andreas Tille at 2018-02-18T08:59:47+01:00
Standards-Version: 4.1.3

- - - - -
f6f9c913 by Andreas Tille at 2018-02-18T15:44:03+01:00
New upstream version

- - - - -
ead4ba92 by Andreas Tille at 2018-02-18T15:53:56+01:00
Spelling

- - - - -
de21e465 by Andreas Tille at 2018-02-18T15:55:12+01:00
Realise that build fails with d/compat=11 and upload as is to unstable

- - - - -


13 changed files:

- debian/changelog
- debian/control
- debian/patches/absolute_vschema_path_in_test.patch
- + test/sra-sort/md-created.sh
- tools/sra-sort/csra-tbl.c
- tools/sra-sort/tbl-pair.c
- tools/sra-sort/tbl-pair.h
- tools/sra-sort/xcheck-ref-align.c
- tools/sra-sort/xcheck.h
- tools/sra-stat/Makefile
- + tools/sra-stat/assembly-statistics.c
- tools/sra-stat/sra-stat.c
- tools/sra-stat/sra-stat.h


Changes:

=====================================
debian/changelog
=====================================
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,10 @@
+sra-sdk (2.8.2-5+dfsg-1) unstable; urgency=medium
+
+  * New upstream version
+  * Standards-Version: 4.1.3
+
+ -- Andreas Tille <tille at debian.org>  Sun, 18 Feb 2018 15:44:10 +0100
+
 sra-sdk (2.8.2-3+dfsg-1) unstable; urgency=medium
 
   [ Steffen Moeller ]


=====================================
debian/control
=====================================
--- a/debian/control
+++ b/debian/control
@@ -11,7 +11,7 @@ Build-Depends: debhelper (>= 10),
                libhdf5-dev,
                libmagic-dev,
                libxml2-dev
-Standards-Version: 4.1.1
+Standards-Version: 4.1.3
 Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/sra-sdk.git
 Vcs-Git: https://anonscm.debian.org/git/debian-med/sra-sdk.git
 Homepage: https://github.com/ncbi/sra-tools/


=====================================
debian/patches/absolute_vschema_path_in_test.patch
=====================================
--- a/debian/patches/absolute_vschema_path_in_test.patch
+++ b/debian/patches/absolute_vschema_path_in_test.patch
@@ -1,6 +1,6 @@
 Author: Andreas Tille <tille at debian.org>
 Last-Update: Mon, 06 Jun 2016 11:24:46 +0200
-Description: We need to set absolute pathes also in the test
+Description: We need to set absolute paths also in the test
  (test fails anyway for different reasons and is excluded later)
 
 --- a/test/vcf-loader/test-vcf-loader.cpp


=====================================
test/sra-sort/md-created.sh
=====================================
--- /dev/null
+++ b/test/sra-sort/md-created.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+TMP=/export/home/TMP
+if [ ! -d "$TMP" ] ; then
+    echo $TMP is not found: skipping the test
+    exit 0
+fi
+
+if [ "$PANFS_PAN1" == "" ] ; then
+    echo PANFS_PAN1 is not set: exiting
+    exit 1
+fi
+
+I=`whoami`
+DSTDIR=$TMP/$I/sra-sort-md
+DST=$DSTDIR/sorted
+
+rm -fr $DSTDIR
+
+SORT=sra-sort
+which $SORT > /dev/null 2>&1
+if [ "$?" != "0" ] ; then
+    echo "sra-sort not found: add it to your PATH"
+    exit 10
+fi
+
+OPT="--tempdir $DSTDIR --mmapdir $DSTDIR --map-file-bsize 80000000000 --max-ref-idx-ids 4000000000 --max-idx-ids 4000000000 --max-large-idx-ids 800000000"
+SRC=$PANFS_PAN1/sra-test/TEST-DATA/SRR5318091-sra-sort-md
+CMD="$SORT -f -v -L6 $OPT $SRC $DST"
+echo $ $CMD
+
+$CMD
+EXIT=$?
+if [ $EXIT -ne 0 ] ; then
+    echo "sra-sort failed with $EXIT"
+    rm -r $DSTDIR
+    exit $EXIT
+fi
+
+if [ ! -d "$DST/md" ] ; then
+    echo Failure: md was not created in $DST:
+    echo $ ls $DST
+    ls $DST
+    rm -r $DSTDIR
+    exit 20
+else
+    echo Success: md was created in $DST:
+    echo $ ls $DST
+    ls $DST
+fi
+
+rm -r $DSTDIR


=====================================
tools/sra-sort/csra-tbl.c
=====================================
--- a/tools/sra-sort/csra-tbl.c
+++ b/tools/sra-sort/csra-tbl.c
@@ -446,6 +446,10 @@ void cSRATblPairPostCopyAlign ( cSRATblPair *self, const ctx_t *ctx )
 
     cSRAPair *csra = self -> csra;
 
+    struct KThread ** pt = NULL;
+    assert ( self );
+    pt = & self -> dad . thread;
+
     RowSetIteratorRelease ( self -> rsi, ctx );
     self -> rsi = NULL;
 
@@ -458,10 +462,10 @@ void cSRATblPairPostCopyAlign ( cSRATblPair *self, const ctx_t *ctx )
     switch ( self -> align_idx )
     {
     case 1:
-        CrossCheckRefAlignTbl ( ctx, csra -> reference -> dtbl, csra -> prim_align -> dtbl, "PRIMARY_ALIGNMENT" );
+        CrossCheckRefAlignTbl ( ctx, csra -> reference -> dtbl, csra -> prim_align -> dtbl, "PRIMARY_ALIGNMENT", pt );
         break;
     case 2:
-        CrossCheckRefAlignTbl ( ctx, csra -> reference -> dtbl, csra -> sec_align -> dtbl, "SECONDARY_ALIGNMENT" );
+        CrossCheckRefAlignTbl ( ctx, csra -> reference -> dtbl, csra -> sec_align -> dtbl, "SECONDARY_ALIGNMENT", pt );
 
 #if SEQUENCE_BEFORE_SECONDARY
         cSRATblPairWhackMappingIdx ( self, ctx );


=====================================
tools/sra-sort/tbl-pair.c
=====================================
--- a/tools/sra-sort/tbl-pair.c
+++ b/tools/sra-sort/tbl-pair.c
@@ -47,6 +47,7 @@ typedef struct TablePair StdTblPair;
 #include <klib/text.h>
 #include <klib/namelist.h>
 #include <klib/rc.h>
+#include <kproc/thread.h> /* KThreadWait */
 
 #include <string.h>
 
@@ -1050,6 +1051,43 @@ void TablePairDestroy ( TablePair *self, const ctx_t *ctx )
     VTableRelease ( self -> stbl );
 
     MemFree ( ctx, ( void* ) self -> full_spec, self -> full_spec_size + 1 );
+    
+    if ( self -> thread != NULL ) {
+        rc_t rc = 0;
+        rc_t status = 0;
+        STATUS ( 2, "waiting for background thread 0x%p to finish...",
+                                                     self -> thread );
+
+        rc = KThreadWait ( self -> thread, & status );
+        if ( rc != 0 )
+            ERROR ( rc, "failed to wait for background thread 0x%p",
+                                                             self -> thread );
+        else if ( status == 0 )
+            STATUS ( 2, "...background thread 0x%p succeed", self -> thread );
+        else {
+            ERROR ( status, "background thread 0x%p failed", self -> thread );
+            rc = status;
+        }
+
+        {
+            rc_t r2 = KThreadRelease ( self -> thread );
+            if ( r2 != 0 ) {
+                ERROR ( r2, "failed to release background thread 0x%p",
+                                                             self -> thread );
+                if ( rc == 0 )
+                    rc = r2;
+            }
+        }
+
+        if ( rc != 0 && ctx != NULL ) {
+            ctx_t * mctx = NULL;
+            for ( mctx = ( ctx_t * ) ctx; mctx != NULL && mctx -> rc == 0;
+                  mctx = ( ctx_t* ) mctx -> caller )
+            {
+                mctx -> rc = rc;
+            }
+        }
+    }
 
     memset ( self, 0, sizeof * self );
 }


=====================================
tools/sra-sort/tbl-pair.h
=====================================
--- a/tools/sra-sort/tbl-pair.h
+++ b/tools/sra-sort/tbl-pair.h
@@ -45,6 +45,7 @@
  */
 struct VTable;
 struct DbPair;
+struct KThread;
 struct ColumnReader;
 struct ColumnWriter;
 struct ColumnPair;
@@ -109,6 +110,9 @@ struct TablePair
     /* true if already exploded */
     bool exploded;
 
+    /* Thread launched by TablePairPostCopy [ to do consistency-check ] */
+    struct KThread * thread;
+
     uint8_t align [ 2 ];
 };
 


=====================================
tools/sra-sort/xcheck-ref-align.c
=====================================
--- a/tools/sra-sort/xcheck-ref-align.c
+++ b/tools/sra-sort/xcheck-ref-align.c
@@ -341,11 +341,12 @@ rc_t CC CrossCheckRefAlignTblRun ( const KThread *self, void *data )
     ctx_t thread_ctx = { & pb -> caps, NULL, & ctx_info };
     const ctx_t *ctx = & thread_ctx;
 
-    STATUS ( 2, "running consistency-check on background thread" );
+    STATUS ( 2, "running consistency-check on background thread 0x%p", self );
 
     CrossCheckRefAlignTblInt ( ctx, pb -> ref_tbl, pb -> align_tbl, pb -> align_name );
 
-    STATUS ( 2, "finished consistency-check on background thread" );
+    STATUS ( 2, "finished consistency-check on background thread 0x%p: %s",
+             self, ctx -> rc ? "failure" : "success ");
 
     VTableRelease ( pb -> align_tbl );
     VTableRelease ( pb -> ref_tbl );
@@ -357,7 +358,8 @@ rc_t CC CrossCheckRefAlignTblRun ( const KThread *self, void *data )
 #endif
 
 void CrossCheckRefAlignTbl ( const ctx_t *ctx,
-    const VTable *ref_tbl, const VTable *align_tbl, const char *align_name )
+    const VTable *ref_tbl, const VTable *align_tbl, const char *align_name,
+    KThread ** pt )
 {
     FUNC_ENTRY ( ctx );
 
@@ -368,6 +370,9 @@ void CrossCheckRefAlignTbl ( const ctx_t *ctx,
 
     STATUS ( 2, "consistency-check on join indices between REFERENCE and %s tables", align_name );
 
+    assert ( pt );
+    * pt = NULL;
+
 #if USE_BGTHREAD
     name_len = strlen ( align_name );
     TRY ( pb = MemAlloc ( ctx, sizeof * pb + name_len, false ) )
@@ -391,6 +396,7 @@ void CrossCheckRefAlignTbl ( const ctx_t *ctx,
                     rc = KThreadMake ( & t, CrossCheckRefAlignTblRun, pb );
                     if ( rc == 0 )
                     {
+                        * pt = t;
                         return;
                     }
 


=====================================
tools/sra-sort/xcheck.h
=====================================
--- a/tools/sra-sort/xcheck.h
+++ b/tools/sra-sort/xcheck.h
@@ -35,6 +35,7 @@
 /*--------------------------------------------------------------------------
  * forwards
  */
+struct KThread;
 struct VTable;
 
 
@@ -44,6 +45,6 @@ struct VTable;
  *  runs a cross-check of REFERENCE.<name>_IDS against <name>.REF_ID
  */
 void CrossCheckRefAlignTbl ( const ctx_t *ctx,
-    struct VTable const *ref_tbl, struct VTable const *align_tbl, const char *align_name );
+    struct VTable const *ref_tbl, struct VTable const *align_tbl, const char *align_name, struct KThread ** pt );
 
 #endif


=====================================
tools/sra-stat/Makefile
=====================================
--- a/tools/sra-stat/Makefile
+++ b/tools/sra-stat/Makefile
@@ -62,6 +62,7 @@ clean: stdclean
 #  sra statistics
 #
 SRASTAT_SRC = \
+	assembly-statistics \
 	sra \
 	sra-stat \
 


=====================================
tools/sra-stat/assembly-statistics.c
=====================================
--- /dev/null
+++ b/tools/sra-stat/assembly-statistics.c
@@ -0,0 +1,288 @@
+/*===========================================================================
+*
+*                            PUBLIC DOMAIN NOTICE
+*               National Center for Biotechnology Information
+*
+*  This software/database is a "United States Government Work" under the
+*  terms of the United States Copyright Act.  It was written as part of
+*  the author's official duties as a United States Government employee and
+*  thus cannot be copyrighted.  This software/database is freely available
+*  to the public for use. The National Library of Medicine and the U.S.
+*  Government have not placed any restriction on its use or reproduction.
+*
+*  Although all reasonable efforts have been taken to ensure the accuracy
+*  and reliability of the software and data, the NLM and the U.S.
+*  Government do not and cannot warrant the performance or results that
+*  may be obtained by using this software or data. The NLM and the U.S.
+*  Government disclaim all warranties, express or implied, including
+*  warranties of performance, merchantability or fitness for any particular
+*  purpose.
+*
+*  Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+#include "sra-stat.h" /* Ctx */
+
+#include <insdc/sra.h> /* INSDC_coord_len */
+
+#include <kdb/table.h> /* KTable */
+
+#include <klib/container.h> /* BSTNode */
+#include <klib/debug.h> /* DBGMSG */
+#include <klib/log.h> /* LOGERR */
+#include <klib/out.h> /* OUTMSG */
+#include <klib/rc.h>
+
+#include <vdb/blob.h> /* VBlob */
+#include <vdb/cursor.h> /* VCursor */
+#include <vdb/database.h> /* VDatabase */
+#include <vdb/table.h> /* VTable */
+#include <vdb/vdb-priv.h> /* VTableOpenKTableRead */
+
+typedef struct {
+    BSTNode n;
+    uint64_t length;
+} Contig;
+
+static void CC ContigWhack ( BSTNode * n, void * data ) {
+    free ( n );
+}
+
+static
+int64_t CC ContigSort ( const BSTNode * item, const BSTNode * n )
+{
+    const Contig * contigl = ( const Contig * ) item;
+    const Contig * contigr = ( const Contig * ) n;
+
+    assert ( contigl && contigr);
+
+    return contigl -> length < contigr -> length;
+}
+
+typedef struct {
+    uint64_t assemblyLength;
+    uint64_t contigLength;
+
+    uint64_t count;
+    uint64_t length;
+
+    BSTree bt;
+
+    uint64_t l50;
+    uint64_t n50;
+    uint64_t l90;
+    uint64_t n90;
+} Contigs;
+
+static void CC ContigNext ( BSTNode * n, void * data ) {
+    const Contig * contig = ( const Contig * ) n;
+    Contigs * nl = ( Contigs * ) data;
+
+    assert ( contig && nl );
+
+    ++ nl -> count;
+    nl -> length += contig -> length;
+
+    DBGMSG ( DBG_APP, DBG_COND_1, ( "Contig %lu/%lu: %lu. Total: %lu/%lu\n",
+        nl -> count, nl -> contigLength, contig -> length,
+        nl -> length, nl -> assemblyLength ) );
+
+    if ( nl -> l50 == 0 && nl -> length * 2 >= nl -> assemblyLength ) {
+        nl -> n50 = contig -> length;
+        nl -> l50 = nl -> count;
+        DBGMSG ( DBG_APP, DBG_COND_1, ( "L50: %lu, N50: %lu (%lu>=%lu/2)\n",
+            nl -> l50, nl -> n50, nl -> length, nl -> assemblyLength ) );
+    }
+
+    if ( nl -> l90 == 0 &&
+        .9 * nl -> assemblyLength <= nl -> length )
+    {
+        nl -> n90 = contig -> length;
+        nl -> l90 = nl -> count;
+        DBGMSG ( DBG_APP, DBG_COND_1, ( "L90: %lu, N90: %lu (%lu*.9>=%lu)\n",
+            nl -> l90, nl -> n90, nl -> length, nl -> assemblyLength ) );
+    }
+}
+
+static void ContigsInit ( Contigs * self ) {
+    assert ( self );
+
+    memset ( self, 0, sizeof * self );
+}
+
+static rc_t ContigsAdd ( Contigs * self, uint32_t length ) {
+    Contig * contig = ( Contig * ) calloc ( 1, sizeof * contig );
+
+    assert ( self );
+
+    if ( contig == NULL )
+        return RC ( rcExe, rcStorage, rcAllocating, rcMemory, rcExhausted );
+
+    self -> assemblyLength += length;
+    ++ self -> contigLength;
+
+    contig -> length = length;
+    return BSTreeInsert ( & self -> bt, ( BSTNode * ) contig, ContigSort );
+}
+
+static void ContigsCalculateStatistics ( Contigs * self ) {
+    assert ( self );
+
+    BSTreeForEach ( & self -> bt, false, ContigNext, self );
+}
+
+static void ContigsFini ( Contigs * self ) {
+    assert ( self );
+
+    BSTreeWhack ( & self -> bt, ContigWhack, NULL );
+}
+
+/* Calculate N50, L50 statistics:
+see https://en.wikipedia.org/wiki/N50,_L50,_and_related_statistics */
+rc_t CC CalculateNL ( const VDatabase * db, Ctx * ctx ) {
+    rc_t rc = 0;
+
+    const VTable * tbl = NULL;
+    const KTable * ktbl = NULL;
+    const KIndex * idx = NULL;
+    const VCursor * cursor = NULL;
+
+    uint32_t CMP_READ;
+    uint32_t READ_LEN;
+
+    Contigs contigs;
+
+    int64_t start = 1;
+    uint64_t count = 0;
+
+    /* Statictics is calculated just for VDatabase-s */
+    if ( db == NULL ) {
+        DBGMSG ( DBG_APP, DBG_COND_1,
+            ( "CalculateAssemblyStatistics skipped: not a database\n" ) );
+        return 0;
+    }
+
+    rc = VDatabaseOpenTableRead ( db, & tbl, "REFERENCE" );
+    /* Statictics is calculated just for VDatabase-s with REFERENCE */
+    if ( rc != 0 && GetRCState ( rc ) == rcNotFound ) {
+        DBGMSG ( DBG_APP, DBG_COND_1,
+            ( "CalculateAssemblyStatistics skipped: no REFERENCE table\n" ) );
+        return 0;
+    }
+
+    ContigsInit ( & contigs );
+
+    rc = VTableOpenKTableRead ( tbl, & ktbl );
+    DISP_RC ( rc, "while calling "
+        "VTableOpenKTableRead(VDatabaseOpenTableRead(REFERENCE))");
+    if ( rc == 0 ) {
+        rc = KTableOpenIndexRead ( ktbl, & idx, "i_name" );
+        DISP_RC ( rc, "while calling KTableOpenIndexRead"
+            "(VTableOpenKTableRead(VDatabaseOpenTableRead(REFERENCE),i_name)");
+    }
+    if ( rc == 0 ) {
+        rc = VTableCreateCursorRead ( tbl, & cursor );
+        DISP_RC ( rc, "while calling VTableCreateCursorRead(REFERENCE)");
+    }
+    if ( rc == 0 ) {
+        rc = VCursorAddColumn ( cursor,  & CMP_READ, "CMP_READ" );
+        DISP_RC ( rc, "while calling VCursorAddColumn(REFERENCE,CMP_READ)");
+    }
+    if ( rc == 0 ) {
+        rc = VCursorAddColumn ( cursor,  & READ_LEN, "READ_LEN" );
+        DISP_RC ( rc, "while calling VCursorAddColumn(REFERENCE,READ_LEN)");
+    }
+    if ( rc == 0 ) {
+        rc = VCursorOpen ( cursor );
+        DISP_RC ( rc, "while calling VCursorOpen(REFERENCE)");
+    }
+
+    for ( start = 1; rc == 0; start += count ) {
+        uint32_t row_len = 0;
+        const VBlob * blob = NULL;
+        size_t key_size = 0;
+        char key [ 4096 ];
+        rc = KIndexProjectText ( idx, start, & start, & count,
+                                 key, sizeof key, & key_size );
+        if ( rc == SILENT_RC
+            ( rcDB, rcIndex, rcProjecting, rcId, rcNotFound ) )
+        {
+            rc = 0;
+            break; /* no more references */
+        }
+        DISP_RC ( rc, "while calling KIndexProjectText(KTableOpenIndexRead"
+            "(VTableOpenKTableRead(VDatabaseOpenTableRead(REFERENCE),i_name)");
+        if ( rc == 0 ) {
+            rc = VCursorGetBlobDirect ( cursor, & blob, start, CMP_READ );
+            DISP_RC ( rc, "while calling VCursorGetBlobDirect(CMP_READ)" );
+        }
+        if ( rc == 0 ) {
+            uint32_t elem_bits = 0;
+            const void * base = NULL;
+            uint32_t boff = 0;
+            rc = VBlobCellData ( blob, start,
+                                 & elem_bits, & base, & boff, & row_len );
+            DISP_RC ( rc, "while calling VBlobCellData(CMP_READ)" );
+        }
+        if ( rc == 0 ) {
+            if ( row_len == 0 ) {
+                /* When CMP_READ is not empty - local reference.
+                   We calculate statistics just for local references */
+                DBGMSG ( DBG_APP, DBG_COND_1, ( "CalculateAssemblyStatistics: "
+                    "%s skipped: not a local reference\n", key ) );
+            }
+            else {
+                uint64_t length = 0;
+                INSDC_coord_len buffer = 0;
+                uint32_t row_len = 0;
+                rc = VCursorReadDirect ( cursor, start,
+                    READ_LEN, 8, & buffer, sizeof buffer, & row_len );
+                DISP_RC ( rc, "while calling VCursorReadDirect(READ_LEN,id)" );
+                if ( rc == 0 )
+                    length = buffer;
+                if ( rc == 0 && count > 1 ) {
+                    INSDC_coord_len buffer = 0;
+                    uint32_t row_len = 0;
+                    rc = VCursorReadDirect ( cursor, start + count - 1,
+                        READ_LEN, 8, & buffer, sizeof buffer, & row_len );
+                    DISP_RC ( rc,
+                        "while calling VCursorReadDirect(READ_LEN,id+count)" );
+                    if ( rc == 0 )
+                        length = length * ( count - 1) + buffer;
+                }
+                if ( rc == 0 )
+                    rc = ContigsAdd ( & contigs, length );
+            }
+        }
+        RELEASE ( VBlob, blob );
+    }
+
+    if ( rc == 0 )
+        ContigsCalculateStatistics ( & contigs );
+
+    RELEASE ( VCursor, cursor );
+    RELEASE ( KIndex, idx );
+    RELEASE ( KTable, ktbl );
+    RELEASE ( VTable, tbl );
+
+    if ( rc == 0 ) {
+        assert ( ctx );
+        assert ( contigs . assemblyLength == contigs . length );
+        assert ( contigs . contigLength == contigs . count );
+        if ( contigs . n90 > 0 ) {
+            ctx -> l50 = contigs . l50;
+            ctx -> n50 = contigs . n50;
+            ctx -> l90 = contigs . l90;
+            ctx -> n90 = contigs . n90;
+            ctx -> l = contigs . contigLength;
+            ctx -> n = contigs . assemblyLength;
+        }
+    }
+
+    ContigsFini ( & contigs );
+
+    return rc;
+}


=====================================
tools/sra-stat/sra-stat.c
=====================================
--- a/tools/sra-stat/sra-stat.c
+++ b/tools/sra-stat/sra-stat.c
@@ -20,7 +20,7 @@
 *
 *  Please cite the author in any work or product based on this material.
 *
-* ===========================================================================
+* ==============================================================================
 *
 */
 
@@ -54,6 +54,7 @@
 
 #include <sra/sraschema.h> /* VDBManagerMakeSRASchema */
 
+#include <vdb/blob.h> /* VBlobCellData */
 #include <vdb/cursor.h> /* VCursor */
 #include <vdb/database.h> /* VDatabaseRelease */
 #include <vdb/dependencies.h> /* VDBDependencies */
@@ -69,15 +70,12 @@
 #include <os-native.h> /* strtok_r on Windows */
 
 #include <assert.h>
+#include <ctype.h> /* isprint */
 #include <math.h> /* sqrt */
 #include <stdlib.h>
 #include <string.h>
 #include <time.h>
 
-/* #include <stdio.h> */ /* stderr */
-
-#define DISP_RC(rc, msg) (void)((rc == 0) ? 0 : LOGERR(klogInt, rc, msg))
-
 #define DISP_RC2(rc, name, msg) (void)((rc == 0) ? 0 : \
     PLOGERR(klogInt, (klogInt, rc, \
         "$(name): $(msg)", "name=%s,msg=%s", name, msg)))
@@ -253,6 +251,79 @@ typedef struct Statistics2 {
     double average;
     double diff_sq_sum;
 } Statistics2;
+typedef enum {
+    ebtUNDEFINED,
+    ebtREAD,
+    ebtCSREAD,
+    ebtRAW_READ,
+} EBasesType;
+typedef struct {
+    uint64_t cnt[5];
+    EBasesType basesType;
+
+    const VCursor * cursSEQUENCE;
+    uint32_t        idxSEQUENCE;
+    uint32_t        idxSEQ_READ_LEN;
+    uint32_t        idxSEQ_READ_TYPE;
+    uint64_t        startSEQUENCE;
+    uint64_t        stopSEQUENCE;
+
+    const VCursor * cursALIGNMENT;
+    uint32_t        idxALIGNMENT;
+    uint32_t        idxALN_READ_LEN;
+    uint32_t        idxALN_READ_TYPE;
+    uint64_t        startALIGNMENT;
+    uint64_t        stopALIGNMENT;
+
+    bool finalized;
+
+/* accessing PRIMARY_ALIGNMENT/RD_FILTER terribly slows down the reading
+    uint32_t        idxALN_RD_FILTER;
+   so we count filtered reads, as well
+    uint32_t        idxSEQ_RD_FILTER; */
+} Bases;
+typedef struct SraStatsTotal {
+    uint64_t spot_count;
+    uint64_t spot_count_mates;
+    uint64_t BIO_BASE_COUNT; /* bio_len */
+    uint64_t bio_len_mates;
+    uint64_t BASE_COUNT; /* total_len */
+    uint64_t bad_spot_count;
+    uint64_t bad_bio_len;
+    uint64_t filtered_spot_count;
+    uint64_t filtered_bio_len;
+    uint64_t total_cmp_len; /* CMP_READ : compressed */
+
+    bool variable_nreads;
+    uint32_t nreads; /* if (nreads == 0) then (nreads is variable) */
+    Statistics * stats ; /* nreads elements */
+    Statistics2* stats2; /* nreads elements */
+
+    Bases bases_count;
+} SraStatsTotal;
+typedef struct srastat_parms {
+    const char* table_path;
+
+    bool xml; /* output format (txt or xml) */
+    bool printMeta;
+    bool quick; /* quick mode: stats from meta */
+    bool skip_members; /* not to print spot_group statistics */
+    bool progress;     /* show progress */
+    bool skip_alignment; /* not to print alignment info */
+    bool print_arcinfo;
+    bool statistics; /* calculate average and stdev */
+    bool test; /* test stdev */
+
+    const XMLLogger *logger;
+
+    int64_t  start, stop;
+
+    bool hasSPOT_GROUP;
+    bool variableReadLength;
+
+    SraStatsTotal total; /* is used in srastat_print */
+} srastat_parms;
+
 static
 void Statistics2Init(Statistics2* self, double sum, int64_t  count)
 {
@@ -281,7 +352,7 @@ static void Statistics2Print(const Statistics2* selfs,
         look the same */
     uint64_t spot_count)
 {
-    int i = 0;
+    uint32_t i = 0;
 
     if (nreads) {
         assert(selfs && indent);
@@ -311,29 +382,80 @@ static bool columnUndefined(rc_t rc) {
         || rc == SILENT_RC(rcVDB, rcCursor, rcUpdating, rcColumn, rcNotFound );
 }
 
-typedef struct {
-    uint64_t cnt[5];
-    bool CS_NATIVE;
-    const VCursor   *curs;
-    uint32_t         idx;
-
-    bool finalized;
-} Bases;
-
-static rc_t BasesInit(Bases *self, const VTable *vtbl) {
+static rc_t BasesInit(Bases *self, const Ctx *ctx, const VTable *vtbl,
+                      const srastat_parms *pb)
+{
     rc_t rc = 0;
 
     assert(self);
     memset(self, 0, sizeof *self);
 
-    if (rc == 0) {
+    assert(ctx && pb);
+
+    if (ctx->db != NULL && pb -> start == 0 && pb -> stop == 0) {
+        const VTable * tbl = NULL;
+        rc_t r2 = VDatabaseOpenTableRead(ctx->db, &tbl, "PRIMARY_ALIGNMENT");
+        if (r2 == 0) {
+            const VCursor *curs = NULL;
+            rc = VTableCreateCachedCursorRead ( tbl, & curs,
+                                                DEFAULT_CURSOR_CAPACITY);
+            DISP_RC(rc,
+                "Cannot VTableCreateCachedCursorRead(PRIMARY_ALIGNMENT)");
+            if (rc == 0) {
+                rc = VCursorPermitPostOpenAdd(curs);
+                DISP_RC(rc, "Cannot VCursorPermitPostOpenAdd");
+            }
+            if (rc == 0) {
+                rc = VCursorOpen(curs);
+                DISP_RC(rc, "Cannot VCursorOpen");
+            }
+            if (rc == 0) {
+                const char name [] = "(INSDC:4na:bin)RAW_READ";
+                rc = VCursorAddColumn(curs, &self->idxALIGNMENT, name);
+                DISP_RC2(rc, "Cannot VCursorAddColumn", name);
+            }
+            if (rc == 0) {
+                const char name [] = "READ_LEN";
+                rc = VCursorAddColumn(curs, &self->idxALN_READ_LEN, name);
+                if (rc != 0)
+                    PLOGERR(klogInt, (klogInt, rc, "Cannot VCursorAddColumn"
+                        "(PRIMARY_ALIGNMENT:$(name))", "name=%s", name));
+            }
+            if (rc == 0) {
+                const char name [] = "READ_TYPE";
+                rc = VCursorAddColumn(curs, &self->idxALN_READ_TYPE, name);
+                if (rc != 0)
+                    PLOGERR(klogInt, (klogInt, rc, "Cannot VCursorAddColumn"
+                        "(PRIMARY_ALIGNMENT:$(name))", "name=%s", name));
+            }
+            if (rc == 0) {
+                int64_t first = 0;
+                uint64_t count = 0;
+                rc = VCursorIdRange(curs, 0, &first, &count);
+                if ( rc == 0 ) {
+                    self->startALIGNMENT = first;
+                    self->stopALIGNMENT = first + count;
+                }
+            }
+            if (rc != 0)
+                VCursorRelease ( curs );
+            else
+                self -> cursALIGNMENT = curs;
+            self->basesType = ebtRAW_READ;
+        }
+        RELEASE(VTable, tbl);
+    }
+
+    if (rc == 0 && self->cursSEQUENCE == NULL
+                && self->basesType != ebtRAW_READ)
+    {
         const char name[] = "CS_NATIVE";
         const void *base = NULL;
 
         const VCursor *curs = NULL;
         uint32_t idx = 0;
 
-        self->CS_NATIVE = true;
+        self->basesType = ebtCSREAD;
 
         rc = VTableCreateCachedCursorRead(vtbl, &curs, DEFAULT_CURSOR_CAPACITY);
         DISP_RC(rc, "Cannot VTableCreateCachedCursorRead");
@@ -352,7 +474,7 @@ static rc_t BasesInit(Bases *self, const VTable *vtbl) {
             rc = VCursorAddColumn(curs, &idx, "%s", name);
             if (rc != 0) {
                 if (columnUndefined(rc)) {
-                    self->CS_NATIVE = false;
+                    self->basesType = ebtREAD;
                     rc = 0;
                 }
                 else {
@@ -377,7 +499,8 @@ static rc_t BasesInit(Bases *self, const VTable *vtbl) {
                 }
 
                 if (rc == 0) {
-                    self->CS_NATIVE = *((bool*)base);
+                    bool CS_NATIVE = *((bool*)base);
+                    self -> basesType = CS_NATIVE ? ebtCSREAD : ebtREAD;
                 }
 
             }
@@ -386,28 +509,64 @@ static rc_t BasesInit(Bases *self, const VTable *vtbl) {
         RELEASE(VCursor, curs);
     }
 
-    {
-        const char *name = self->CS_NATIVE ? "CSREAD" : "READ";
-        const char *datatype
-            = self->CS_NATIVE ? "INSDC:x2cs:bin" : "INSDC:x2na:bin";
-        rc = VTableCreateCachedCursorRead(vtbl, &self->curs, DEFAULT_CURSOR_CAPACITY);
+    if (self->cursSEQUENCE == NULL && rc == 0) {
+        const char *name = self->basesType == ebtCSREAD
+            ? "(INSDC:x2cs:bin)CSREAD" :
+              self->basesType == ebtREAD
+                  ? "(INSDC:x2na:bin)READ" : "(INSDC:x2na:bin)CMP_READ";
+        rc = VTableCreateCachedCursorRead(vtbl, &self->cursSEQUENCE,
+                                          DEFAULT_CURSOR_CAPACITY);
         DISP_RC(rc, "Cannot VTableCreateCachedCursorRead");
         if (rc == 0) {
-            rc = VCursorAddColumn(self->curs,
-                &self->idx, "(%s)%s", datatype, name);
-            if (rc != 0) {
+            rc = VCursorAddColumn(self->cursSEQUENCE, &self->idxSEQUENCE, name);
+            if (rc != 0)
                 PLOGERR(klogInt, (klogInt, rc,
-                    "Cannot VCursorAddColumn(($(type)),$(name)",
-                    "type=%s,name=%s", datatype, name));
-            }
+                    "Cannot VCursorAddColumn($(name))", "name=%s", name));
         }
         if (rc == 0) {
-            rc = VCursorOpen(self->curs);
-            if (rc != 0) {
+            const char name [] = "READ_LEN";
+            rc = VCursorAddColumn(self->cursSEQUENCE,
+                                  &self->idxSEQ_READ_LEN, name);
+            if (rc != 0)
+                PLOGERR(klogInt, (klogInt, rc,
+                    "Cannot VCursorAddColumn($(name))", "name=%s", name));
+        }
+        if (rc == 0) {
+            const char name [] = "READ_TYPE";
+            rc = VCursorAddColumn(self->cursSEQUENCE,
+                                  &self->idxSEQ_READ_TYPE, name);
+            if (rc != 0)
+                PLOGERR(klogInt, (klogInt, rc,
+                    "Cannot VCursorAddColumn($(name))", "name=%s", name));
+        }
+        if (rc == 0) {
+            rc = VCursorOpen(self->cursSEQUENCE);
+            if (rc != 0)
                 PLOGERR(klogInt, (klogInt, rc,
-                    "Cannot VCursorOpen(($(type)),$(name)))",
-                    "type=%s,name=%s", datatype, name));
+                    "Cannot VCursorOpen($(name))", "name=%s", name));
+        }
+    }
+
+    if ( rc == 0 ) {
+        int64_t first = 0;
+        uint64_t count = 0;
+        rc = VCursorIdRange(self->cursSEQUENCE, 0, &first, &count);
+        if ( rc == 0 ) {
+            if (pb->start > 0) {
+                self->startSEQUENCE = pb->start;
+                if (self->startSEQUENCE < first)
+                    self->startSEQUENCE = first;
+            }
+            else
+                self->startSEQUENCE = first;
+
+            if (pb->stop > 0) {
+                self->stopSEQUENCE = pb->stop;
+                if ( ( uint64_t ) self->stopSEQUENCE > first + count)
+                    self->stopSEQUENCE = first + count;
             }
+            else
+                self->stopSEQUENCE = first + count;
         }
     }
 
@@ -417,7 +576,7 @@ static rc_t BasesInit(Bases *self, const VTable *vtbl) {
 static void BasesFinalize(Bases *self) {
     assert(self);
 
-    if (self->curs == NULL) {
+    if (self->cursSEQUENCE == NULL) {
         LOGMSG(klogInfo, "Bases statistics will not be printed : "
             "READ cursor was not opened during BasesFinalize()");
         return;
@@ -431,66 +590,174 @@ static rc_t BasesRelease(Bases *self) {
 
     assert(self);
 
-    RELEASE(VCursor  , self->curs);
+    RELEASE(VCursor, self->cursSEQUENCE);
+    RELEASE(VCursor, self->cursALIGNMENT);
 
     return rc;
 }
 
-static void BasesAdd(Bases *self, int64_t spotid) {
+static rc_t BasesAdd(Bases *self, int64_t spotid, bool alignment) {
     rc_t rc = 0;
     const void *base = NULL;
     bitsz_t row_bits = ~0;
     bitsz_t i = ~0;
     const unsigned char *bases = NULL;
+    bitsz_t boff = 0;
+
+    const VCursor * c = NULL;
+    int64_t row_id = 0;
+
+    uint32_t dREAD_LEN  [MAX_NREADS];
+    uint8_t  dREAD_TYPE [MAX_NREADS] = { 1 };
+    int nreads = 0;
+
+    int read = 0;
+    uint32_t nxtRdStart = 0;
 
     assert(self);
 
-    if (self->curs == NULL) {
-        return;
+    if (self->cursSEQUENCE == NULL) {
+        return 0;
     }
 
-    {
+    if ( alignment ) {
+        c      = self -> cursALIGNMENT;
+        row_id = self -> idxALIGNMENT;
+    }
+    else {
+        c      = self -> cursSEQUENCE;
+        row_id = self -> idxSEQUENCE;
+    }
+    if (rc == 0) {
+        rc = VCursorColumnRead(c, spotid,
+            alignment ? self->idxALN_READ_LEN : self->idxSEQ_READ_LEN,
+            &base, &boff, &row_bits);
+        DISP_RC_Read(rc, "READ_LEN", spotid, "while calling VCursorColumnRead");
+        if (rc == 0) {
+            if (boff & 7)
+                rc = RC(rcExe, rcColumn, rcReading, rcOffset, rcInvalid);
+            else if (row_bits & 7)
+                rc = RC(rcExe, rcColumn, rcReading, rcSize, rcInvalid);
+            else if ((row_bits >> 3) > sizeof(dREAD_LEN))
+                rc = RC(rcExe, rcColumn, rcReading, rcBuffer, rcInsufficient);
+            DISP_RC_Read(rc, "READ_LEN", spotid,
+                         "after calling VCursorColumnRead");
+        }
+        if (rc == 0) {
+            nreads = (row_bits >> 3)  / sizeof(*dREAD_LEN);
+            memmove(dREAD_LEN, ((const char*)base) + (boff >> 3),
+                    ( size_t ) row_bits >> 3);
+        }
+    }
+
+    if (rc == 0) {
+        rc = VCursorColumnRead(c, spotid,
+             alignment ? self->idxALN_READ_TYPE : self->idxSEQ_READ_TYPE,
+             &base, &boff, &row_bits);
+        DISP_RC_Read(rc, "READ_TYPE", spotid,
+                     "while calling VCursorColumnRead");
+        if (rc == 0) {
+            if (boff & 7)
+                rc = RC(rcExe, rcColumn, rcReading, rcOffset, rcInvalid);
+            else if (row_bits & 7)
+                rc = RC(rcExe, rcColumn, rcReading, rcSize, rcInvalid);
+            else if ((row_bits >> 3) > sizeof(dREAD_TYPE))
+                rc = RC(rcExe, rcColumn, rcReading,
+                    rcBuffer, rcInsufficient);
+            else if (((row_bits >> 3) / sizeof(*dREAD_TYPE)) != nreads)
+                rc = RC(rcExe, rcColumn, rcReading, rcData, rcIncorrect);
+            DISP_RC_Read(rc, "READ_TYPE", spotid,
+                "after calling VCursorColumnRead");
+            if (rc == 0)
+                memmove(dREAD_TYPE,
+                    ((const char*)base) + (boff >> 3),
+                    ( size_t ) row_bits >> 3);
+        }
+    }
+
+    if ( rc == 0 ) {
         uint32_t elem_bits = 0, elem_off = 0, elem_cnt = 0;
-        rc = VCursorCellDataDirect(self->curs, spotid, self->idx,
-            &elem_bits, &base, &elem_off, &elem_cnt);
+        rc = VCursorCellDataDirect(c, spotid, row_id,
+                                   &elem_bits, &base, &elem_off, &elem_cnt);
         if (rc != 0) {
+            const char * name = self->basesType == ebtCSREAD ? "CSREAD"
+                : self->basesType == ebtREAD ? "READ" : "RAW_READ";
             PLOGERR(klogInt, (klogErr, rc,
-                "while VCursorCellDataDirect(READ, $(type))",
-                "type=%s", self->CS_NATIVE ? "CS_NATIVE" : "not CS_NATIVE"));
+                "while VCursorCellDataDirect($(name))",
+                "name=%s", name));
             BasesRelease(self);
-            return;
+            return rc;
         }
 
         row_bits = elem_cnt * elem_bits;
     }
 
+    if ( rc != 0 )
+        return rc;
+
     if ((row_bits % 8) != 0) {
+        const char * name = self->basesType == ebtCSREAD ? "CSREAD"
+            : self->basesType == ebtREAD ? "READ" : "RAW_READ";
         rc = RC(rcExe, rcColumn, rcReading, rcData, rcInvalid);
         PLOGERR(klogInt, (klogErr, rc, "Invalid row_bits '$(row_bits) "
-            "while VCursorCellDataDirect(READ, $(type), spotid=$(spotid))",
-            "row_bits=%lu,type=%s,spotid=%lu",
-            row_bits, self->CS_NATIVE ? "CS_NATIVE" : "not CS_NATIVE", spotid));
+            "while VCursorCellDataDirect($(name), spotid=$(spotid))",
+            "row_bits=%lu,name=%s,spotid=%lu", row_bits, name, spotid));
         BasesRelease(self);
-        return;
+        return rc;
     }
 
     row_bits /= 8;
     bases = base;
+
     for (i = 0; i < row_bits; ++i) {
         unsigned char base = bases[i];
+        if ( i == nxtRdStart) {
+            if ( read > nreads )
+                return RC(rcExe, rcNumeral, rcComparing, rcData, rcInvalid);
+            nxtRdStart += dREAD_LEN [ read ++ ];
+            if ( ( (dREAD_TYPE[read-1] & SRA_READ_TYPE_BIOLOGICAL) == 0 ) )
+                    /* skip non-biological reads */
+            {
+                i += dREAD_LEN [ read - 1 ] - 1;
+                continue;                
+            }
+        }
+
+        if ( alignment ) {
+            const unsigned char x [16]
+                = { 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, };
+            /*      0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
+                       A  C     G           T                    N  */
+            if (base > 15) {
+                rc = RC(rcExe, rcColumn, rcReading, rcData, rcInvalid);
+                PLOGERR(klogInt, (klogErr, rc, "Invalid RAW_READ column "
+                    "value '$(base)' while VCursorCellDataDirect"
+                    "(spotid=$(spotid), index=$(i))",
+                    "base=%d,spotid=%lu,i=%lu", base, spotid, i));
+                BasesRelease(self);
+                return rc;
+            }
+            assert ( base < sizeof x / sizeof x [ 0 ] );
+            base = x [  base ];
+        }
+
         if (base > 4) {
+            const char * name = self->basesType == ebtCSREAD ? "CSREAD"
+                : self->basesType == ebtREAD ? "READ" : "RAW_READ";
             rc = RC(rcExe, rcColumn, rcReading, rcData, rcInvalid);
             PLOGERR(klogInt, (klogErr, rc,
-                "Invalid READ column value '$(base) while VCursorCellDataDirect"
-                "($(type), spotid=$(spotid), index=$(i))",
-                "base=%d,type=%s,spotid=%lu,index=%lu",
-                base, self->CS_NATIVE ? "CS_NATIVE" : "not CS_NATIVE",
-                spotid, i));
+               "Invalid READ column value '$(base)' while VCursorCellDataDirect"
+               "($(name), spotid=$(spotid), index=$(i))",
+               "base=%d,name=%s,spotid=%lu,i=%lu",
+               base, name, spotid, i));
             BasesRelease(self);
-            return;
+            return rc;
         }
+
         ++self->cnt[base];
     }
+
+    return 0;
 }
 
 static rc_t BasesPrint(const Bases *self,
@@ -510,10 +777,10 @@ static rc_t BasesPrint(const Bases *self,
         return rc;
     }
 
-    name = self->CS_NATIVE ? "0123." : "ACGTN";
+    name = self->basesType == ebtCSREAD ? "0123." : "ACGTN";
 
-    OUTMSG(("%s<%s cs_native=\"%s\" count=\"%lu\">\n",
-        indent, tag, self->CS_NATIVE ? "true" : "false", base_count));
+    OUTMSG(("%s<%s cs_native=\"%s\" count=\"%lu\">\n", indent,
+        tag, self->basesType == ebtCSREAD ? "true" : "false", base_count));
 
     for (i = 0; i < 5; ++i) {
         OUTMSG(("%s  <Base value=\"%c\" count=\"%lu\"/>\n",
@@ -526,31 +793,13 @@ static rc_t BasesPrint(const Bases *self,
         self->cnt[3] + self->cnt[4] != base_count)
     {
         rc = RC(rcExe, rcNumeral, rcComparing, rcData, rcInvalid);
-        LOGERR(klogErr, rc, "stored base count did not match observed base count");
+        LOGERR(klogErr, rc,
+               "stored base count did not match observed base count");
     }
 
     return rc;
 }
 
-typedef struct {
-    uint64_t spot_count;
-    uint64_t spot_count_mates;
-    uint64_t BIO_BASE_COUNT; /* bio_len */
-    uint64_t bio_len_mates;
-    uint64_t BASE_COUNT; /* total_len */
-    uint64_t bad_spot_count;
-    uint64_t bad_bio_len;
-    uint64_t filtered_spot_count;
-    uint64_t filtered_bio_len;
-    uint64_t total_cmp_len; /* CMP_READ : compressed */
-
-    bool variable_nreads;
-    uint32_t nreads; /* if (nreads == 0) then (nreads is variable) */
-    Statistics * stats ; /* nreads elements */
-    Statistics2* stats2; /* nreads elements */
-
-    Bases bases_count;
-} SraStatsTotal;
 static
 rc_t SraStatsTotalMakeStatistics(SraStatsTotal* self, uint32_t nreads)
 {
@@ -592,7 +841,7 @@ void SraStatsTotalStatistics2Init(SraStatsTotal* self,
         uint64_t sum = sums[i];
         Statistics2* stats = self->stats2 + i;
         assert(stats);
-        Statistics2Init(stats, sum, count[i]);
+        Statistics2Init(stats, ( double ) sum, count[i]);
     }
 }
 
@@ -652,7 +901,7 @@ static
 void SraStatsTotalAdd(SraStatsTotal* self,
     uint32_t* values, uint32_t nreads)
 {
-    int i = ~0;
+    uint32_t i = 0;
 
     assert(self && values);
 
@@ -677,7 +926,7 @@ void SraStatsTotalAdd(SraStatsTotal* self,
 
 static
 void SraStatsTotalAdd2(SraStatsTotal* self, uint32_t* values) {
-    int i = 0;
+    uint32_t i = 0;
 
     assert(self && values);
 
@@ -709,7 +958,7 @@ void print_double_or_int(const char* name, double val, bool variable) {
 static void StatisticsPrint(const Statistics* selfs,
     uint32_t nreads, const char* indent)
 {
-    int i = ~0;
+    uint32_t i = 0;
 
     if (nreads) {
         assert(selfs && indent);
@@ -733,7 +982,7 @@ static rc_t StatisticsDiff(const Statistics* ss,
 {
     rc_t rc = 0;
 
-    int i = ~0;
+    uint32_t i = 0;
 
     assert(ss && ss2);
 
@@ -802,28 +1051,6 @@ static rc_t SraStatsTotalPrintStatistics(
     return rc;
 }
 
-typedef struct srastat_parms {
-    const char* table_path;
-
-    bool xml; /* output format (txt or xml) */
-    bool printMeta;
-    bool quick; /* quick mode: stats from meta */
-    bool skip_members; /* not to print spot_group statistics */
-    bool progress;     /* show progress */
-    bool skip_alignment; /* not to print alignment info */
-    bool print_arcinfo;
-    bool statistics; /* calculate average and stdev */
-    bool test; /* test stdev */
-
-    const XMLLogger *logger;
-
-    int64_t  start, stop;
-
-    bool hasSPOT_GROUP;
-    bool variableReadLength;
-
-    SraStatsTotal total; /* is used in srastat_print */
-} srastat_parms;
 typedef struct SraStatsMeta {
     bool found;
     uint64_t BASE_COUNT;
@@ -856,7 +1083,7 @@ typedef struct SraMeta {
     Formatter formatter;
     Loader loader;
 } SraMeta;
-typedef struct ArcInfo_struct {
+typedef struct ArcInfo {
     KTime_t timestamp;
     struct {
         const char* tag;
@@ -868,15 +1095,6 @@ typedef struct Quality {
     uint32_t value;
     uint64_t count;
 } Quality;
-typedef struct QualityStats {
-    Quality* QUALITY;
-    size_t allocated;
-    size_t used;
-} QualityStats;
-typedef enum EMetaState {
-    eMSNotFound,
-    eMSFound
-} EMetaState;
 typedef struct Count {
     EMetaState state;
     uint64_t value;
@@ -888,39 +1106,21 @@ typedef struct Counts {
     Count BASE_COUNT;
     Count SPOT_COUNT;
 } Counts;
-typedef struct TableCounts {
-    EMetaState state;
-    Counts* count;
-    size_t allocated;
-    size_t used;
-} TableCounts;
-typedef struct Ctx {
-    const BSTree* tr;
-    const MetaDataStats* meta;
-    const SraMeta* info;
-    const SraSizeStats* sizes;
-    const ArcInfo* arc_info;
-    srastat_parms* pb;
-    SraStatsTotal* total;
-    const VDatabase* db;
-    QualityStats quality;
-    TableCounts tables;
-} Ctx;
 typedef rc_t (CC * RG_callback)(const BAM_HEADER_RG* rg, const void* data);
 static
 rc_t CC meta_RG_callback(const BAM_HEADER_RG* rg, const void* data)
 {
     rc_t rc = 0;
 
-    const MetaDataStats* meta = data;
+    const MetaDataStats* meta_stats = data;
 
-    int i = 0;
+    uint32_t i = 0;
     bool found = false;
 
-    assert(rg && meta && rg->ID);
+    assert(rg && meta_stats && rg->ID);
 
-    for (i = 0; i < meta->spotGroupN && rc == 0; ++i) {
-        SraStatsMeta* ss = &meta->spotGroup[i];
+    for (i = 0; i < meta_stats->spotGroupN && rc == 0; ++i) {
+        SraStatsMeta* ss = &meta_stats->spotGroup[i];
         const char* spot_group = ss->spot_group;
 
         assert(spot_group);
@@ -987,7 +1187,7 @@ static rc_t QualityParse(Quality* self,
 {
     rc_t rc = 0;
     const char start[] = "PHRED_";
-    int i = strlen(start);
+    size_t i = strlen(start);
 
     assert(self && name);
 
@@ -1282,7 +1482,7 @@ rc_t TableCountsRead(TableCounts* self, const VDatabase* db)
     memset(self, 0, sizeof *self);
 
     if (db == NULL)
-    {   return 0; }
+        return 0;
 
     rc = VDatabaseListTbl(db, &names);
     DISP_RC(rc, "while calling VDatabaseListTbl");
@@ -2134,20 +2334,20 @@ static rc_t printXmlEscaped ( const char * c ) {
 }
 
 static
-void srastatmeta_print(const MetaDataStats* meta, srastat_parms *pb)
+void srastatmeta_print(const MetaDataStats* meta_stats, srastat_parms *pb)
 {
     uint32_t i = 0;
-    assert(pb && meta);
+    assert(pb && meta_stats);
 
-    if (meta->spotGroupN) {
-        for (i = 0; i < meta->spotGroupN; ++i) {
-            const SraStatsMeta* ss = &meta->spotGroup[i];
+    if (meta_stats->spotGroupN) {
+        for (i = 0; i < meta_stats->spotGroupN; ++i) {
+            const SraStatsMeta* ss = &meta_stats->spotGroup[i];
             if (!pb->skip_members) {
                 const char* spot_group = "";
                 if (strcmp("default", ss->spot_group) != 0)
                 {   spot_group = ss->spot_group; }
                 if ((spot_group == NULL || spot_group[0] == '\0')
-                    && meta->spotGroupN > 1)
+                    && meta_stats->spotGroupN > 1)
                 {
                     /* skip an empty "extra" 'default' spot_group */
                     if (ss->BASE_COUNT == 0     && ss->BIO_BASE_COUNT == 0 &&
@@ -2191,13 +2391,13 @@ void srastatmeta_print(const MetaDataStats* meta, srastat_parms *pb)
         if (!pb->xml && !(pb->quick && pb->skip_members)) {
                 const char* spot_group = "";
                 OUTMSG(("%s|%s|%ld:%ld:%ld|:|:|:\n",
-                        pb->table_path, spot_group, meta->table.spot_count,
-                        meta->table.BASE_COUNT, meta->table.BIO_BASE_COUNT));
+                        pb->table_path, spot_group, meta_stats->table.spot_count,
+                        meta_stats->table.BASE_COUNT, meta_stats->table.BIO_BASE_COUNT));
         }
-        pb->total.spot_count = meta->table.spot_count;
-        pb->total.BASE_COUNT = meta->table.BASE_COUNT;
-        pb->total.BIO_BASE_COUNT = meta->table.BIO_BASE_COUNT;
-        pb->total.total_cmp_len = meta->table.CMP_BASE_COUNT;
+        pb->total.spot_count = meta_stats->table.spot_count;
+        pb->total.BASE_COUNT = meta_stats->table.BASE_COUNT;
+        pb->total.BIO_BASE_COUNT = meta_stats->table.BIO_BASE_COUNT;
+        pb->total.total_cmp_len = meta_stats->table.CMP_BASE_COUNT;
     }
 }
 
@@ -2246,7 +2446,7 @@ rc_t process_align_info(const char* indent, const Ctx* ctx)
 {
     rc_t rc = 0;
     uint32_t count = 0;
-    int i = 0;
+    uint32_t i = 0;
     const VDBDependencies* dep = NULL;
 
     assert(indent && ctx);
@@ -2285,11 +2485,20 @@ rc_t process_align_info(const char* indent, const Ctx* ctx)
             DISP_RC(rc, "while calling VDBDependenciesName");
             break;
         }
+
         rc = VDBDependenciesPath(dep, &path, i);
         if (rc != 0) {
             DISP_RC(rc, "while calling VDBDependenciesPath");
             break;
         }
+        if ( path == NULL ) {
+            rc = VDBDependenciesPathRemote(dep, &path, i);
+            if (rc != 0) {
+                DISP_RC(rc, "while calling VDBDependenciesPath");
+                break;
+            }
+        }
+
         rc = VDBDependenciesLocal(dep, &local, i);
         if (rc != 0) {
             DISP_RC(rc, "while calling VDBDependenciesLocal");
@@ -2318,6 +2527,181 @@ rc_t process_align_info(const char* indent, const Ctx* ctx)
     return rc;
 }
 
+/******************************************************************************/
+
+static
+void printChangeValue ( const char * value,
+                        size_t size )
+{
+    assert ( value );
+
+    switch ( size ) {
+        case 1:
+            OUTMSG ( ( "%hu", ( ( const uint8_t * ) value ) [ 0 ] ) );
+            break;
+        case 2:
+            OUTMSG ( ( "%u", ( ( const uint16_t * ) value ) [ 0 ] ) );
+            break;
+        case 4:
+            OUTMSG ( ( "%u", ( ( const uint32_t * ) value ) [ 0 ] ) );
+            break;
+        case 8:
+            OUTMSG ( ( "%lu", ( ( const uint64_t * ) value ) [ 0 ] ) );
+            break;
+        default: {
+            size_t i = 0;
+            for ( i = 0; i < size; ++ i ) {
+                char c = value [ i ];
+                if ( isprint ( c ) )
+                    OUTMSG ( ( "%c", c ) );
+                else
+                    OUTMSG ( ( "\\x%02X", ( ( const uint8_t* ) value ) [ i ] ));
+            }
+            break;
+        }
+    }
+}
+
+static
+rc_t KMDataNodePrintAttr ( const KMDataNode * self,
+                           const KNamelist * attrs,
+                           uint32_t idx )
+{
+    rc_t rc = 0;
+
+    const char * attr = NULL;
+    char value [ 512 ] = "";
+    size_t size = 0;
+
+    rc = KNamelistGet ( attrs, idx, & attr );
+
+    if ( rc == 0 )
+        rc = KMDataNodeReadAttr ( self, attr, value, sizeof value, & size );
+
+    if ( rc == 0 )
+        rc = OUTMSG ( ( " %s=\"%s\"", attr, value ) );
+
+    if ( rc != 0 )
+        OUTMSG ( ( " CANNOT_READ_ATTRIBULE=\"%R\"", rc ) );
+
+    return rc;
+}
+
+static
+rc_t KMDataNodePrintChange ( const KMDataNode * self,
+                            const char * name,
+                            const char * indent )
+{
+    rc_t rc = 0;
+
+    const KMDataNode * node = NULL;
+
+    rc = KMDataNodeOpenNodeRead ( self, & node, name );
+    DISP_RC2 ( rc, name,
+        "while calling KMDataNodeOpenNodeRead" );
+
+    if ( rc == 0 ) {
+        const char tag [] = "Change";
+        char value [] = "sra-stat: ERROR WHILE READING CHANGES CHILD VALUE";
+        size_t size = 0;
+
+        KNamelist * names = NULL;
+        uint32_t count = 0;
+
+        KMDataNodeRead ( node, 0, value, sizeof value, & size, NULL );
+        if ( size == 0 )
+            size = sizeof value;
+
+        rc = KMDataNodeListAttr ( node, & names );
+        DISP_RC2 ( rc, name,
+            "while calling KMDataNodeListAttr" );
+
+        if ( rc == 0 )
+            rc = KNamelistCount ( names, & count );
+
+        if ( rc == 0 ) {
+            uint32_t i = 0;
+
+            OUTMSG ( ( "  %s" "<%s", indent, tag ) );
+            for ( i = 0; i < count; ++i )
+                KMDataNodePrintAttr ( node, names, i );
+            OUTMSG ( ( ">" ) );
+
+            printChangeValue ( value, size );
+
+            OUTMSG ( ( "</%s>\n", tag ) );
+        }
+
+        RELEASE ( KNamelist, names );
+    }
+
+    RELEASE ( KMDataNode, node );
+
+    return rc;
+}
+
+static rc_t CtxPrintCHANGES ( const Ctx * self, const char * indent ) {
+    rc_t rc = 0;
+
+    const KMetadata * meta = NULL;
+    const KMDataNode * parent = NULL;
+    bool toReleaseMeta = false;
+
+    const char meta_tag [] = "CHANGES";
+    const char tag [] = "Changes";
+
+    assert ( self && indent );
+
+    meta = self -> meta;
+    if ( meta == NULL ) {
+        rc = VDatabaseOpenMetadataRead ( self -> db, & meta );
+        if ( rc != 0 )
+            return rc;
+        toReleaseMeta = true;
+    }
+
+    KMetadataOpenNodeRead ( meta, & parent, meta_tag );
+
+    if ( parent != NULL ) {
+        KNamelist * names = NULL;
+        rc = KMDataNodeListChild ( parent, & names );
+        DISP_RC2 ( rc, meta_tag, "while calling KMDataNodeListChild" );
+
+        OUTMSG ( ( "%s" "<%s>"  "\n", indent, tag ) );
+
+        if ( rc == 0 ) {
+            uint32_t count = 0;
+            rc = KNamelistCount ( names, & count );
+            DISP_RC2 ( rc, meta_tag, "while calling KNamelistCount" );
+
+            if ( rc == 0 ) {
+                uint32_t i = 0;
+                for ( i = 0; i < count && rc == 0; ++i ) {
+                    const char * child = NULL;
+                    rc = KNamelistGet ( names, i, & child );
+                    if ( rc != 0 )
+                       PLOGERR ( klogInt, ( klogInt, rc,
+                            "while calling $(n)::KNamelistGet($(idx))",
+                            "n=%s,idx=%i", meta_tag, i ) );
+                    else
+                        rc = KMDataNodePrintChange ( parent, child, indent );
+                }
+            }
+        }
+
+        OUTMSG ( ( "%s" "</%s>" "\n", indent, tag ) );
+
+        RELEASE ( KNamelist, names );
+    }
+
+    if ( toReleaseMeta )
+        RELEASE ( KMetadata, meta );
+
+    RELEASE ( KMDataNode, parent );
+
+    return rc;
+}
+
 static
 rc_t print_results(const Ctx* ctx)
 {
@@ -2326,23 +2710,23 @@ rc_t print_results(const Ctx* ctx)
     bool mismatch = false;
 
     assert(ctx && ctx->pb
-        && ctx->tr && ctx->sizes && ctx->info && ctx->meta && ctx->total);
+        && ctx->tr && ctx->sizes && ctx->info && ctx->meta_stats && ctx->total);
 
     if (ctx->pb->quick) {
-        if ( ! ctx->meta -> found )
+        if ( ! ctx->meta_stats -> found )
             LOGMSG(klogWarn, "Statistics metadata not found");
         else
         {
-            ctx->pb->hasSPOT_GROUP = ctx->meta->spotGroupN > 1 ||
-                (ctx->meta->spotGroupN == 1
-                 && strcmp("default", ctx->meta->spotGroup[0].spot_group) != 0);
+            ctx->pb->hasSPOT_GROUP = ctx->meta_stats->spotGroupN > 1 ||
+                (ctx->meta_stats->spotGroupN == 1
+                 && strcmp("default", ctx->meta_stats->spotGroup[0].spot_group) != 0);
         }
     }
 
-    if (ctx->meta->found && ! ctx->pb->quick) {
+    if (ctx->meta_stats->found && ! ctx->pb->quick) {
 /*      bool mismatch = false; */
         SraStats* ss = (SraStats*)BSTreeFind(ctx->tr, "", srastats_cmp);
-        const SraStatsMeta* m = &ctx->meta->table;
+        const SraStatsMeta* m = &ctx->meta_stats->table;
         if (ctx->total->BASE_COUNT != m->BASE_COUNT)
         { mismatch = true; }
         if (ctx->total->BIO_BASE_COUNT != m->BIO_BASE_COUNT)
@@ -2352,11 +2736,11 @@ rc_t print_results(const Ctx* ctx)
         if (ctx->total->total_cmp_len != m->CMP_BASE_COUNT)
         { mismatch = true; }
         if (ss != NULL) {
-            const SraStatsMeta* m = &ctx->meta->table;
+            const SraStatsMeta* m = &ctx->meta_stats->table;
             uint32_t i = 0;
-            for (i = 0; i < ctx->meta->spotGroupN && !mismatch; ++i) {
+            for (i = 0; i < ctx->meta_stats->spotGroupN && !mismatch; ++i) {
                 const char* spot_group = "";
-                m = &ctx->meta->spotGroup[i];
+                m = &ctx->meta_stats->spotGroup[i];
                 assert(m);
                 if (strcmp("default", m->spot_group) != 0)
                 {   spot_group = m->spot_group; }
@@ -2379,18 +2763,18 @@ rc_t print_results(const Ctx* ctx)
 
     if (ctx->pb->xml) {
         OUTMSG(("<Run accession=\"%s\"", ctx->pb->table_path));
-        if (!ctx->pb->quick || ! ctx->meta->found) {
+        if (!ctx->pb->quick || ! ctx->meta_stats->found) {
             OUTMSG((" read_length=\"%s\"",
                 ctx->pb->variableReadLength ? "variable" : "fixed"));
         }
         if (ctx->pb->quick) {
             OUTMSG((" spot_count=\"%ld\" base_count=\"%ld\"",
-                ctx->meta->table.spot_count, ctx->meta->table.BASE_COUNT));
+                ctx->meta_stats->table.spot_count, ctx->meta_stats->table.BASE_COUNT));
             OUTMSG((" base_count_bio=\"%ld\"",
-                ctx->meta->table.BIO_BASE_COUNT));
-            if (ctx->meta->table.CMP_BASE_COUNT > 0) {
+                ctx->meta_stats->table.BIO_BASE_COUNT));
+            if (ctx->meta_stats->table.CMP_BASE_COUNT > 0) {
                 OUTMSG((" cmp_base_count=\"%ld\"",
-                    ctx->meta->table.CMP_BASE_COUNT));
+                    ctx->meta_stats->table.CMP_BASE_COUNT));
             }
         }
         else {
@@ -2413,8 +2797,8 @@ rc_t print_results(const Ctx* ctx)
     else if (ctx->pb->skip_members) {
         if (ctx->pb->quick) {
             OUTMSG(("%s||%ld:%ld:%ld|:|:|:\n",
-                ctx->pb->table_path, ctx->meta->table.spot_count,
-                ctx->meta->table.BASE_COUNT, ctx->meta->table.BIO_BASE_COUNT));
+                ctx->pb->table_path, ctx->meta_stats->table.spot_count,
+                ctx->meta_stats->table.BASE_COUNT, ctx->meta_stats->table.BIO_BASE_COUNT));
         }
         else {
             OUTMSG(("%s||%ld:%ld:%ld|%ld:%ld|%ld:%ld|%ld:%ld\n",
@@ -2427,14 +2811,14 @@ rc_t print_results(const Ctx* ctx)
         }
     }
 
-    if (ctx->pb->quick && ctx->meta->found) {
+    if (ctx->pb->quick && ctx->meta_stats->found) {
         memset(&ctx->pb->total, 0, sizeof ctx->pb->total);
-        rc = parse_bam_header(ctx->db, meta_RG_callback, ctx->meta);
-        srastatmeta_print(ctx->meta, ctx->pb);
-        if (ctx->pb->total.spot_count != ctx->meta->table.spot_count ||
-            ctx->pb->total.BIO_BASE_COUNT != ctx->meta->table.BIO_BASE_COUNT ||
-            ctx->pb->total.BASE_COUNT != ctx->meta->table.BASE_COUNT ||
-            ctx->pb->total.total_cmp_len != ctx->meta->table.CMP_BASE_COUNT)
+        rc = parse_bam_header(ctx->db, meta_RG_callback, ctx->meta_stats);
+        srastatmeta_print(ctx->meta_stats, ctx->pb);
+        if (ctx->pb->total.spot_count != ctx->meta_stats->table.spot_count ||
+            ctx->pb->total.BIO_BASE_COUNT != ctx->meta_stats->table.BIO_BASE_COUNT ||
+            ctx->pb->total.BASE_COUNT != ctx->meta_stats->table.BASE_COUNT ||
+            ctx->pb->total.total_cmp_len != ctx->meta_stats->table.CMP_BASE_COUNT)
         {
             rc = RC(rcExe, rcData, rcValidating, rcData, rcUnequal);
         }
@@ -2444,8 +2828,8 @@ rc_t print_results(const Ctx* ctx)
         memset(&ctx->pb->total, 0, sizeof ctx->pb->total);
         rc = parse_bam_header(ctx->db, tree_RG_callback, ctx->tr);
         BSTreeForEach(ctx->tr, false, srastat_print, ctx->pb);
-        if (ctx->meta->found) {
-            const SraStatsMeta* m = &ctx->meta->table;
+        if (ctx->meta_stats->found) {
+            const SraStatsMeta* m = &ctx->meta_stats->table;
             if (ctx->pb->total.BASE_COUNT != m->BASE_COUNT
                 || ctx->pb->total.BIO_BASE_COUNT != m->BIO_BASE_COUNT
                 || ctx->pb->total.spot_count != m->spot_count)
@@ -2520,7 +2904,7 @@ rc_t print_results(const Ctx* ctx)
         }
         if (rc == 0 && !ctx->pb->quick) {
             rc2 = BasesPrint(&ctx->total->bases_count,
-                ctx->total->BASE_COUNT, "  ");
+                             ctx->total->BIO_BASE_COUNT, "  ");
         }
         if (rc == 0 && !ctx->pb->skip_alignment) {
             rc = process_align_info("  ", ctx);
@@ -2540,7 +2924,7 @@ rc_t print_results(const Ctx* ctx)
             }
             else {
                 size_t k = strftime(b, sizeof(b), "%Y-%m-%dT%H:%M:%S", tm);
-                OUTMSG(("  <Archive timestamp=\"%.*s\"", k, b));
+                OUTMSG(("  <Archive timestamp=\"%.*s\"", ( int ) k, b));
                 for(i = 0; i < sizeof(a->i) / sizeof(a->i[0]); i++) {
                     OUTMSG((" size.%s=\"%lu\" md5.%s=\"%s\"",
                         a->i[i].tag, a->i[i].size, a->i[i].tag, a->i[i].md5));
@@ -2552,6 +2936,14 @@ rc_t print_results(const Ctx* ctx)
         {   rc = QualityStatsPrint(&ctx->quality, "  "); }
         if (rc == 0)
         {   rc = TableCountsPrint(&ctx->tables, "  "); }
+        if ( rc == 0 )
+            rc = CtxPrintCHANGES ( ctx, "  " );
+        if ( rc == 0 && ctx -> n90 > 0 )
+            OUTMSG ( ("  <AssemblyStatistics "
+                "n50=\"%lu\" l50=\"%lu\" n90=\"%lu\" l90=\"%lu\" "
+                "n=\"%lu\" l=\"%lu\"/>\n",
+                ctx -> n50, ctx -> l50, ctx -> n90, ctx -> l90,\
+                ctx -> n, ctx -> l ) );
         OUTMSG(("</Run>\n"));
     }
     if (mismatch && ctx->pb->start == 0 && ctx->pb->stop == 0) {
@@ -2581,7 +2973,7 @@ int64_t CC srastats_sort ( const BSTNode *item, const BSTNode *n )
 }
 
 static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
-    SraStatsTotal* total, const VTable *vtbl)
+    SraStatsTotal* total, const Ctx * ctx, const VTable *vtbl)
 {
     rc_t rc = 0;
 
@@ -2684,18 +3076,6 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
                 rc = VCursorIdRange(curs, 0, &first, &count);
                 DISP_RC(rc, "VCursorIdRange() failed");
                 if (rc == 0) {
-                    rc = BasesInit(&total->bases_count, vtbl);
-                }
-                if (rc == 0) {
-                    const KLoadProgressbar *pr = NULL;
-                    bool bad_read_filter = false;
-                    bool fixedNReads = true;
-                    bool fixedReadLength = true;
-
-                    uint32_t g_dREAD_LEN[MAX_NREADS];
-
-                    memset(g_dREAD_LEN, 0, sizeof g_dREAD_LEN);
-
                     if (pb->start > 0) {
                         start = pb->start;
                         if (start < first) {
@@ -2708,13 +3088,26 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
 
                     if (pb->stop > 0) {
                         stop = pb->stop;
-                        if (stop > first + count) {
+                        if ( ( uint64_t ) stop > first + count) {
                             stop = first + count;
                         }
                     }
                     else {
                         stop = first + count;
                     }
+                }
+                if (rc == 0) {
+                    rc = BasesInit(&total->bases_count, ctx, vtbl, pb);
+                }
+                if (rc == 0) {
+                    const KLoadProgressbar *pr = NULL;
+                    bool bad_read_filter = false;
+                    bool fixedNReads = true;
+                    bool fixedReadLength = true;
+
+                    uint32_t g_dREAD_LEN[MAX_NREADS];
+
+                    memset(g_dREAD_LEN, 0, sizeof g_dREAD_LEN);
 
                     for (spotid = start; spotid < stop && rc == 0; ++spotid) {
                         SraStats* ss;
@@ -2733,7 +3126,13 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
                         }
 
                         if (rc == 0 && pb->progress && pr == NULL) {
-                            rc = KLoadProgressbar_Make(&pr, stop + 1 - start);
+                            uint64_t b = total->bases_count.stopSEQUENCE + 1
+                                       - total->bases_count.startSEQUENCE;
+                            if ( total->bases_count.stopALIGNMENT > 0 )
+                                b +=  total->bases_count.stopALIGNMENT + 1
+                                    - total->bases_count.startALIGNMENT;
+                            rc = KLoadProgressbar_Make(&pr,
+                                                       stop + 1 - start + b);
                             if (rc != 0) {
                                 DISP_RC(rc, "cannot initialize progress bar");
                                 rc = 0;
@@ -2755,11 +3154,11 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
                                 rc = RC(rcExe, rcColumn, rcReading,
                                     rcOffset, rcInvalid);
                             }
-                            if (row_bits & 7) {
+                            else if (row_bits & 7) {
                                 rc = RC(rcExe, rcColumn, rcReading,
                                     rcSize, rcInvalid);
                             }
-                            if ((row_bits >> 3) > sizeof(dREAD_LEN)) {
+                            else if ((row_bits >> 3) > sizeof(dREAD_LEN)) {
                                 rc = RC(rcExe, rcColumn, rcReading,
                                     rcBuffer, rcInsufficient);
                             }
@@ -2768,9 +3167,10 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
                         }
                         if (rc == 0) {
                             int i, bio_len, bio_count, bad_cnt, filt_cnt;
-                            memmove(dREAD_LEN,
-                                ((const char*)base) + (boff>>3), row_bits >> 3);
-                            nreads = (row_bits >> 3) / sizeof(*dREAD_LEN);
+                            memmove(dREAD_LEN, ((const char*)base) + (boff>>3),
+                                    ( size_t ) row_bits >> 3);
+                            nreads
+                                = (int) ((row_bits >> 3) / sizeof(*dREAD_LEN));
                             if (spotid == start) {
                                 g_nreads = nreads;
                                 if (pb->statistics) {
@@ -2792,15 +3192,15 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
                                         rc = RC(rcExe, rcColumn, rcReading,
                                             rcOffset, rcInvalid);
                                     }
-                                    if (row_bits & 7) {
+                                    else if (row_bits & 7) {
                                         rc = RC(rcExe, rcColumn, rcReading,
                                             rcSize, rcInvalid);
                                     }
-                                    if ((row_bits >> 3) > sizeof(dREAD_TYPE)) {
+                                    else if ((row_bits >> 3)
+                                             > sizeof(dREAD_TYPE))
                                         rc = RC(rcExe, rcColumn, rcReading,
                                             rcBuffer, rcInsufficient);
-                                    }
-                                    if ((row_bits >> 3) !=  nreads) {
+                                    else if ((row_bits >> 3) !=  nreads) {
                                         rc = RC(rcExe, rcColumn, rcReading,
                                             rcData, rcIncorrect);
                                     }
@@ -2811,7 +3211,7 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
                             if (rc == 0) {
                                 memmove(dREAD_TYPE,
                                     ((const char*)base) + (boff >> 3),
-                                    row_bits >> 3);
+                                    ( size_t ) row_bits >> 3);
                                 if (idxSPOT_GROUP != 0) {
                                     rc = VCursorColumnRead(curs, spotid,
                                         idxSPOT_GROUP, &base, &boff, &row_bits);
@@ -2824,11 +3224,11 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
                                                     rcReading,
                                                     rcOffset, rcInvalid);
                                             }
-                                            if (row_bits & 7) {
+                                            else if (row_bits & 7) {
                                                 rc = RC(rcExe, rcColumn,
                                                     rcReading,
                                                     rcSize, rcInvalid); }
-                                            if ((row_bits >> 3)
+                                            else if ((row_bits >> 3)
                                                 > sizeof(dSPOT_GROUP))
                                             {
                                                 rc = RC(rcExe, rcColumn,
@@ -2839,10 +3239,10 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
                                                "after calling VCursorColumnRead"
                                                );
                                             if (rc == 0) {
-                                                int n = row_bits >> 3;
+                                                bitsz_t n = row_bits >> 3;
                                                 memmove(dSPOT_GROUP,
                                                   ((const char*)base)+(boff>>3),
-                                                  row_bits>>3);
+                                                  ( size_t ) row_bits>>3);
                                                 dSPOT_GROUP[n]='\0';
                                                 if (n > 1 ||
                                                     (n == 1 && dSPOT_GROUP[0]))
@@ -2868,15 +3268,15 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
                                     DISP_RC_Read(rc, RD_FILTER, spotid,
                                         "while calling VCursorColumnRead");
                                     if (rc == 0) {
-                                        int size = row_bits >> 3;
+                                        bitsz_t size = row_bits >> 3;
                                         if (boff & 7) {
                                             rc = RC(rcExe, rcColumn, rcReading,
                                                 rcOffset, rcInvalid); }
-                                        if (row_bits & 7) {
+                                        else if (row_bits & 7) {
                                             rc = RC(rcExe, rcColumn, rcReading,
                                                 rcSize, rcInvalid);
                                         }
-                                        if (size > sizeof dRD_FILTER) {
+                                        else if (size > sizeof dRD_FILTER) {
                                             rc = RC(rcExe, rcColumn, rcReading,
                                                 rcBuffer, rcInsufficient);
                                         }
@@ -2885,7 +3285,7 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
                                         if (rc == 0) {
                                             memmove(dRD_FILTER,
                                                 ((const char*)base) + (boff>>3),
-                                                size);
+                                                ( size_t ) size);
                                             if (size < nreads) {
                              /* RD_FILTER is expected to have nreads elements */
                                                 if (size == 1) {
@@ -2936,7 +3336,7 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
                                     if (boff & 7) {
                                         rc = RC(rcExe, rcColumn, rcReading,
                                             rcOffset, rcInvalid); }
-                                    if (row_bits & 7) {
+                                    else if (row_bits & 7) {
                                         rc = RC(rcExe, rcColumn, rcReading,
                                             rcSize, rcInvalid);
                                     }
@@ -2977,8 +3377,6 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
                                 ss->total_cmp_len += cmp_len;
                                 total->total_cmp_len += cmp_len;
 
-                                BasesAdd(&total->bases_count, spotid);
-
                                 if (pb->statistics) {
                                     SraStatsTotalAdd(total, dREAD_LEN, nreads);
                                 }
@@ -3075,6 +3473,32 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
                     } /* for (spotid = start; spotid <= stop && rc == 0;
                               ++spotid) */
 
+                    for (spotid = total->bases_count.startALIGNMENT;
+                         !pb->quick &&
+                           spotid < total->bases_count.stopALIGNMENT && rc == 0;
+                         ++spotid)
+                    {
+                        rc = BasesAdd(&total->bases_count, spotid, true);
+                        if ( rc == 0 && pb->progress )
+                            KLoadProgressbar_Process ( pr, 1, false );
+                        rc = Quitting();
+                        if (rc != 0)
+                            LOGMSG(klogWarn, "Interrupted");
+                    }
+
+                    for (spotid = total->bases_count.startSEQUENCE;
+                         !pb->quick &&
+                           spotid < total->bases_count.stopSEQUENCE && rc == 0;
+                         ++spotid)
+                    {
+                        rc = BasesAdd(&total->bases_count, spotid, false);
+                        if ( rc == 0 && pb->progress )
+                            KLoadProgressbar_Process ( pr, 1, false );
+                        rc = Quitting();
+                        if (rc != 0)
+                            LOGMSG(klogWarn, "Interrupted");
+                    }
+
                     if (rc == 0) {
                         BasesFinalize(&total->bases_count);
                         pb->variableReadLength = !fixedReadLength;
@@ -3150,7 +3574,8 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
                               rcBuffer, rcInsufficient);
             }
             if (rc == 0) {
-                memmove(dREAD_LEN, ((const char*)base) + (boff>>3), row_bits>>3);
+                memmove(dREAD_LEN, ((const char*)base) + (boff>>3),
+                        ( size_t ) row_bits>>3);
             }
             for (i = 0; i < g_nreads; ++i) {
                 diff_sq[i] +=
@@ -3183,13 +3608,16 @@ rc_t run(srastat_parms* pb)
     assert(pb && pb->table_path);
 
     rc = VDBManagerMakeRead(&vmgr, NULL);
-    if (rc != 0) {
+    if (rc != 0)
         LOGERR(klogInt, rc, "failed to open VDBManager");
-    }
     else {
         SraSizeStats sizes;
         ArcInfo arc_info;
         SraMeta info;
+
+        const VDatabase * db = NULL;  /* sra-srat argument is a DB */
+        const VTable    * tbl = NULL; /* sra-srat argument is a table:
+                                         don't release it*/
         const VTable* vtbl = NULL;
 
         VSchema *schema = NULL;
@@ -3201,11 +3629,12 @@ rc_t run(srastat_parms* pb)
         if (rc == 0) {
             rc = VDBManagerOpenTableRead(vmgr, &vtbl,
                 schema, "%s", pb->table_path);
-            if (rc != 0 && GetRCObject(rc) == (enum RCObject)rcTable
-                        && GetRCState (rc) == rcIncorrect)
+            if (rc == 0 )
+                tbl = vtbl; /* sra-srat argument is a table */
+            else if ( GetRCObject(rc) == (enum RCObject)rcTable
+                   && GetRCState (rc) == rcIncorrect)
             {
                 const char altname[] = "SEQUENCE";
-                const VDatabase *db = NULL;
                 rc_t rc2 = VDBManagerOpenDBRead(vmgr,
                     &db, schema, pb->table_path);
                 if (rc2 == 0) {
@@ -3214,7 +3643,6 @@ rc_t run(srastat_parms* pb)
                         rc = 0;
                     }
                 }
-                VDatabaseRelease ( db );
             }
             if (rc != 0) {
                 PLOGERR(klogInt, (klogInt, rc,
@@ -3226,13 +3654,15 @@ rc_t run(srastat_parms* pb)
             SraStatsTotal total;
             const KTable* ktbl = NULL;
             const KMetadata* meta = NULL;
-            const VDatabase* db = NULL;
 
             BSTree tr;
             Ctx ctx;
 
             BSTreeInit(&tr);
+
             memset(&ctx, 0, sizeof ctx);
+            ctx . db  = db;
+            ctx . tbl = tbl;
 
             memset(&total, 0, sizeof total);
 
@@ -3243,11 +3673,6 @@ rc_t run(srastat_parms* pb)
                 DISP_RC(rc, "While calling KTableOpenMetadataRead");
             }
             if (rc == 0) {
-                rc = VTableOpenParentRead(vtbl, &db);
-                DISP_RC2(rc, pb->table_path,
-                    "while calling VTableOpenParentRead");
-            }
-            if (rc == 0) {
                 rc = get_stats_meta(meta, &stats, pb->quick);
                 if (rc == 0) {
                     if (pb->quick && !stats.found) {
@@ -3265,7 +3690,7 @@ rc_t run(srastat_parms* pb)
                 rc = get_load_info(meta, &info);
             }
             if (rc == 0 && !pb->quick) {
-                rc = sra_stat(pb, &tr, &total, vtbl);
+                rc = sra_stat(pb, &tr, &total, &ctx, vtbl);
             }
             if (rc == 0 && pb->print_arcinfo ) {
                 rc = get_arc_info(pb->table_path, &arc_info, vmgr, vtbl);
@@ -3282,10 +3707,13 @@ rc_t run(srastat_parms* pb)
                     TableCountsSort(&ctx.tables);
                 }
             }
+            if ( rc == 0 )
+                rc = CalculateNL ( db, & ctx );
             if (rc == 0) {
-                ctx.db = db;
+                if ( db == NULL )
+                    ctx . meta = meta;
                 ctx.info = &info;
-                ctx.meta = &stats;
+                ctx.meta_stats = &stats;
                 ctx.pb = pb;
                 ctx.sizes = &sizes;
                 ctx.total = &total;
@@ -3295,10 +3723,9 @@ rc_t run(srastat_parms* pb)
             }
             BSTreeWhack(&tr, bst_whack_free, NULL);
             SraStatsTotalFree(&total);
-            RELEASE(VDatabase, db);
             RELEASE(KTable, ktbl);
             {
-                int i; 
+                uint32_t i; 
                 for (i = 0; i < stats.spotGroupN; ++i) {
                     SraStatsMetaDestroy(&stats.spotGroup[i]);
                 }
@@ -3311,6 +3738,7 @@ rc_t run(srastat_parms* pb)
             CtxRelease(&ctx);
             RELEASE(KMetadata, meta);
         }
+        RELEASE(VDatabase, db);
         RELEASE(VSchema, schema);
         RELEASE(VTable, vtbl);
     }


=====================================
tools/sra-stat/sra-stat.h
=====================================
--- a/tools/sra-stat/sra-stat.h
+++ b/tools/sra-stat/sra-stat.h
@@ -33,10 +33,61 @@ struct KFile;
 struct VCursor;
 struct VTable;
 
+typedef enum EMetaState {
+    eMSNotFound,
+    eMSFound
+} EMetaState;
+
+typedef struct QualityStats {
+    struct Quality* QUALITY;
+    size_t allocated;
+    size_t used;
+} QualityStats;
+
+typedef struct TableCounts {
+    EMetaState state;
+    struct Counts* count;
+    size_t allocated;
+    size_t used;
+} TableCounts;
+
+typedef struct Ctx {
+    const struct BSTree* tr;
+    const struct MetaDataStats* meta_stats;
+    const struct SraMeta* info;
+    const struct SraSizeStats* sizes;
+    const struct ArcInfo* arc_info;
+    struct srastat_parms* pb;
+    struct SraStatsTotal* total;
+
+    const struct VDatabase * db;   /* sra-srat argument is a DB */
+    const struct VTable    * tbl;  /* sra-srat argument is a table */
+
+    const struct KMetadata* meta; /* from Table (when running on table) */
+    QualityStats quality;
+    TableCounts tables;
+
+    uint64_t n;
+    uint64_t l;
+    uint64_t n50;
+    uint64_t l50;
+    uint64_t n90;
+    uint64_t l90;
+} Ctx;
+
 rc_t CC VCursorColumnRead(const struct VCursor *self, int64_t id,
     uint32_t idx, const void **base, bitsz_t *offset, bitsz_t *size);
 
 rc_t CC VTableMakeSingleFileArchive(const struct VTable *self,
     const struct KFile **sfa, bool lightweight);
 
+rc_t CC CalculateNL ( const struct VDatabase * db, Ctx * ctx );
+
+#define RELEASE(type, obj) do { rc_t rc2 = type##Release(obj); \
+    if (rc2 && !rc) { rc = rc2; } obj = NULL; } while (false)
+
+
+#define DISP_RC(rc, msg) (void)((rc == 0) ? 0 : LOGERR(klogInt, rc, msg))
+
+
 #endif /* _h_sra_stat_tools_ */



View it on GitLab: https://salsa.debian.org/med-team/sra-sdk/compare/8717fcc9b25b5fd443131a29f2de215eea42955f...de21e4654473da150b5a90678d3b3c7b853fa1c3

---
View it on GitLab: https://salsa.debian.org/med-team/sra-sdk/compare/8717fcc9b25b5fd443131a29f2de215eea42955f...de21e4654473da150b5a90678d3b3c7b853fa1c3
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.alioth.debian.org/pipermail/debian-med-commit/attachments/20180218/201d1482/attachment-0001.html>


More information about the debian-med-commit mailing list