[med-svn] [Git][med-team/stacks][master] 3 commits: New upstream version 2.58+dfsg

Nilesh Patra (@nilesh) gitlab at salsa.debian.org
Fri Jun 11 22:19:30 BST 2021



Nilesh Patra pushed to branch master at Debian Med / stacks


Commits:
c366057c by Nilesh Patra at 2021-06-12T02:36:29+05:30
New upstream version 2.58+dfsg
- - - - -
11f63a97 by Nilesh Patra at 2021-06-12T02:36:32+05:30
Update upstream source from tag 'upstream/2.58+dfsg'

Update to upstream version '2.58+dfsg'
with Debian dir cd7ec33e1f38a186983ffa1cecdd5d27a0daa37f
- - - - -
4a6139a7 by Nilesh Patra at 2021-06-12T02:37:10+05:30
Interim changelog entry

- - - - -


9 changed files:

- ChangeLog
- configure.ac
- debian/changelog
- src/DNANSeq.cc
- src/DNANSeq.h
- src/aln_utils.cc
- src/kmers.cc
- src/populations.cc
- src/ustacks.cc


Changes:

=====================================
ChangeLog
=====================================
@@ -1,3 +1,7 @@
+Stacks 2.58 - June, 08, 2021
+----------------------------
+    Bugfix: Fixed several memory errors in ustacks related to processing trimmed reads. 
+
 Stacks 2.57 - May, 10, 2021
 ---------------------------
     Feature: updated process_radtags so that if you specify the same sample name in the barcodes file for multiple


=====================================
configure.ac
=====================================
@@ -2,7 +2,7 @@
 # Process this file with autoconf to produce a configure script.
 
 AC_PREREQ(2.59)
-AC_INIT([Stacks], [2.57])
+AC_INIT([Stacks], [2.58])
 AC_CONFIG_AUX_DIR([config])
 AM_INIT_AUTOMAKE([-Wall -Werror foreign parallel-tests subdir-objects])
 AC_CONFIG_SRCDIR([src/ustacks.cc])


=====================================
debian/changelog
=====================================
@@ -1,7 +1,7 @@
-stacks (2.57+dfsg-1) UNRELEASED; urgency=medium
+stacks (2.58+dfsg-1) UNRELEASED; urgency=medium
 
   * Team upload.
-  * New upstream version 2.57+dfsg
+  * New upstream version 2.58+dfsg
 
  -- Nilesh Patra <nilesh at debian.org>  Sat, 15 May 2021 17:12:18 +0530
 


=====================================
src/DNANSeq.cc
=====================================
@@ -134,15 +134,15 @@ char DNANSeq::operator[](uint pos) const {
 }
 
 void DNANSeq::seq(char* buf) const {
-    for (uint i = 0; i < size(); i++)
+    for (uint i = 0; i < this->size(); i++)
         buf[i] = (*this)[i];
-    buf[size()] = '\0';
+    buf[this->size()] = '\0';
 }
 
 string DNANSeq::seq() const {
     string str;
-    str.reserve(size());
-    for (uint i = 0; i < size(); i++)
+    str.reserve(this->size());
+    for (uint i = 0; i < this->size(); i++)
         str.push_back((*this)[i]);
     return str;
 }


=====================================
src/DNANSeq.h
=====================================
@@ -1,6 +1,6 @@
 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*-
 //
-// Copyright 2011, Julian Catchen <jcatchen at uoregon.edu>
+// Copyright 2021, Julian Catchen <jcatchen at illinois.edu>
 //
 // This file is part of Stacks.
 //


=====================================
src/aln_utils.cc
=====================================
@@ -232,15 +232,14 @@ apply_cigar_to_seq(const char *seq, const Cigar &cigar)
     string edited_seq;
 
     //
-    // Calculate the overall sequence length.
+    // Calculate the sequence length and the length prescribed by the CIGAR.
     //
-    uint seqlen = 0;
-    for (uint i = 0; i < size; i++)
-        seqlen += cigar[i].second;
+    uint seqlen       = strlen(seq);
+    uint cigar_seqlen = cigar_length_padded(cigar);
 
     bp  = 0;
 
-    edited_seq.reserve(seqlen);
+    edited_seq.reserve(cigar_seqlen);
 
     for (uint i = 0; i < size; i++)  {
         op   = cigar[i].first;
@@ -264,10 +263,14 @@ apply_cigar_to_seq(const char *seq, const Cigar &cigar)
         case 'I':
         case 'M':
             stop = bp + dist;
-            while (bp < stop) {
+            while (bp < stop && bp < seqlen) {
                 edited_seq.push_back(seq[bp]);
                 bp++;
             }
+            while (bp < stop) {
+                edited_seq.push_back('N');
+                bp++;
+            }
             break;
         default:
             break;
@@ -286,15 +289,14 @@ apply_cigar_to_model_seq(const char *seq, const Cigar &cigar)
     string edited_seq;
 
     //
-    // Calculate the overall sequence length.
+    // Calculate the sequence length and the length prescribed by the CIGAR.
     //
-    uint seqlen = 0;
-    for (uint i = 0; i < size; i++)
-        seqlen += cigar[i].second;
+    uint seqlen       = strlen(seq);
+    uint cigar_seqlen = cigar_length_padded(cigar);
 
     bp  = 0;
 
-    edited_seq.reserve(seqlen);
+    edited_seq.reserve(cigar_seqlen);
 
     for (uint i = 0; i < size; i++)  {
         op   = cigar[i].first;
@@ -318,10 +320,14 @@ apply_cigar_to_model_seq(const char *seq, const Cigar &cigar)
         case 'I':
         case 'M':
             stop = bp + dist;
-            while (bp < stop) {
+            while (bp < stop && bp < seqlen) {
                 edited_seq.push_back(seq[bp]);
                 bp++;
             }
+            while (bp < stop) {
+                edited_seq.push_back('U');
+                bp++;
+            }
             break;
         default:
             break;


=====================================
src/kmers.cc
=====================================
@@ -1,6 +1,6 @@
 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*-
 //
-// Copyright 2010-2015, Julian Catchen <jcatchen at illinois.edu>
+// Copyright 2010-2021, Julian Catchen <jcatchen at illinois.edu>
 //
 // This file is part of Stacks.
 //
@@ -196,19 +196,20 @@ populate_kmer_hash(map<int, MergedStack *> &merged, KmerHashMap &kmer_map, vecto
     MergedStack    *tag;
     vector<char *>  kmers;
     bool            exists;
+    int             num_kmers;
 
     //
     // Break each stack down into k-mers and create a hash map of those k-mers
     // recording in which sequences they occur.
     //
-    int num_kmers = strlen(merged.begin()->second->con) - kmer_len + 1;
-
     for (it = merged.begin(); it != merged.end(); it++) {
         tag = it->second;
 
         // Don't compute distances for masked tags
         if (tag->masked) continue;
 
+        num_kmers = tag->len - kmer_len + 1;
+        
         generate_kmers(tag->con, kmer_len, num_kmers, kmers);
 
         // Hash the kmers
@@ -248,7 +249,7 @@ populate_kmer_hash(map<int, Locus *> &catalog, CatKmerHashMap &kmer_map, vector<
     for (it = catalog.begin(); it != catalog.end(); it++) {
         tag = it->second;
 
-        num_kmers = strlen(tag->con) - kmer_len + 1;
+        num_kmers = tag->len - kmer_len + 1;
 
         //
         // Iterate through the possible Catalog alleles


=====================================
src/populations.cc
=====================================
@@ -3415,7 +3415,7 @@ int load_marker_list(string path, set<int> &list) {
 int
 LocusFilter::load_whitelist(string path)
 {
-    char     line[id_len];
+    char     line[max_len];
     ifstream fh(path.c_str(), ifstream::in);
 
     if (fh.fail()) {
@@ -3428,7 +3428,7 @@ LocusFilter::load_whitelist(string path)
     char *e;
 
     uint line_num = 0;
-    while (fh.getline(line, id_len)) {
+    while (fh.getline(line, max_len)) {
         ++line_num;
 
         //
@@ -3482,7 +3482,7 @@ LocusFilter::load_whitelist(string path)
 }
 
 int load_marker_column_list(string path, map<int, set<int> > &list) {
-    char     line[id_len];
+    char     line[max_len];
     ifstream fh(path.c_str(), ifstream::in);
 
     if (fh.fail()) {
@@ -3495,7 +3495,7 @@ int load_marker_column_list(string path, map<int, set<int> > &list) {
     char *e;
 
     uint line_num = 1;
-    while (fh.getline(line, id_len)) {
+    while (fh.getline(line, max_len)) {
 
         //
         // Skip blank & commented lines ; correct windows-style line ends.


=====================================
src/ustacks.cc
=====================================
@@ -1,6 +1,6 @@
 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*-
 //
-// Copyright 2010-2020, Julian Catchen <jcatchen at illinois.edu>
+// Copyright 2010-2021, Julian Catchen <jcatchen at illinois.edu>
 //
 // This file is part of Stacks.
 //
@@ -198,6 +198,7 @@ try{
     //
     cerr << "Merging secondary stacks (max. dist. N=" << max_rem_dist << " from consensus)...\n";
     size_t utilized    = merge_remainders(merged, unique, remainders);
+    call_consensus(merged, unique, remainders, false);
     size_t util_gapped = merge_gapped_remainders(merged, unique, remainders);
     call_consensus(merged, unique, remainders, false);
     cerr << "  Merged " << utilized+util_gapped << " out of " << n_r_reads << " secondary reads ("
@@ -258,6 +259,16 @@ try{
     write_results(merged, unique, remainders);
     cerr << "done.\n";
 
+    //
+    // Free remaining memory.
+    //
+    for (auto it = unique.begin(); it != unique.end(); it++)
+        delete it->second;
+    for (auto it = remainders.begin(); it != remainders.end(); it++)
+        delete it->second;
+    for (auto it = merged.begin(); it != merged.end(); it++)
+        delete it->second;
+    
     cerr << "ustacks is done.\n";
     return 0;
 } catch(std::exception& e) {
@@ -305,14 +316,13 @@ merge_gapped_alns(map<int, Stack *> &unique, map<int, Rem *> &rem, map<int, Merg
         if (tag_2->alns.size() > 1 && tag_2->alns[0].pct_id == tag_2->alns[1].pct_id)
             continue;
 
-        if (tag_1->id != (int)tag_2->alns[0].id)
+        if (tag_1->id != (int) tag_2->alns[0].id)
             continue;
 
         cigar_1 = invert_cigar(tag_1->alns[0].cigar);
         cigar_2 = tag_2->alns[0].cigar;
 
         if (cigar_1 == cigar_2) {
-            cerr << "CIGAR: " << cigar_1 << "\n";
             parse_cigar(tag_1->alns[0].cigar.c_str(), cigar);
 
             //
@@ -400,9 +410,10 @@ edit_gapped_seqs(map<int, Stack *> &unique, map<int, Rem *> &rem, MergedStack *t
     int    stack_id;
     Stack *s;
     Rem   *r;
-    char  *buf = new char[cigar_length_padded(cigar) + 1];
     string seq;
 
+    char *buf = new char[cigar_length_padded(cigar) + 1];
+    
     for (uint i = 0; i < tag->utags.size(); i++) {
         stack_id = tag->utags[i];
         s = unique[stack_id];
@@ -417,7 +428,7 @@ edit_gapped_seqs(map<int, Stack *> &unique, map<int, Rem *> &rem, MergedStack *t
     for (uint i = 0; i < tag->remtags.size(); i++) {
         stack_id = tag->remtags[i];
         r = rem[stack_id];
-
+        
         r->seq->seq(buf);
         seq = apply_cigar_to_seq(buf, cigar);
 
@@ -1025,25 +1036,7 @@ merge_gapped_remainders(map<int, MergedStack *> &merged, map<int, Stack *> &uniq
             for (uint i = 0; i < tag_1->rem_queue.size(); i++) {
                 Rem *r = rem.at(tag_1->rem_queue[i]);
 
-                // //
-                // // We want to align the last max_rem_dist * 2 nucleotides. If sequences are already the wrong length,
-                // // adjust so the 3' ends are lined up properly in the alignment matrix.
-                // //
-                // diff    = tag_1->len - r->seq->size();
-                // q_start = r->seq->size() - (max_rem_dist * 2) - 1;
-                // q_end   = r->seq->size() - 1;
-                // s_start = tag_1->len - (max_rem_dist * 2) - 1;
-                // s_end   = tag_1->len - 1;
-
-                // q_start = diff > 0 ? q_start + diff : q_start;
-                // s_start = diff < 0 ? s_start + abs(diff) : s_start;
-
-                // cerr << "Consensus size: " << tag_1->len << "; remainder size: " << r->seq->size() << "\n"
-                //      << "Con seq: " << tag_1->con << " (" << strlen(tag_1->con) << ")\n"
-                //      << "Rem seq: " << r->seq->seq() << "\n"
-                //      << "q_start: " << q_start << "; q_end: " << q_end << "; s_start: " << s_start << "; s_end: " << s_end << "\n";
-
-                aln->init(r->seq->size(), tag_1->len);
+                aln->init(r->seq->size(), tag_1->len, true);
 
                 if (aln->align(r->seq->seq().c_str(), tag_1->con)) {
                     a = aln->result();
@@ -1092,20 +1085,27 @@ merge_gapped_remainders(map<int, MergedStack *> &merged, map<int, Stack *> &uniq
                         }
                         pos += cigar[j].second;
                     }
+                    //
+                    // 1. Push newly created alignment gaps into existing constituent primary and remainder sequences.
+                    //
                     edit_gapped_seqs(unique, rem, tag_1, cigar);
-
                     //
-                    // Add this aligned remainder tag to the locus after adjusting the other reads but before adjusting the consensus.
+                    // 2. Add this aligned remainder tag to the locus after adjusting the other reads but before adjusting the consensus.
                     //
                     tag_1->remtags.push_back(r->id);
+                    //
+                    // 3. Adjust the consensus.
+                    //
                     update_consensus(tag_1, unique, rem);
                 }
             }
 
             tag_1->rem_queue.clear();
         }
-    }
 
+        delete aln;
+    }
+        
     return utilized;
 }
 
@@ -1117,24 +1117,27 @@ call_alleles(MergedStack *mtag, vector<DNANSeq *> &reads, vector<read_type> &rea
     string  allele;
     DNANSeq *d;
     char    base;
+    size_t  snp_cnt;
     vector<SNP *>::iterator snp;
 
     for (row = 0; row < height; row++) {
-        allele.clear();
-
-        uint snp_cnt = 0;
-
         //
         // Only call a haplotype from primary reads.
         //
         if (!call_sec_hapl && read_types[row] == secondary) continue;
 
+        allele.clear();
+        snp_cnt = 0;
+        d = reads[row];
+
         for (snp = mtag->snps.begin(); snp != mtag->snps.end(); snp++) {
             if ((*snp)->type != snp_type_het) continue;
 
             snp_cnt++;
 
-            d    = reads[row];
+            if ((*snp)->col >= d->size())
+                break;
+
             base = (*d)[(*snp)->col];
 
             //
@@ -1183,30 +1186,34 @@ call_consensus(map<int, MergedStack *> &merged, map<int, Stack *> &unique, map<i
             // that tag into our array as many times as it originally occurred.
             //
             vector<int>::iterator j;
-            vector<DNANSeq *> reads;
-            vector<read_type> read_types;
-            uint length = unique[mtag->utags.front()]->seq->size();
-
+            vector<DNANSeq *>     reads;
+            vector<read_type>     read_types;
+            size_t                length = 0;
+            
             for (j = mtag->utags.begin(); j != mtag->utags.end(); j++) {
                 utag = unique[*j];
 
+                // Check for maximum length.
+                length = utag->seq->size() > length ? utag->seq->size() : length;
+
                 for (uint k = 0; k < utag->count(); k++) {
                     reads.push_back(utag->seq);
                     read_types.push_back(primary);
-
-                    assert(utag->seq->size() == length);
                 }
             }
 
+            //
             // For each remainder tag that has been merged into this Stack, add the sequence.
+            //
             for (j = mtag->remtags.begin(); j != mtag->remtags.end(); j++) {
                 r = rem[*j];
 
+                // Check for maximum length.
+                length = r->seq->size() > length ? r->seq->size() : length;
+
                 for (uint k = 0; k < r->count(); k++) {
                     reads.push_back(r->seq);
                     read_types.push_back(secondary);
-
-                    assert(r->seq->size() == length);
                 }
             }
 
@@ -1230,7 +1237,7 @@ call_consensus(map<int, MergedStack *> &merged, map<int, Stack *> &unique, map<i
 
                 for (row = 0; row < height; row++) {
                     d = reads[row];
-                    if (nuc.count((*d)[col]))
+                    if (col < d->size() && nuc.count((*d)[col]))
                         nuc[(*d)[col]]++;
                 }
 
@@ -1267,13 +1274,13 @@ call_consensus(map<int, MergedStack *> &merged, map<int, Stack *> &unique, map<i
                     }
 
                     switch(model_type) {
-                    case snp:
+                    case modelt::snp:
                         call_multinomial_snp(mtag, col, nuc, true);
                         break;
-                    case bounded:
+                    case modelt::bounded:
                         call_bounded_multinomial_snp(mtag, col, nuc, true);
                         break;
-                    case ::fixed:
+                    case modelt::fixed:
                         call_multinomial_fixed(mtag, col, nuc);
                         break;
                     default:
@@ -1290,7 +1297,7 @@ call_consensus(map<int, MergedStack *> &merged, map<int, Stack *> &unique, map<i
 
                 call_alleles(mtag, reads, read_types);
 
-                if (model_type == ::fixed) {
+                if (model_type == modelt::fixed) {
                     //
                     // Mask nucleotides that are not fixed.
                     //
@@ -1314,6 +1321,8 @@ update_consensus(MergedStack *mtag, map<int, Stack *> &unique, map<int, Rem *> &
 {
     Stack *utag;
     Rem   *r;
+    size_t length = 0;
+    
     //
     // Create a two-dimensional array, each row containing one read. For
     // each unique tag that has been merged together, add the sequence for
@@ -1324,6 +1333,9 @@ update_consensus(MergedStack *mtag, map<int, Stack *> &unique, map<int, Rem *> &
     for (auto j = mtag->utags.begin(); j != mtag->utags.end(); j++) {
         utag = unique[*j];
 
+        // Check for maximum length.
+        length = utag->seq->size() > length ? utag->seq->size() : length;
+
         for (uint k = 0; k < utag->count(); k++)
             reads.push_back(utag->seq);
     }
@@ -1332,6 +1344,9 @@ update_consensus(MergedStack *mtag, map<int, Stack *> &unique, map<int, Rem *> &
     for (auto j = mtag->remtags.begin(); j != mtag->remtags.end(); j++) {
         r = rem[*j];
 
+        // Check for maximum length.
+        length = r->seq->size() > length ? r->seq->size() : length;
+
         for (uint k = 0; k < r->count(); k++)
             reads.push_back(r->seq);
     }
@@ -1341,7 +1356,6 @@ update_consensus(MergedStack *mtag, map<int, Stack *> &unique, map<int, Rem *> &
     //
     string   con;
     uint     row, col;
-    uint     length = reads[0]->size();
     uint     height = reads.size();
     DNANSeq *d;
 
@@ -1358,6 +1372,10 @@ update_consensus(MergedStack *mtag, map<int, Stack *> &unique, map<int, Rem *> &
 
         for (row = 0; row < height; row++) {
             d = reads[row];
+
+            if (col >= d->size())
+                continue;
+            
             switch ((*d)[col]) {
             case 'A':
                 nucs[0]++;
@@ -1672,7 +1690,9 @@ merge_tags(MergedStack *tag_1, MergedStack *tag_2, int id)
     return new_tag;
 }
 
-MergedStack *merge_tags(map<int, MergedStack *> &merged, set<int> &merge_list, int id) {
+MergedStack *
+merge_tags(map<int, MergedStack *> &merged, set<int> &merge_list, int id)
+{
     MergedStack *tag_1, *tag_2;
 
     tag_1     = new MergedStack;
@@ -1701,7 +1721,9 @@ MergedStack *merge_tags(map<int, MergedStack *> &merged, set<int> &merge_list, i
     return tag_1;
 }
 
-MergedStack *merge_tags(map<int, MergedStack *> &merged, int *merge_list, int merge_list_size, int id) {
+MergedStack *
+merge_tags(map<int, MergedStack *> &merged, int *merge_list, int merge_list_size, int id)
+{
     int                   i;
     vector<int>::iterator j;
     MergedStack *tag_1, *tag_2;
@@ -2703,7 +2725,7 @@ void load_radtags(string in_file, DNASeqHashMap &radtags, size_t& n_reads) {
         cerr << "Error: different sequence lengths detected, this will interfere with Stacks "
              << "algorithms, trim reads to uniform length (override this check with --force-diff-len).\n";
         exit(1);
-    } else if (force_diff_len) {
+    } else if (len_mismatch && force_diff_len) {
         cerr << "Warning: different sequence lengths detected, this could interfere with Stacks algorithms.\n";
     }
     if (corrected > 0)
@@ -3026,11 +3048,12 @@ void help() {
          << "  H: disable calling haplotypes from secondary reads.\n"
          << "\n"
          << "  Stack assembly options:\n"
-         << "    d,--deleverage: enable the Deleveraging algorithm, used for resolving over merged tags.\n"
+         << "    --force-diff-len: allow raw input reads of different lengths, e.g. after trimming (default: ustacks perfers raw input reads of uniform length).\n"
          << "    --keep-high-cov: disable the algorithm that removes highly-repetitive stacks and nearby errors.\n"
          << "    --high-cov-thres: highly-repetitive stacks threshold, in standard deviation units (default: 3.0).\n"
          << "    --max-locus-stacks <num>: maximum number of stacks at a single de novo locus (default 3).\n"
-         << "     --k-len <len>: specify k-mer size for matching between alleles and loci (automatically calculated by default).\n\n"
+         << "     --k-len <len>: specify k-mer size for matching between alleles and loci (automatically calculated by default).\n"
+         << "    --deleverage: enable the Deleveraging algorithm, used for resolving over merged tags.\n\n"
          << "  Gapped assembly options:\n"
          << "    --max-gaps: number of gaps allowed between stacks before merging (default: 2).\n"
          << "    --min-aln-len: minimum length of aligned sequence in a gapped alignment (default: 0.80).\n\n"



View it on GitLab: https://salsa.debian.org/med-team/stacks/-/compare/45ba2bd2e6aecab03b507669aa1d377c44f753aa...4a6139a7ac614a947168a87fd345048133ce1ecf

-- 
View it on GitLab: https://salsa.debian.org/med-team/stacks/-/compare/45ba2bd2e6aecab03b507669aa1d377c44f753aa...4a6139a7ac614a947168a87fd345048133ce1ecf
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210611/67dbfb97/attachment-0001.htm>


More information about the debian-med-commit mailing list