[med-svn] [Git][med-team/stacks][master] 3 commits: New upstream version 2.58+dfsg
Nilesh Patra (@nilesh)
gitlab at salsa.debian.org
Fri Jun 11 22:19:30 BST 2021
Nilesh Patra pushed to branch master at Debian Med / stacks
Commits:
c366057c by Nilesh Patra at 2021-06-12T02:36:29+05:30
New upstream version 2.58+dfsg
- - - - -
11f63a97 by Nilesh Patra at 2021-06-12T02:36:32+05:30
Update upstream source from tag 'upstream/2.58+dfsg'
Update to upstream version '2.58+dfsg'
with Debian dir cd7ec33e1f38a186983ffa1cecdd5d27a0daa37f
- - - - -
4a6139a7 by Nilesh Patra at 2021-06-12T02:37:10+05:30
Interim changelog entry
- - - - -
9 changed files:
- ChangeLog
- configure.ac
- debian/changelog
- src/DNANSeq.cc
- src/DNANSeq.h
- src/aln_utils.cc
- src/kmers.cc
- src/populations.cc
- src/ustacks.cc
Changes:
=====================================
ChangeLog
=====================================
@@ -1,3 +1,7 @@
+Stacks 2.58 - June, 08, 2021
+----------------------------
+ Bugfix: Fixed several memory errors in ustacks related to processing trimmed reads.
+
Stacks 2.57 - May, 10, 2021
---------------------------
Feature: updated process_radtags so that if you specify the same sample name in the barcodes file for multiple
=====================================
configure.ac
=====================================
@@ -2,7 +2,7 @@
# Process this file with autoconf to produce a configure script.
AC_PREREQ(2.59)
-AC_INIT([Stacks], [2.57])
+AC_INIT([Stacks], [2.58])
AC_CONFIG_AUX_DIR([config])
AM_INIT_AUTOMAKE([-Wall -Werror foreign parallel-tests subdir-objects])
AC_CONFIG_SRCDIR([src/ustacks.cc])
=====================================
debian/changelog
=====================================
@@ -1,7 +1,7 @@
-stacks (2.57+dfsg-1) UNRELEASED; urgency=medium
+stacks (2.58+dfsg-1) UNRELEASED; urgency=medium
* Team upload.
- * New upstream version 2.57+dfsg
+ * New upstream version 2.58+dfsg
-- Nilesh Patra <nilesh at debian.org> Sat, 15 May 2021 17:12:18 +0530
=====================================
src/DNANSeq.cc
=====================================
@@ -134,15 +134,15 @@ char DNANSeq::operator[](uint pos) const {
}
void DNANSeq::seq(char* buf) const {
- for (uint i = 0; i < size(); i++)
+ for (uint i = 0; i < this->size(); i++)
buf[i] = (*this)[i];
- buf[size()] = '\0';
+ buf[this->size()] = '\0';
}
string DNANSeq::seq() const {
string str;
- str.reserve(size());
- for (uint i = 0; i < size(); i++)
+ str.reserve(this->size());
+ for (uint i = 0; i < this->size(); i++)
str.push_back((*this)[i]);
return str;
}
=====================================
src/DNANSeq.h
=====================================
@@ -1,6 +1,6 @@
// -*-mode:c++; c-style:k&r; c-basic-offset:4;-*-
//
-// Copyright 2011, Julian Catchen <jcatchen at uoregon.edu>
+// Copyright 2021, Julian Catchen <jcatchen at illinois.edu>
//
// This file is part of Stacks.
//
=====================================
src/aln_utils.cc
=====================================
@@ -232,15 +232,14 @@ apply_cigar_to_seq(const char *seq, const Cigar &cigar)
string edited_seq;
//
- // Calculate the overall sequence length.
+ // Calculate the sequence length and the length prescribed by the CIGAR.
//
- uint seqlen = 0;
- for (uint i = 0; i < size; i++)
- seqlen += cigar[i].second;
+ uint seqlen = strlen(seq);
+ uint cigar_seqlen = cigar_length_padded(cigar);
bp = 0;
- edited_seq.reserve(seqlen);
+ edited_seq.reserve(cigar_seqlen);
for (uint i = 0; i < size; i++) {
op = cigar[i].first;
@@ -264,10 +263,14 @@ apply_cigar_to_seq(const char *seq, const Cigar &cigar)
case 'I':
case 'M':
stop = bp + dist;
- while (bp < stop) {
+ while (bp < stop && bp < seqlen) {
edited_seq.push_back(seq[bp]);
bp++;
}
+ while (bp < stop) {
+ edited_seq.push_back('N');
+ bp++;
+ }
break;
default:
break;
@@ -286,15 +289,14 @@ apply_cigar_to_model_seq(const char *seq, const Cigar &cigar)
string edited_seq;
//
- // Calculate the overall sequence length.
+ // Calculate the sequence length and the length prescribed by the CIGAR.
//
- uint seqlen = 0;
- for (uint i = 0; i < size; i++)
- seqlen += cigar[i].second;
+ uint seqlen = strlen(seq);
+ uint cigar_seqlen = cigar_length_padded(cigar);
bp = 0;
- edited_seq.reserve(seqlen);
+ edited_seq.reserve(cigar_seqlen);
for (uint i = 0; i < size; i++) {
op = cigar[i].first;
@@ -318,10 +320,14 @@ apply_cigar_to_model_seq(const char *seq, const Cigar &cigar)
case 'I':
case 'M':
stop = bp + dist;
- while (bp < stop) {
+ while (bp < stop && bp < seqlen) {
edited_seq.push_back(seq[bp]);
bp++;
}
+ while (bp < stop) {
+ edited_seq.push_back('U');
+ bp++;
+ }
break;
default:
break;
=====================================
src/kmers.cc
=====================================
@@ -1,6 +1,6 @@
// -*-mode:c++; c-style:k&r; c-basic-offset:4;-*-
//
-// Copyright 2010-2015, Julian Catchen <jcatchen at illinois.edu>
+// Copyright 2010-2021, Julian Catchen <jcatchen at illinois.edu>
//
// This file is part of Stacks.
//
@@ -196,19 +196,20 @@ populate_kmer_hash(map<int, MergedStack *> &merged, KmerHashMap &kmer_map, vecto
MergedStack *tag;
vector<char *> kmers;
bool exists;
+ int num_kmers;
//
// Break each stack down into k-mers and create a hash map of those k-mers
// recording in which sequences they occur.
//
- int num_kmers = strlen(merged.begin()->second->con) - kmer_len + 1;
-
for (it = merged.begin(); it != merged.end(); it++) {
tag = it->second;
// Don't compute distances for masked tags
if (tag->masked) continue;
+ num_kmers = tag->len - kmer_len + 1;
+
generate_kmers(tag->con, kmer_len, num_kmers, kmers);
// Hash the kmers
@@ -248,7 +249,7 @@ populate_kmer_hash(map<int, Locus *> &catalog, CatKmerHashMap &kmer_map, vector<
for (it = catalog.begin(); it != catalog.end(); it++) {
tag = it->second;
- num_kmers = strlen(tag->con) - kmer_len + 1;
+ num_kmers = tag->len - kmer_len + 1;
//
// Iterate through the possible Catalog alleles
=====================================
src/populations.cc
=====================================
@@ -3415,7 +3415,7 @@ int load_marker_list(string path, set<int> &list) {
int
LocusFilter::load_whitelist(string path)
{
- char line[id_len];
+ char line[max_len];
ifstream fh(path.c_str(), ifstream::in);
if (fh.fail()) {
@@ -3428,7 +3428,7 @@ LocusFilter::load_whitelist(string path)
char *e;
uint line_num = 0;
- while (fh.getline(line, id_len)) {
+ while (fh.getline(line, max_len)) {
++line_num;
//
@@ -3482,7 +3482,7 @@ LocusFilter::load_whitelist(string path)
}
int load_marker_column_list(string path, map<int, set<int> > &list) {
- char line[id_len];
+ char line[max_len];
ifstream fh(path.c_str(), ifstream::in);
if (fh.fail()) {
@@ -3495,7 +3495,7 @@ int load_marker_column_list(string path, map<int, set<int> > &list) {
char *e;
uint line_num = 1;
- while (fh.getline(line, id_len)) {
+ while (fh.getline(line, max_len)) {
//
// Skip blank & commented lines ; correct windows-style line ends.
=====================================
src/ustacks.cc
=====================================
@@ -1,6 +1,6 @@
// -*-mode:c++; c-style:k&r; c-basic-offset:4;-*-
//
-// Copyright 2010-2020, Julian Catchen <jcatchen at illinois.edu>
+// Copyright 2010-2021, Julian Catchen <jcatchen at illinois.edu>
//
// This file is part of Stacks.
//
@@ -198,6 +198,7 @@ try{
//
cerr << "Merging secondary stacks (max. dist. N=" << max_rem_dist << " from consensus)...\n";
size_t utilized = merge_remainders(merged, unique, remainders);
+ call_consensus(merged, unique, remainders, false);
size_t util_gapped = merge_gapped_remainders(merged, unique, remainders);
call_consensus(merged, unique, remainders, false);
cerr << " Merged " << utilized+util_gapped << " out of " << n_r_reads << " secondary reads ("
@@ -258,6 +259,16 @@ try{
write_results(merged, unique, remainders);
cerr << "done.\n";
+ //
+ // Free remaining memory.
+ //
+ for (auto it = unique.begin(); it != unique.end(); it++)
+ delete it->second;
+ for (auto it = remainders.begin(); it != remainders.end(); it++)
+ delete it->second;
+ for (auto it = merged.begin(); it != merged.end(); it++)
+ delete it->second;
+
cerr << "ustacks is done.\n";
return 0;
} catch(std::exception& e) {
@@ -305,14 +316,13 @@ merge_gapped_alns(map<int, Stack *> &unique, map<int, Rem *> &rem, map<int, Merg
if (tag_2->alns.size() > 1 && tag_2->alns[0].pct_id == tag_2->alns[1].pct_id)
continue;
- if (tag_1->id != (int)tag_2->alns[0].id)
+ if (tag_1->id != (int) tag_2->alns[0].id)
continue;
cigar_1 = invert_cigar(tag_1->alns[0].cigar);
cigar_2 = tag_2->alns[0].cigar;
if (cigar_1 == cigar_2) {
- cerr << "CIGAR: " << cigar_1 << "\n";
parse_cigar(tag_1->alns[0].cigar.c_str(), cigar);
//
@@ -400,9 +410,10 @@ edit_gapped_seqs(map<int, Stack *> &unique, map<int, Rem *> &rem, MergedStack *t
int stack_id;
Stack *s;
Rem *r;
- char *buf = new char[cigar_length_padded(cigar) + 1];
string seq;
+ char *buf = new char[cigar_length_padded(cigar) + 1];
+
for (uint i = 0; i < tag->utags.size(); i++) {
stack_id = tag->utags[i];
s = unique[stack_id];
@@ -417,7 +428,7 @@ edit_gapped_seqs(map<int, Stack *> &unique, map<int, Rem *> &rem, MergedStack *t
for (uint i = 0; i < tag->remtags.size(); i++) {
stack_id = tag->remtags[i];
r = rem[stack_id];
-
+
r->seq->seq(buf);
seq = apply_cigar_to_seq(buf, cigar);
@@ -1025,25 +1036,7 @@ merge_gapped_remainders(map<int, MergedStack *> &merged, map<int, Stack *> &uniq
for (uint i = 0; i < tag_1->rem_queue.size(); i++) {
Rem *r = rem.at(tag_1->rem_queue[i]);
- // //
- // // We want to align the last max_rem_dist * 2 nucleotides. If sequences are already the wrong length,
- // // adjust so the 3' ends are lined up properly in the alignment matrix.
- // //
- // diff = tag_1->len - r->seq->size();
- // q_start = r->seq->size() - (max_rem_dist * 2) - 1;
- // q_end = r->seq->size() - 1;
- // s_start = tag_1->len - (max_rem_dist * 2) - 1;
- // s_end = tag_1->len - 1;
-
- // q_start = diff > 0 ? q_start + diff : q_start;
- // s_start = diff < 0 ? s_start + abs(diff) : s_start;
-
- // cerr << "Consensus size: " << tag_1->len << "; remainder size: " << r->seq->size() << "\n"
- // << "Con seq: " << tag_1->con << " (" << strlen(tag_1->con) << ")\n"
- // << "Rem seq: " << r->seq->seq() << "\n"
- // << "q_start: " << q_start << "; q_end: " << q_end << "; s_start: " << s_start << "; s_end: " << s_end << "\n";
-
- aln->init(r->seq->size(), tag_1->len);
+ aln->init(r->seq->size(), tag_1->len, true);
if (aln->align(r->seq->seq().c_str(), tag_1->con)) {
a = aln->result();
@@ -1092,20 +1085,27 @@ merge_gapped_remainders(map<int, MergedStack *> &merged, map<int, Stack *> &uniq
}
pos += cigar[j].second;
}
+ //
+ // 1. Push newly created alignment gaps into existing constituent primary and remainder sequences.
+ //
edit_gapped_seqs(unique, rem, tag_1, cigar);
-
//
- // Add this aligned remainder tag to the locus after adjusting the other reads but before adjusting the consensus.
+ // 2. Add this aligned remainder tag to the locus after adjusting the other reads but before adjusting the consensus.
//
tag_1->remtags.push_back(r->id);
+ //
+ // 3. Adjust the consensus.
+ //
update_consensus(tag_1, unique, rem);
}
}
tag_1->rem_queue.clear();
}
- }
+ delete aln;
+ }
+
return utilized;
}
@@ -1117,24 +1117,27 @@ call_alleles(MergedStack *mtag, vector<DNANSeq *> &reads, vector<read_type> &rea
string allele;
DNANSeq *d;
char base;
+ size_t snp_cnt;
vector<SNP *>::iterator snp;
for (row = 0; row < height; row++) {
- allele.clear();
-
- uint snp_cnt = 0;
-
//
// Only call a haplotype from primary reads.
//
if (!call_sec_hapl && read_types[row] == secondary) continue;
+ allele.clear();
+ snp_cnt = 0;
+ d = reads[row];
+
for (snp = mtag->snps.begin(); snp != mtag->snps.end(); snp++) {
if ((*snp)->type != snp_type_het) continue;
snp_cnt++;
- d = reads[row];
+ if ((*snp)->col >= d->size())
+ break;
+
base = (*d)[(*snp)->col];
//
@@ -1183,30 +1186,34 @@ call_consensus(map<int, MergedStack *> &merged, map<int, Stack *> &unique, map<i
// that tag into our array as many times as it originally occurred.
//
vector<int>::iterator j;
- vector<DNANSeq *> reads;
- vector<read_type> read_types;
- uint length = unique[mtag->utags.front()]->seq->size();
-
+ vector<DNANSeq *> reads;
+ vector<read_type> read_types;
+ size_t length = 0;
+
for (j = mtag->utags.begin(); j != mtag->utags.end(); j++) {
utag = unique[*j];
+ // Check for maximum length.
+ length = utag->seq->size() > length ? utag->seq->size() : length;
+
for (uint k = 0; k < utag->count(); k++) {
reads.push_back(utag->seq);
read_types.push_back(primary);
-
- assert(utag->seq->size() == length);
}
}
+ //
// For each remainder tag that has been merged into this Stack, add the sequence.
+ //
for (j = mtag->remtags.begin(); j != mtag->remtags.end(); j++) {
r = rem[*j];
+ // Check for maximum length.
+ length = r->seq->size() > length ? r->seq->size() : length;
+
for (uint k = 0; k < r->count(); k++) {
reads.push_back(r->seq);
read_types.push_back(secondary);
-
- assert(r->seq->size() == length);
}
}
@@ -1230,7 +1237,7 @@ call_consensus(map<int, MergedStack *> &merged, map<int, Stack *> &unique, map<i
for (row = 0; row < height; row++) {
d = reads[row];
- if (nuc.count((*d)[col]))
+ if (col < d->size() && nuc.count((*d)[col]))
nuc[(*d)[col]]++;
}
@@ -1267,13 +1274,13 @@ call_consensus(map<int, MergedStack *> &merged, map<int, Stack *> &unique, map<i
}
switch(model_type) {
- case snp:
+ case modelt::snp:
call_multinomial_snp(mtag, col, nuc, true);
break;
- case bounded:
+ case modelt::bounded:
call_bounded_multinomial_snp(mtag, col, nuc, true);
break;
- case ::fixed:
+ case modelt::fixed:
call_multinomial_fixed(mtag, col, nuc);
break;
default:
@@ -1290,7 +1297,7 @@ call_consensus(map<int, MergedStack *> &merged, map<int, Stack *> &unique, map<i
call_alleles(mtag, reads, read_types);
- if (model_type == ::fixed) {
+ if (model_type == modelt::fixed) {
//
// Mask nucleotides that are not fixed.
//
@@ -1314,6 +1321,8 @@ update_consensus(MergedStack *mtag, map<int, Stack *> &unique, map<int, Rem *> &
{
Stack *utag;
Rem *r;
+ size_t length = 0;
+
//
// Create a two-dimensional array, each row containing one read. For
// each unique tag that has been merged together, add the sequence for
@@ -1324,6 +1333,9 @@ update_consensus(MergedStack *mtag, map<int, Stack *> &unique, map<int, Rem *> &
for (auto j = mtag->utags.begin(); j != mtag->utags.end(); j++) {
utag = unique[*j];
+ // Check for maximum length.
+ length = utag->seq->size() > length ? utag->seq->size() : length;
+
for (uint k = 0; k < utag->count(); k++)
reads.push_back(utag->seq);
}
@@ -1332,6 +1344,9 @@ update_consensus(MergedStack *mtag, map<int, Stack *> &unique, map<int, Rem *> &
for (auto j = mtag->remtags.begin(); j != mtag->remtags.end(); j++) {
r = rem[*j];
+ // Check for maximum length.
+ length = r->seq->size() > length ? r->seq->size() : length;
+
for (uint k = 0; k < r->count(); k++)
reads.push_back(r->seq);
}
@@ -1341,7 +1356,6 @@ update_consensus(MergedStack *mtag, map<int, Stack *> &unique, map<int, Rem *> &
//
string con;
uint row, col;
- uint length = reads[0]->size();
uint height = reads.size();
DNANSeq *d;
@@ -1358,6 +1372,10 @@ update_consensus(MergedStack *mtag, map<int, Stack *> &unique, map<int, Rem *> &
for (row = 0; row < height; row++) {
d = reads[row];
+
+ if (col >= d->size())
+ continue;
+
switch ((*d)[col]) {
case 'A':
nucs[0]++;
@@ -1672,7 +1690,9 @@ merge_tags(MergedStack *tag_1, MergedStack *tag_2, int id)
return new_tag;
}
-MergedStack *merge_tags(map<int, MergedStack *> &merged, set<int> &merge_list, int id) {
+MergedStack *
+merge_tags(map<int, MergedStack *> &merged, set<int> &merge_list, int id)
+{
MergedStack *tag_1, *tag_2;
tag_1 = new MergedStack;
@@ -1701,7 +1721,9 @@ MergedStack *merge_tags(map<int, MergedStack *> &merged, set<int> &merge_list, i
return tag_1;
}
-MergedStack *merge_tags(map<int, MergedStack *> &merged, int *merge_list, int merge_list_size, int id) {
+MergedStack *
+merge_tags(map<int, MergedStack *> &merged, int *merge_list, int merge_list_size, int id)
+{
int i;
vector<int>::iterator j;
MergedStack *tag_1, *tag_2;
@@ -2703,7 +2725,7 @@ void load_radtags(string in_file, DNASeqHashMap &radtags, size_t& n_reads) {
cerr << "Error: different sequence lengths detected, this will interfere with Stacks "
<< "algorithms, trim reads to uniform length (override this check with --force-diff-len).\n";
exit(1);
- } else if (force_diff_len) {
+ } else if (len_mismatch && force_diff_len) {
cerr << "Warning: different sequence lengths detected, this could interfere with Stacks algorithms.\n";
}
if (corrected > 0)
@@ -3026,11 +3048,12 @@ void help() {
<< " H: disable calling haplotypes from secondary reads.\n"
<< "\n"
<< " Stack assembly options:\n"
- << " d,--deleverage: enable the Deleveraging algorithm, used for resolving over merged tags.\n"
+ << " --force-diff-len: allow raw input reads of different lengths, e.g. after trimming (default: ustacks perfers raw input reads of uniform length).\n"
<< " --keep-high-cov: disable the algorithm that removes highly-repetitive stacks and nearby errors.\n"
<< " --high-cov-thres: highly-repetitive stacks threshold, in standard deviation units (default: 3.0).\n"
<< " --max-locus-stacks <num>: maximum number of stacks at a single de novo locus (default 3).\n"
- << " --k-len <len>: specify k-mer size for matching between alleles and loci (automatically calculated by default).\n\n"
+ << " --k-len <len>: specify k-mer size for matching between alleles and loci (automatically calculated by default).\n"
+ << " --deleverage: enable the Deleveraging algorithm, used for resolving over merged tags.\n\n"
<< " Gapped assembly options:\n"
<< " --max-gaps: number of gaps allowed between stacks before merging (default: 2).\n"
<< " --min-aln-len: minimum length of aligned sequence in a gapped alignment (default: 0.80).\n\n"
View it on GitLab: https://salsa.debian.org/med-team/stacks/-/compare/45ba2bd2e6aecab03b507669aa1d377c44f753aa...4a6139a7ac614a947168a87fd345048133ce1ecf
--
View it on GitLab: https://salsa.debian.org/med-team/stacks/-/compare/45ba2bd2e6aecab03b507669aa1d377c44f753aa...4a6139a7ac614a947168a87fd345048133ce1ecf
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210611/67dbfb97/attachment-0001.htm>
More information about the debian-med-commit
mailing list