[med-svn] [Git][med-team/stacks][upstream] New upstream version 2.55+dfsg
Nilesh Patra
gitlab at salsa.debian.org
Sun Jan 10 12:11:54 GMT 2021
Nilesh Patra pushed to branch upstream at Debian Med / stacks
Commits:
9b10a4d4 by Nilesh Patra at 2021-01-10T17:35:37+05:30
New upstream version 2.55+dfsg
- - - - -
12 changed files:
- ChangeLog
- Makefile.am
- configure.ac
- scripts/stacks-count-reads-per-sample-per-locus
- scripts/stacks-integrate-alignments
- src/GappedAln.h
- src/PopSum.cc
- src/export_formats.cc
- src/export_formats.h
- src/populations.cc
- src/renz.cc
- src/ustacks.cc
Changes:
=====================================
ChangeLog
=====================================
@@ -1,3 +1,8 @@
+Stacks 2.55 - January 07, 2021
+------------------------------
+ Feature: Added NgoMIV restriction enzyme to process_radtags.
+ Feature: Added GTF export to populations, for reference-aligned data.
+
Stacks 2.54 - September 03, 2020
--------------------------------
Feature: Added BtgI, PacI, and PspXI, HpyCH4IV restriction enzymes to process_radtags.
=====================================
Makefile.am
=====================================
@@ -101,7 +101,7 @@ EXTRA_DIST = LICENSE INSTALL README ChangeLog $(TESTS)
pkglocalstatedir = $(localstatedir)/$(PACKAGE)
debug:
- $(MAKE) all "CXXFLAGS=-g -Wall -DDEBUG -O0"
+ $(MAKE) all "CXXFLAGS=-g -Wall -DDEBUG -Og"
install-data-hook:
sed -e 's,_VERSION_,$(VERSION),' -e 's,_BINDIR_,$(bindir)/,g' -e 's,_PKGDATADIR_,$(pkgdatadir)/,g' $(DESTDIR)$(bindir)/denovo_map.pl > $(DESTDIR)$(bindir)/denovo_map.pl.subst
=====================================
configure.ac
=====================================
@@ -2,7 +2,7 @@
# Process this file with autoconf to produce a configure script.
AC_PREREQ(2.59)
-AC_INIT([Stacks], [2.54])
+AC_INIT([Stacks], [2.55])
AC_CONFIG_AUX_DIR([config])
AM_INIT_AUTOMAKE([-Wall -Werror foreign parallel-tests subdir-objects])
AC_CONFIG_SRCDIR([src/ustacks.cc])
=====================================
scripts/stacks-count-reads-per-sample-per-locus
=====================================
@@ -3,9 +3,13 @@ set -e -o pipefail
out_path=reads_per_sample_per_locus.tsv
usage="\
-usage: $(basename ${BASH_SOURCE[0]}) STACKS_DENOVO_DIR
+usage: $(basename "${BASH_SOURCE[0]}") STACKS_DENOVO_DIR
-Reads the 'matches.bam' files; outputs to '$out_path'.
+Reads all samples' \"matches.bam\" files to tally the number of samples present
+and number of reads present at all loci in the catalog. Then, runs
+stacks-hist2d-loci-samples-coverage.
+
+Outputs to '$out_path' and '$out_path.pdf'.
"
if (( $# != 1 )) || [[ "$1" == -h ]] || [[ "$1" == --help ]] ;then
@@ -16,8 +20,8 @@ cd "$1" || exit
command -v samtools >/dev/null || { samtools; exit; }
command -v Rscript >/dev/null || { Rscript; exit; }
+ls ./*.matches.bam >/dev/null || exit
for f in *.matches.bam ;do
- ls "$f" >/dev/null || exit
sample=${f%.matches.bam}
samtools view -f0x40 $f |
cut -f3 |
@@ -26,3 +30,6 @@ done | Rscript -e "\
t = table(read.table('stdin'));\
rownames(t) = paste0('c',rownames(t));\
write.table(t,sep='\t',quote=F,file='$out_path')"
+
+# Plot the table.
+"$(dirname "${BASH_SOURCE[0]}")/stacks-hist2d-loci-samples-coverage" "$out_path"
=====================================
scripts/stacks-integrate-alignments
=====================================
@@ -2,7 +2,7 @@
usage="\
Usage:
- $(basename "$0") -P stacks_dir -B bam_file -O out_dir
+ $(basename "$0") -P stacks_dir -B bam_file [-q min_MAPQ=20] -O out_dir
Extracts the coordinates of the RAD loci from the given BAM file into a
'locus_coordinates.tsv' table, then rewrites the 'catalog.fa.gz' and
@@ -33,11 +33,13 @@ for arg in "$@" ;do
done
# Parse arguments.
-while getopts 'P:B:O:hv' opt ;do
+min_mapq=20
+while getopts 'P:B:O:q:hv' opt ;do
case "$opt" in
P) stacks_d="${OPTARG/%\//}" ;;
B) bam_f="${OPTARG/%\//}" ;;
O) out_d="${OPTARG/%\//}" ;;
+ q) min_mapq="$OPTARG" ;;
h) echo -n "$usage" >&2; exit 1 ;;
v) echo "$version" >&2; exit 1 ;;
*) echo "Error: Bad arguments (-h for help)." >&2; exit 1 ;;
@@ -50,9 +52,10 @@ if [[ $# -ne 0 ]] ;then
fi
# Check them.
-[[ "$stacks_d" ]] || { echo -n "Error: -P is required." >&2; exit 1; }
-[[ "$bam_f" ]] || { echo -n "Error: -B is required." >&2; exit 1; }
-[[ "$out_d" ]] || { echo -n "Error: -O is required." >&2; exit 1; }
+[[ "$stacks_d" ]] || { echo "Error: -P is required." >&2; exit 1; }
+[[ "$bam_f" ]] || { echo "Error: -B is required." >&2; exit 1; }
+[[ "$out_d" ]] || { echo "Error: -O is required." >&2; exit 1; }
+[[ "$min_mapq" =~ ^[0-9]+$ ]] || { echo "Error: -q expects a positive integer (received '$min_mapq')." >&2; exit 1; }
[[ -e "$stacks_d" ]] || { ls -- "$stacks_d"; exit 1; }
[[ -e "$bam_f" ]] || { ls -- "$bam_f"; exit 1; }
@@ -95,7 +98,7 @@ echo
# ==========
echo "Extracting locus coordinates..."
-samtools view -F 0x904 "$bam_f" |
+samtools view -F 0x904 -q "$min_mapq" "$bam_f" |
# Convert SAM to `LOCUS_ID \t CHR \t POS \t STRAND`.
awk -F '\t' -v OFS='\t' '{
if (int( $2 % (2*16) / 16 )) {
=====================================
src/GappedAln.h
=====================================
@@ -273,6 +273,7 @@ GappedAln::align(const string& tag_1, const string& tag_2)
}
this->score(false, tag_1, 1, this->_m - 1, tag_2, 1, this->_n - 1);
+ // this->dump_alignment(tag_1, tag_2);
if (this->trace_global_alignment(tag_1, tag_2))
return 1;
@@ -505,6 +506,7 @@ GappedAln::score(bool local,
//
for (int i = q_start; i <= q_end; i++) {
for (int j = s_start; j <= s_end; j++) {
+
// Calculate the score:
// 1) If we were to move down from the above cell.
score_down = this->matrix[i - 1][j];
@@ -771,11 +773,10 @@ GappedAln::trace_global_alignment(const string& tag_1, const string& tag_2)
cigar += buf;
}
+ // cerr << "Cigar: " << cigar << "\n";
+
alns.push_back(AlignRes(move(cigar), gaps, contiguity, (ident / (double) len)));
- // cerr << aln_1 << " [" << cigar << ", contiguity: " << contiguity << ", gaps: " << gaps << "]\n"
- // << aln_2 << "\n";
-
} while (more_paths);
sort(alns.begin(), alns.end(), compare_alignres);
=====================================
src/PopSum.cc
=====================================
@@ -829,6 +829,10 @@ LocPopSum::haplotype_diversity(int start, int end, Datum const*const* d)
//
// Calculate haplotype diversity, Pi.
+ // Defined in Tajima, 1993, Measurement of DNA Polymorphism
+ // In: Mechanisms of Molecular Evolution: Introduction to Molecular Paleopopulation Biology
+ // Editors: Takahata and Clark
+ // Also defined in Arlequin Manual as Mean Number of Pairwise Distances (pi), section 8.1.2.1
//
for (uint i = 0; i < haplotypes.size(); i++) {
for (uint j = 0; j < haplotypes.size(); j++) {
=====================================
src/export_formats.cc
=====================================
@@ -3036,6 +3036,84 @@ TreemixExport::write_site(const CSLocus* cloc,
return 0;
}
+int
+GtfExport::open(const MetaPopInfo *mpopi)
+{
+ this->_path = out_path + out_prefix + ".gtf";
+ this->_mpopi = mpopi;
+
+ this->_fh.open(this->_path.c_str(), ofstream::out);
+ check_open(this->_fh, this->_path);
+
+ cout << "Loci positions in GTF format will be written to '" << this->_path << "'\n";
+
+ return 0;
+}
+
+int
+GtfExport::write_header()
+{
+ //
+ // Obtain the current date.
+ //
+ time_t rawtime;
+ struct tm *timeinfo;
+ char date[32], date2[32];
+ time(&rawtime);
+ timeinfo = localtime(&rawtime);
+ strftime(date, 32, "%B %d, %Y", timeinfo);
+
+ //
+ // Output the header of the file.
+ //
+ this->_fh << "# Stacks v" << VERSION << "; GTF export; " << date << "\n";
+
+ return 0;
+}
+
+int
+GtfExport::write_batch(const vector<LocBin *> &loci)
+{
+ CSLocus *cloc;
+ const LocStat *l;
+
+ const vector<Pop> &pops = this->_mpopi->pops();
+
+ //
+ // Output each locus.
+ //
+ for (uint i = 0; i < loci.size(); i++) {
+ //
+ // Sum the number of haplotypes that survived filtering.
+ //
+ size_t hap_cnt = 0;
+ for (uint pop = 0; pop < pops.size(); pop++) {
+ l = loci[i]->s->hapstats_per_pop(pop);
+
+ if (l == NULL)
+ continue;
+ hap_cnt += (int) l->alleles;
+ }
+
+ if (hap_cnt == 0)
+ continue;
+
+ cloc = loci[i]->cloc;
+
+ this->_fh << cloc->loc.chr() << "\tStacks\t"
+ << "locus\t";
+
+ if (cloc->loc.strand == strand_type::strand_plus)
+ this->_fh << cloc->loc.bp + 1 << "\t" << (cloc->loc.bp + cloc->len) << "\t.\t" << "+";
+ else
+ this->_fh << (cloc->loc.bp - cloc->len + 2) << "\t" << cloc->loc.bp + 1 << "\t.\t" << "-";
+
+ this->_fh << "\t.\t" << "locus_id \"" << cloc->id << "\"; sample_cnt \"" << hap_cnt / 2 << "\"\n";
+ }
+
+ return 0;
+}
+
int
JoinMapExport::open(const MetaPopInfo *mpopi)
{
=====================================
src/export_formats.h
=====================================
@@ -1,6 +1,6 @@
// -*-mode:c++; c-style:k&r; c-basic-offset:4;-*-
//
-// Copyright 2012-2019, Julian Catchen <jcatchen at illinois.edu>
+// Copyright 2012-2021, Julian Catchen <jcatchen at illinois.edu>
//
// This file is part of Stacks.
//
@@ -445,6 +445,20 @@ class TreemixExport: public OrderableExport {
int write_site(const CSLocus* cloc, const LocPopSum* psum, Datum const*const* datums, size_t col, size_t index);
};
+class GtfExport: public Export {
+ //
+ // Output the locus-level haplotype statistics.
+ //
+ const MetaPopInfo *_mpopi;
+
+ public:
+ GtfExport() : _mpopi(NULL) {}
+ ~GtfExport() {}
+ int open(const MetaPopInfo *mpopi);
+ int write_header();
+ int write_batch(const vector<LocBin *> &);
+};
+
class JoinMapExport: public Export {
const MetaPopInfo*_mpopi;
const MappingGenotypesProcessor *_gproc;
=====================================
src/populations.cc
=====================================
@@ -1,6 +1,6 @@
// -*-mode:c++; c-style:k&r; c-basic-offset:4;-*-
//
-// Copyright 2012-2020, Julian Catchen <jcatchen at illinois.edu>
+// Copyright 2012-2021, Julian Catchen <jcatchen at illinois.edu>
//
// This file is part of Stacks.
//
@@ -3634,6 +3634,7 @@ parse_command_line(int argc, char* argv[])
{"phylip-var-all", no_argument, NULL, 'T'},
{"hzar", no_argument, NULL, 'Z'},
{"treemix", no_argument, NULL, 'U'},
+ {"gtf", no_argument, NULL, 1018},
{"merge-sites", no_argument, NULL, 'D'},
{"sigma", required_argument, NULL, 1005},
{"threads", required_argument, NULL, 't'},
@@ -3925,6 +3926,9 @@ parse_command_line(int argc, char* argv[])
case 'U':
add_export<TreemixExport>();
break;
+ case 1018:
+ add_export<GtfExport>();
+ break;
case 'W':
wl_file = optarg;
break;
@@ -4198,6 +4202,7 @@ void help() {
<< " --phylip-var: include variable sites in the phylip output encoded using IUPAC notation.\n"
<< " --phylip-var-all: include all sequence as well as variable sites in the phylip output encoded using IUPAC notation.\n"
<< " --treemix: output SNPs in a format useable for the TreeMix program (Pickrell and Pritchard).\n"
+ << " --gtf: output locus positions in a GTF annotation file.\n"
<< " --no-hap-exports: omit haplotype outputs.\n"
<< " --fasta-samples-raw: output all haplotypes observed in each sample, for each locus, in FASTA format.\n"
<< "\n"
=====================================
src/renz.cc
=====================================
@@ -92,6 +92,8 @@ const char *ncoI[] = {"CATGG", // C/CATGG, NcoI
"CCATG"};
const char *ndeI[] = {"TATG", // CA/TATG, NdeI
"CATA"};
+const char *ngoMIV[] = {"CCGGC" // G/CCGGC, NgoMIV
+ "GCCGG"};
const char *nheI[] = {"CTAGC", // G/CTAGC, NheI
"GCTAG"};
const char *nlaIII[] = {"CATG", // CATG, NlaIII
@@ -190,6 +192,7 @@ initialize_renz(map<string, const char **> &renz, map<string, int> &renz_cnt, ma
renz["pacI"] = pacI; // AAT/TAATT, PacI
renz["pspXI"] = pspXI; // VC/TCGAGB, PspXI
renz["hpyCH4IV"] = hpyCH4IV; // A/CGT, HpyCH4IV
+ renz["ngoMIV"] = ngoMIV; // G/CCGGC, NgoMIV
renz_cnt["sbfI"] = 1;
renz_cnt["pstI"] = 1;
@@ -247,6 +250,7 @@ initialize_renz(map<string, const char **> &renz, map<string, int> &renz_cnt, ma
renz_cnt["pacI"] = 1;
renz_cnt["pspXI"] = 3;
renz_cnt["hpyCH4IV"] = 1;
+ renz_cnt["ngoMIV"] = 1;
renz_len["sbfI"] = 6;
renz_len["pstI"] = 5;
@@ -304,6 +308,7 @@ initialize_renz(map<string, const char **> &renz, map<string, int> &renz_cnt, ma
renz_len["pacI"] = 5;
renz_len["pspXI"] = 6;
renz_len["hpyCH4IV"] = 3;
+ renz_len["ngoMIV"] = 5;
}
void
=====================================
src/ustacks.cc
=====================================
@@ -312,6 +312,7 @@ merge_gapped_alns(map<int, Stack *> &unique, map<int, Rem *> &rem, map<int, Merg
cigar_2 = tag_2->alns[0].cigar;
if (cigar_1 == cigar_2) {
+ cerr << "CIGAR: " << cigar_1 << "\n";
parse_cigar(tag_1->alns[0].cigar.c_str(), cigar);
//
View it on GitLab: https://salsa.debian.org/med-team/stacks/-/commit/9b10a4d491c7305748c0e244bee791385cbfa87a
--
View it on GitLab: https://salsa.debian.org/med-team/stacks/-/commit/9b10a4d491c7305748c0e244bee791385cbfa87a
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210110/82d4aa94/attachment-0001.html>
More information about the debian-med-commit
mailing list