[med-svn] [Git][med-team/augur][master] 5 commits: routine-update: New upstream version
Étienne Mollier (@emollier)
gitlab at salsa.debian.org
Sun Mar 10 09:01:07 GMT 2024
Étienne Mollier pushed to branch master at Debian Med / augur
Commits:
4e57a1da by Étienne Mollier at 2024-03-10T09:34:59+01:00
routine-update: New upstream version
- - - - -
15e45c24 by Étienne Mollier at 2024-03-10T09:35:00+01:00
New upstream version 24.2.3
- - - - -
37bf65b2 by Étienne Mollier at 2024-03-10T09:35:17+01:00
Update upstream source from tag 'upstream/24.2.3'
Update to upstream version '24.2.3'
with Debian dir 32c116dcd9d56c8971fd32181617c935007a758a
- - - - -
cbe6598e by Étienne Mollier at 2024-03-10T09:59:48+01:00
pandas2.0.patch: document bug forwarding upstream.
- - - - -
44cb3b8f by Étienne Mollier at 2024-03-10T10:00:29+01:00
ready to upload to unstable.
- - - - -
8 changed files:
- CHANGES.md
- augur/__version__.py
- augur/filter/__init__.py
- augur/filter/_run.py
- augur/frequencies.py
- debian/changelog
- debian/patches/pandas2.0.patch
- + tests/functional/frequencies/cram/diffusion-region.t
Changes:
=====================================
CHANGES.md
=====================================
@@ -3,6 +3,16 @@
## __NEXT__
+## 24.2.3 (23 February 2024)
+
+### Bug Fixes
+
+* filter: Updated the help and report text of `--min-length` to explicitly state that the minimum length filter only counts standard nucleotide characters A, C, G, or T (case-insensitive). This has been the behavior since version 3.0.3.dev1, but has never been explicitly documented. [#1422][] (@joverlee521)
+* frequencies: Fixed a bug introduced in 24.2.0 and 24.1.0 that prevented `--regions` from working when providing regions other than the default "global" region. [#1424]
+
+[#1422]: https://github.com/nextstrain/augur/pull/1422
+[#1424]: https://github.com/nextstrain/augur/pull/1424
+
## 24.2.2 (16 February 2024)
### Bug Fixes
=====================================
augur/__version__.py
=====================================
@@ -1,4 +1,4 @@
-__version__ = '24.2.2'
+__version__ = '24.2.3'
def is_augur_version_compatible(version):
=====================================
augur/filter/__init__.py
=====================================
@@ -51,7 +51,7 @@ def register_arguments(parser):
of an entry in --sequences.""")
sequence_filter_group = parser.add_argument_group("sequence filters", "filters to apply to sequence data")
- sequence_filter_group.add_argument('--min-length', type=int, help="minimal length of the sequences")
+ sequence_filter_group.add_argument('--min-length', type=int, help="minimal length of the sequences, only counting standard nucleotide characters A, C, G, or T (case-insensitive)")
sequence_filter_group.add_argument('--non-nucleotide', action='store_true', help="exclude sequences that contain illegal characters")
subsample_group = parser.add_argument_group("subsampling", "options to subsample filtered data")
=====================================
augur/filter/_run.py
=====================================
@@ -428,7 +428,7 @@ def run(args):
include_exclude_rules.filter_by_ambiguous_date.__name__: "{count} {were} dropped because of their ambiguous date in {ambiguity}",
include_exclude_rules.filter_by_min_date.__name__: "{count} {were} dropped because {they} {were} earlier than {min_date} or missing a date",
include_exclude_rules.filter_by_max_date.__name__: "{count} {were} dropped because {they} {were} later than {max_date} or missing a date",
- include_exclude_rules.filter_by_sequence_length.__name__: "{count} {were} dropped because {they} {were} shorter than minimum length of {min_length}bp",
+ include_exclude_rules.filter_by_sequence_length.__name__: "{count} {were} dropped because {they} {were} shorter than minimum length of {min_length}bp when only counting standard nucleotide characters A, C, G, or T (case-insensitive)",
include_exclude_rules.filter_by_non_nucleotide.__name__: "{count} {were} dropped because {they} had non-nucleotide characters",
include_exclude_rules.skip_group_by_with_ambiguous_year.__name__: "{count} {were} dropped during grouping due to ambiguous year information",
include_exclude_rules.skip_group_by_with_ambiguous_month.__name__: "{count} {were} dropped during grouping due to ambiguous month information",
=====================================
augur/frequencies.py
=====================================
@@ -14,6 +14,8 @@ from .io.file import open_file
from .io.metadata import DEFAULT_DELIMITERS, DEFAULT_ID_COLUMNS, METADATA_DATE_COLUMN, InvalidDelimiter, Metadata, read_metadata
from .utils import write_json
+REGION_COLUMN = 'region'
+DEFAULT_REGION = 'global'
def register_parser(parent_subparsers):
parser = parent_subparsers.add_parser("frequencies", help=__doc__)
@@ -26,8 +28,10 @@ def register_parser(parent_subparsers):
help="delimiters to accept when reading a metadata file. Only one delimiter will be inferred.")
parser.add_argument('--metadata-id-columns', default=DEFAULT_ID_COLUMNS, nargs="+",
help="names of possible metadata columns containing identifier information, ordered by priority. Only one ID column will be inferred.")
- parser.add_argument('--regions', type=str, nargs='+', default=['global'],
- help="region to subsample to")
+ parser.add_argument('--regions', type=str, nargs='+', default=[DEFAULT_REGION],
+ help="region to filter to. " \
+ f"Regions should match values in the {REGION_COLUMN!r} column of the metadata file " \
+ f"if specifying values other than the default {DEFAULT_REGION!r} region.")
parser.add_argument("--pivot-interval", type=int, default=3,
help="number of units between pivots")
parser.add_argument("--pivot-interval-units", type=str, default="months", choices=['months', 'weeks'],
@@ -97,6 +101,11 @@ def run(args):
columns_to_load = [metadata_object.id_column, METADATA_DATE_COLUMN]
if args.weights_attribute:
columns_to_load.append(args.weights_attribute)
+
+ filter_to_region = any(region != DEFAULT_REGION for region in args.regions)
+ if filter_to_region:
+ columns_to_load.append(REGION_COLUMN)
+
metadata = read_metadata(
args.metadata,
delimiters=[metadata_object.delimiter],
@@ -130,6 +139,9 @@ def run(args):
# Annotate tip with weight attribute.
tip.attr[weights_attribute] = metadata.loc[tip.name, weights_attribute]
+ if filter_to_region:
+ tip.attr[REGION_COLUMN] = metadata.loc[tip.name, REGION_COLUMN]
+
if args.method == "diffusion":
# estimate tree frequencies
pivots = get_pivots(tps, args.pivot_interval, args.min_date, args.max_date, args.pivot_interval_units)
@@ -139,10 +151,10 @@ def run(args):
for region in args.regions:
# Omit strains sampled prior to the first pivot from frequency calculations.
# (these tend to be reference strains included for phylogenetic context)
- if region=='global':
+ if region==DEFAULT_REGION:
node_filter_func = lambda node: node.attr["num_date"] >= pivots[0]
else:
- node_filter_func = lambda node: (node.attr["region"] == region
+ node_filter_func = lambda node: (node.attr[REGION_COLUMN] == region
and node.attr["num_date"] >= pivots[0])
tree_freqs = tree_frequencies(tree, pivots, method='SLSQP',
=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+augur (24.2.3-1) unstable; urgency=medium
+
+ * New upstream version 24.2.3
+ * pandas2.0.patch: document bug forwarding upstream.
+
+ -- Étienne Mollier <emollier at debian.org> Sun, 10 Mar 2024 10:00:17 +0100
+
augur (24.2.2-1) unstable; urgency=medium
* New upstream version 24.2.2
=====================================
debian/patches/pandas2.0.patch
=====================================
@@ -1,6 +1,8 @@
Description: Replace parse_time_string() by parse_datetime_string_with_reso()
to gain pandas 2.0 compatibility (Thanks for the hint to s3v <c0llapsed at yahoo.it>)
+Bug: https://github.com/nextstrain/augur/issues/1303
Bug-Debian: https://bugs.debian.org/1044079
+Forwarded: https://github.com/nextstrain/augur/pull/1436
Author: Andreas Tille <tille at debian.org>
Last-Update: Thu, 15 Feb 2024 19:04:46 +0100
=====================================
tests/functional/frequencies/cram/diffusion-region.t
=====================================
@@ -0,0 +1,247 @@
+Setup
+
+ $ source "$TESTDIR"/_setup.sh
+
+Calculate diffusion-based tip frequencies from a refined tree with `--regions`.
+
+ $ ${AUGUR} frequencies \
+ > --method diffusion \
+ > --tree "$TESTDIR/../data/tree.nwk" \
+ > --metadata "$TESTDIR/../data/metadata.tsv" \
+ > --regions "global" "North America" "South America" \
+ > --pivot-interval 3 \
+ > --output tip-frequencies.json > /dev/null
+
+ $ cat tip-frequencies.json
+ {
+ "BRA/2016/FC_6706": {
+ "North America": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "South America": [
+ 0.2,
+ 0.2,
+ 0.2,
+ 0.2
+ ],
+ "global": [
+ 0.1,
+ 0.1,
+ 0.1,
+ 0.1
+ ]
+ },
+ "COL/FLR_00008/2015": {
+ "North America": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "South America": [
+ 0.2,
+ 0.2,
+ 0.2,
+ 0.2
+ ],
+ "global": [
+ 0.1,
+ 0.1,
+ 0.1,
+ 0.1
+ ]
+ },
+ "Colombia/2016/ZC204Se": {
+ "North America": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "South America": [
+ 0.2,
+ 0.2,
+ 0.2,
+ 0.2
+ ],
+ "global": [
+ 0.1,
+ 0.1,
+ 0.1,
+ 0.1
+ ]
+ },
+ "DOM/2016/BB_0183": {
+ "North America": [
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25
+ ],
+ "South America": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "global": [
+ 0.1,
+ 0.1,
+ 0.1,
+ 0.1
+ ]
+ },
+ "EcEs062_16": {
+ "North America": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "South America": [
+ 0.2,
+ 0.2,
+ 0.2,
+ 0.2
+ ],
+ "global": [
+ 0.1,
+ 0.1,
+ 0.1,
+ 0.1
+ ]
+ },
+ "HND/2016/HU_ME59": {
+ "North America": [
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25
+ ],
+ "South America": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "global": [
+ 0.1,
+ 0.1,
+ 0.1,
+ 0.1
+ ]
+ },
+ "PAN/CDC_259359_V1_V3/2015": {
+ "North America": [
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25
+ ],
+ "South America": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "global": [
+ 0.1,
+ 0.1,
+ 0.1,
+ 0.1
+ ]
+ },
+ "PRVABC59": {
+ "North America": [
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25
+ ],
+ "South America": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "global": [
+ 0.1,
+ 0.1,
+ 0.1,
+ 0.1
+ ]
+ },
+ "VEN/UF_1/2016": {
+ "North America": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "South America": [
+ 0.2,
+ 0.2,
+ 0.2,
+ 0.2
+ ],
+ "global": [
+ 0.1,
+ 0.1,
+ 0.1,
+ 0.1
+ ]
+ },
+ "ZKC2/2016": {
+ "North America": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "South America": [
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "global": [
+ 0.1,
+ 0.1,
+ 0.1,
+ 0.1
+ ]
+ },
+ "counts": {
+ "North America": [
+ 0,
+ 2,
+ 2,
+ 0
+ ],
+ "South America": [
+ 0,
+ 2,
+ 3,
+ 0
+ ],
+ "global": [
+ 0,
+ 5,
+ 5,
+ 0
+ ]
+ },
+ "generated_by": {
+ "program": "augur",
+ "version": ".*" (re)
+ },
+ "pivots": [
+ 2015.7521,
+ 2016.0041,
+ 2016.2527,
+ 2016.5014
+ ]
+ } (no-eol)
View it on GitLab: https://salsa.debian.org/med-team/augur/-/compare/a2d981c0f471448531c7ba6d60b7fe357c887afb...44cb3b8f4aaceb589b9d7ccb92f571d3a1c27e55
--
View it on GitLab: https://salsa.debian.org/med-team/augur/-/compare/a2d981c0f471448531c7ba6d60b7fe357c887afb...44cb3b8f4aaceb589b9d7ccb92f571d3a1c27e55
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20240310/e4994d71/attachment-0001.htm>
More information about the debian-med-commit
mailing list