[med-svn] [Git][med-team/python-treetime][upstream] New upstream version 0.8.6
Andreas Tille (@tille)
gitlab at salsa.debian.org
Sat Feb 19 07:43:10 GMT 2022
Andreas Tille pushed to branch upstream at Debian Med / python-treetime
Commits:
de590170 by Andreas Tille at 2022-02-19T08:37:39+01:00
New upstream version 0.8.6
- - - - -
3 changed files:
- changelog.md
- treetime/__init__.py
- treetime/treeanc.py
Changes:
=====================================
changelog.md
=====================================
@@ -1,3 +1,7 @@
+# 0.8.6
+ * optionally allow incomplete alignment [PR #178](https://github.com/neherlab/treetime/pull/178)
+ * reduce memory footprint through better clean up and optimizing types. [PR #179](https://github.com/neherlab/treetime/pull/179)
+
# 0.8.5
* bug fixes related to edge cases where sequences consist only of missing data
* bug fix when the CLI command `treetime` is run without alignment
=====================================
treetime/__init__.py
=====================================
@@ -1,5 +1,5 @@
from __future__ import print_function, division, absolute_import
-version="0.8.5"
+version="0.8.6"
class TreeTimeError(Exception):
"""TreeTimeError class"""
=====================================
treetime/treeanc.py
=====================================
@@ -56,7 +56,7 @@ class TreeAnc(object):
def __init__(self, tree=None, aln=None, gtr=None, fill_overhangs=True,
ref=None, verbose = ttconf.VERBOSE, ignore_gaps=True,
convert_upper=True, seq_multiplicity=None, log=None,
- compress=True, seq_len=None,
+ compress=True, seq_len=None, ignore_missing_alns=False,
**kwargs):
"""
TreeAnc constructor. It prepares the tree, attaches sequences to the leaf nodes,
@@ -78,22 +78,22 @@ class TreeAnc(object):
GTR model object. If string passed, it is interpreted as the type of
the GTR model. A new GTR instance will be created for this type.
- fill_overhangs : bool
+ fill_overhangs : bool, default True
In some cases, the missing data on both ends of the alignment is
filled with the gap sign('-'). If set to True, the end-gaps are converted to "unknown"
characters ('N' for nucleotides, 'X' for aminoacids). Otherwise, the alignment is treated as-is
ref : None, optional
- Reference sequence used in VCF mode
+ Reference sequence used in VCF mode
- verbose : int
+ verbose : int, default 3
Verbosity level as number from 0 (lowest) to 10 (highest).
- ignore_gaps : bool
+ ignore_gaps : bool, default True
Ignore gaps in branch length calculations
- convert_upper : bool, optional
- Description
+ convert_upper : bool, default True
+ Convert all sequences to upper case
seq_multiplicity : dict
If individual nodes in the tree correspond to multiple sampled sequences
@@ -101,7 +101,7 @@ class TreeAnc(object):
specified as a dictionary. This currently only affects rooting and
can be used to weigh individual tips by abundance or important during root search.
- compress : bool, optional
+ compress : bool, default True
reduce identical alignment columns to one (not useful when
inferring site specific GTR models).
@@ -109,6 +109,8 @@ class TreeAnc(object):
length of the sequence. this is inferred from the input alignment or the reference
sequence in most cases but can be specified for other applications.
+ ignore_missing_alns : bool, default False
+
**kwargs
Keyword arguments to construct the GTR model
@@ -139,6 +141,7 @@ class TreeAnc(object):
self.ignore_gaps = ignore_gaps
self.reconstructed_tip_sequences = False
self.sequence_reconstruction = None
+ self.ignore_missing_alns = ignore_missing_alns
self._tree = None
self.tree = tree
@@ -335,7 +338,7 @@ class TreeAnc(object):
Returns
-------
float
- inverse of the uncompressed sequene length - length scale for short branches
+ inverse of the uncompressed sequence length - length scale for short branches
"""
return 1.0/self.data.full_length if self.data.full_length else np.nan
@@ -376,7 +379,7 @@ class TreeAnc(object):
if l.name not in self.data.compressed_alignment and l.is_terminal():
self.logger("***WARNING: TreeAnc._attach_sequences_to_nodes: NO SEQUENCE FOR LEAF: %s" % l.name, 0, warn=True)
failed_leaves += 1
- if failed_leaves > self.tree.count_terminals()/3:
+ if not self.ignore_missing_alns and failed_leaves > self.tree.count_terminals()/3:
raise MissingDataError("TreeAnc._check_alignment_tree_gtr_consistency: At least 30\\% terminal nodes cannot be assigned a sequence!\n"
"Are you sure the alignment belongs to the tree?")
else: # could not assign sequence for internal node - is OK
@@ -906,12 +909,19 @@ class TreeAnc(object):
# this is prod_ch L_x(i)
msg_from_children = np.sum(np.stack([c.joint_Lx for c in node.clades], axis=0), axis=0)
+ if not debug:
+ # Now that we have calculated the current node's likelihood
+ # from its children, clean up likelihood matrices attached
+ # to children to save memory.
+ for c in node.clades:
+ del c.joint_Lx
+
# for every possible state of the parent node,
# get the best state of the current node
# and compute the likelihood of this state
# preallocate storage
- node.joint_Lx = np.zeros((L, n_states)) # likelihood array
- node.joint_Cx = np.zeros((L, n_states), dtype=int) # max LH indices
+ node.joint_Lx = np.zeros((L, n_states)) # likelihood array
+ node.joint_Cx = np.zeros((L, n_states), dtype=np.uint16) # max LH indices
for char_i, char in enumerate(self.gtr.alphabet):
# Pij(i) * L_ch(i) for given parent state j
msg_to_parent = (log_transitions[:,char_i].T + msg_from_children)
@@ -973,7 +983,10 @@ class TreeAnc(object):
# do clean-up
if not debug:
for node in self.tree.find_clades(order='preorder'):
- del node.joint_Lx
+ # Check for the likelihood matrix, since we might have cleaned
+ # it up earlier.
+ if hasattr(node, "joint_Lx"):
+ del node.joint_Lx
del node.joint_Cx
if hasattr(node, 'seq_idx'):
del node.seq_idx
View it on GitLab: https://salsa.debian.org/med-team/python-treetime/-/commit/de590170282fb0ab347d696cbbd5557bdae500f7
--
View it on GitLab: https://salsa.debian.org/med-team/python-treetime/-/commit/de590170282fb0ab347d696cbbd5557bdae500f7
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20220219/8763ad96/attachment-0001.htm>
More information about the debian-med-commit mailing list