[med-svn] [Git][med-team/python-treetime][upstream] New upstream version 0.8.6

Andreas Tille (@tille) gitlab at salsa.debian.org
Sat Feb 19 07:43:10 GMT 2022



Andreas Tille pushed to branch upstream at Debian Med / python-treetime


Commits:
de590170 by Andreas Tille at 2022-02-19T08:37:39+01:00
New upstream version 0.8.6
- - - - -


3 changed files:

- changelog.md
- treetime/__init__.py
- treetime/treeanc.py


Changes:

=====================================
changelog.md
=====================================
@@ -1,3 +1,7 @@
+# 0.8.6
+ * optionally allow incomplete alignment [PR #178](https://github.com/neherlab/treetime/pull/178)
+ * reduce memory footprint through better clean up and optimizing types. [PR #179](https://github.com/neherlab/treetime/pull/179)
+
 # 0.8.5
  * bug fixes related to edge cases where sequences consist only of missing data
  * bug fix when the CLI command `treetime` is run without alignment


=====================================
treetime/__init__.py
=====================================
@@ -1,5 +1,5 @@
 from __future__ import print_function, division, absolute_import
-version="0.8.5"
+version="0.8.6"
 
 class TreeTimeError(Exception):
     """TreeTimeError class"""


=====================================
treetime/treeanc.py
=====================================
@@ -56,7 +56,7 @@ class TreeAnc(object):
     def __init__(self, tree=None, aln=None, gtr=None, fill_overhangs=True,
                 ref=None, verbose = ttconf.VERBOSE, ignore_gaps=True,
                 convert_upper=True, seq_multiplicity=None, log=None,
-                 compress=True, seq_len=None,
+                 compress=True, seq_len=None, ignore_missing_alns=False,
                 **kwargs):
         """
         TreeAnc constructor. It prepares the tree, attaches sequences to the leaf nodes,
@@ -78,22 +78,22 @@ class TreeAnc(object):
            GTR model object. If string passed, it is interpreted as the type of
            the GTR model. A new GTR instance will be created for this type.
 
-        fill_overhangs : bool
+        fill_overhangs : bool, default True
            In some cases, the missing data on both ends of the alignment is
            filled with the gap sign('-'). If set to True, the end-gaps are converted to "unknown"
            characters ('N' for nucleotides, 'X' for aminoacids). Otherwise, the alignment is treated as-is
 
         ref : None, optional
-            Reference sequence used in VCF mode
+           Reference sequence used in VCF mode
 
-        verbose : int
+        verbose : int, default 3
            Verbosity level as number from 0 (lowest) to 10 (highest).
 
-        ignore_gaps : bool
+        ignore_gaps : bool, default True
            Ignore gaps in branch length calculations
 
-        convert_upper : bool, optional
-            Description
+        convert_upper : bool, default True
+           Convert all sequences to upper case
 
         seq_multiplicity : dict
            If individual nodes in the tree correspond to multiple sampled sequences
@@ -101,7 +101,7 @@ class TreeAnc(object):
            specified as a dictionary. This currently only affects rooting and
            can be used to weigh individual tips by abundance or importance during root search.
 
-        compress : bool, optional
+        compress : bool, default True
             reduce identical alignment columns to one (not useful when
             inferring site specific GTR models).
 
@@ -109,6 +109,8 @@ class TreeAnc(object):
             length of the sequence. this is inferred from the input alignment or the reference
             sequence in most cases but can be specified for other applications.
 
+        ignore_missing_alns : bool, default False
+
         **kwargs
            Keyword arguments to construct the GTR model
 
@@ -139,6 +141,7 @@ class TreeAnc(object):
         self.ignore_gaps = ignore_gaps
         self.reconstructed_tip_sequences = False
         self.sequence_reconstruction = None
+        self.ignore_missing_alns = ignore_missing_alns
 
         self._tree = None
         self.tree = tree
@@ -335,7 +338,7 @@ class TreeAnc(object):
         Returns
         -------
         float
-            inverse of the uncompressed sequene length - length scale for short branches
+            inverse of the uncompressed sequence length - length scale for short branches
         """
         return 1.0/self.data.full_length if self.data.full_length else np.nan
 
@@ -376,7 +379,7 @@ class TreeAnc(object):
             if l.name not in self.data.compressed_alignment and l.is_terminal():
                 self.logger("***WARNING: TreeAnc._attach_sequences_to_nodes: NO SEQUENCE FOR LEAF: %s" % l.name, 0, warn=True)
                 failed_leaves += 1
-                if failed_leaves > self.tree.count_terminals()/3:
+                if not self.ignore_missing_alns and failed_leaves > self.tree.count_terminals()/3:
                     raise MissingDataError("TreeAnc._check_alignment_tree_gtr_consistency: At least 30\\% terminal nodes cannot be assigned a sequence!\n"
                                            "Are you sure the alignment belongs to the tree?")
             else: # could not assign sequence for internal node - is OK
@@ -906,12 +909,19 @@ class TreeAnc(object):
                 # this is prod_ch L_x(i)
                 msg_from_children = np.sum(np.stack([c.joint_Lx for c in node.clades], axis=0), axis=0)
 
+                if not debug:
+                    # Now that we have calculated the current node's likelihood
+                    # from its children, clean up likelihood matrices attached
+                    # to children to save memory.
+                    for c in node.clades:
+                        del c.joint_Lx
+
             # for every possible state of the parent node,
             # get the best state of the current node
             # and compute the likelihood of this state
             # preallocate storage
-            node.joint_Lx = np.zeros((L, n_states))             # likelihood array
-            node.joint_Cx = np.zeros((L, n_states), dtype=int)  # max LH indices
+            node.joint_Lx = np.zeros((L, n_states)) # likelihood array
+            node.joint_Cx = np.zeros((L, n_states), dtype=np.uint16)  # max LH indices
             for char_i, char in enumerate(self.gtr.alphabet):
                 # Pij(i) * L_ch(i) for given parent state j
                 msg_to_parent = (log_transitions[:,char_i].T + msg_from_children)
@@ -973,7 +983,10 @@ class TreeAnc(object):
         # do clean-up
         if not debug:
             for node in self.tree.find_clades(order='preorder'):
-                del node.joint_Lx
+                # Check for the likelihood matrix, since we might have cleaned
+                # it up earlier.
+                if hasattr(node, "joint_Lx"):
+                    del node.joint_Lx
                 del node.joint_Cx
                 if hasattr(node, 'seq_idx'):
                     del node.seq_idx



View it on GitLab: https://salsa.debian.org/med-team/python-treetime/-/commit/de590170282fb0ab347d696cbbd5557bdae500f7

-- 
View it on GitLab: https://salsa.debian.org/med-team/python-treetime/-/commit/de590170282fb0ab347d696cbbd5557bdae500f7
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20220219/8763ad96/attachment-0001.htm>


More information about the debian-med-commit mailing list