[med-svn] [Git][med-team/python-cobra][upstream] New upstream version 0.21.0

Andreas Tille gitlab at salsa.debian.org
Sun Feb 7 08:05:16 GMT 2021



Andreas Tille pushed to branch upstream at Debian Med / python-cobra


Commits:
6a842b8f by Andreas Tille at 2021-02-07T08:49:14+01:00
New upstream version 0.21.0
- - - - -


8 changed files:

- INSTALL.rst
- + release-notes/0.21.0.md
- release-notes/next-release.md
- setup.cfg
- setup.py
- src/cobra/__init__.py
- src/cobra/exceptions.py
- src/cobra/sampling/hr_sampler.py


Changes:

=====================================
INSTALL.rst
=====================================
@@ -38,7 +38,7 @@ available from the ``conda-forge`` channel.
 Installation for development
 ============================
 
-Get the `detailed contribution instructions <CONTRIBUTING.rst>`_ for
+Get the `detailed contribution instructions <.github/CONTRIBUTING.rst>`_ for
 contributing to COBRApy.
 
 Solvers


=====================================
release-notes/0.21.0.md
=====================================
@@ -0,0 +1,7 @@
+# Release notes for cobrapy 0.21.0
+
+## Other
+
+* Update libSBML dependency to `python-libsbml==5.19.0`.
+* Docstring and style changes.
+


=====================================
release-notes/next-release.md
=====================================
@@ -4,6 +4,8 @@
 
 ## Fixes
 
+## Other
+
 ## Deprecated features
 
 ## Backwards incompatible changes


=====================================
setup.cfg
=====================================
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.20.0
+current_version = 0.21.0
 commit = True
 tag = True
 parse = (?P<major>\d+)
@@ -62,7 +62,7 @@ install_requires =
 	optlang <1.4.6
 	pandas ~=1.0
 	pydantic ~=1.6
-	python-libsbml-experimental ==5.18.3
+	python-libsbml ==5.19.0
 	rich ~=6.0
 	ruamel.yaml ~=0.16
 	six
@@ -107,4 +107,3 @@ replace = version="{new_version}"
 [bumpversion:file:src/cobra/__init__.py]
 search = __version__ = "{current_version}"
 replace = __version__ = "{new_version}"
-


=====================================
setup.py
=====================================
@@ -19,4 +19,4 @@ if version_info[:2] < (3, 6):
 
 # All other arguments are defined in `setup.cfg`.
 if __name__ == "__main__":
-    setup(version="0.20.0")
+    setup(version="0.21.0")


=====================================
src/cobra/__init__.py
=====================================
@@ -1,5 +1,5 @@
 __author__ = "The cobrapy core development team."
-__version__ = "0.20.0"
+__version__ = "0.21.0"
 
 
 from cobra.core import (


=====================================
src/cobra/exceptions.py
=====================================
@@ -1,28 +1,36 @@
-# -*- coding: utf-8 -*-
-
-from __future__ import absolute_import
-
+"""Module for shared exceptions in the Cobra package."""
 import optlang.interface
 
 
 class OptimizationError(Exception):
+    """Exception for Optimization issues."""
+
     def __init__(self, message):
+        """Inherit parent behaviors."""
         super(OptimizationError, self).__init__(message)
 
 
 class Infeasible(OptimizationError):
+    """Exception for Infeasible issues."""
+
     pass
 
 
 class Unbounded(OptimizationError):
+    """Exception for Unbounded issues."""
+
     pass
 
 
 class FeasibleButNotOptimal(OptimizationError):
+    """Exception for Non-Optimal issues."""
+
     pass
 
 
 class UndefinedSolution(OptimizationError):
+    """Exception for Undefined issues."""
+
     pass
 
 


=====================================
src/cobra/sampling/hr_sampler.py
=====================================
@@ -1,46 +1,47 @@
-# -*- coding: utf-8 -*-
-
-"""Provide base class for Hit-and-Run samplers.
-
-New samplers should derive from the abstract `HRSampler` class
-where possible to provide a uniform interface."""
-
-from __future__ import absolute_import, division
+"""Provide the base class and associated functions for Hit-and-Run samplers."""
 
 import ctypes
-from collections import namedtuple
-from logging import getLogger
+import logging
+from abc import ABC, abstractmethod
 from multiprocessing import Array
 from time import time
+from typing import TYPE_CHECKING, NamedTuple, Optional, Tuple
 
 import numpy as np
+import pandas as pd
 from optlang.interface import OPTIMAL
 from optlang.symbolics import Zero
 
 from cobra.util import constraint_matrices, create_stoichiometric_matrix, nullspace
 
 
-LOGGER = getLogger(__name__)
+if TYPE_CHECKING:
+    from cobra import Model
+
+
+logger = logging.getLogger(__name__)
 
 
 # Maximum number of retries for sampling
 MAX_TRIES = 100
 
 
-Problem = namedtuple(
+Problem = NamedTuple(
     "Problem",
     [
-        "equalities",
-        "b",
-        "inequalities",
-        "bounds",
-        "variable_fixed",
-        "variable_bounds",
-        "nullspace",
-        "homogeneous",
+        ("equalities", np.ndarray),
+        ("b", np.ndarray),
+        ("inequalities", np.ndarray),
+        ("bounds", np.ndarray),
+        ("variable_fixed", np.ndarray),
+        ("variable_bounds", np.ndarray),
+        ("nullspace", np.matrix),
+        ("homogeneous", bool),
     ],
 )
-"""Defines the matrix representation of a sampling problem.
+"""Define the matrix representation of a sampling problem.
+
+A named tuple consisting of 6 arrays, 1 matrix and 1 boolean.
 
 Attributes
 ----------
@@ -52,33 +53,49 @@ inequalities : numpy.array
     All inequality constraints in the model.
 bounds : numpy.array
     The lower and upper bounds for the inequality constraints.
+variable_fixed : numpy.array
+    A boolean vector indicating whether the variable at that index is
+    fixed i.e., whether `variable.lower_bound == variable.upper_bound`.
 variable_bounds : numpy.array
     The lower and upper bounds for the variables.
-homogeneous: boolean
-    Indicates whether the sampling problem is homogenous, e.g. whether there
-    exist no non-zero fixed variables or constraints.
 nullspace : numpy.matrix
-    A matrix containing the nullspace of the equality constraints. Each column
-    is one basis vector.
+    A matrix containing the nullspace of the equality constraints.
+    Each column is one basis vector.
+homogeneous: bool
+    Indicates whether the sampling problem is homogeneous, e.g. whether
+    there exist no non-zero fixed variables or constraints.
 
 """
 
 
-def shared_np_array(shape, data=None, integer=False):
+def shared_np_array(
+    shape: Tuple[int, int], data: Optional[np.ndarray] = None, integer: bool = False
+) -> np.ndarray:
     """Create a new numpy array that resides in shared memory.
 
     Parameters
     ----------
-    shape : tuple of ints
+    shape : tuple of int
         The shape of the new array.
-    data : numpy.array
-        Data to copy to the new array. Has to have the same shape.
-    integer : boolean
-        Whether to use an integer array. Defaults to False which means
-        float array.
+    data : numpy.array, optional
+        Data to copy to the new array. Has to have the same shape
+        (default None).
+    integer : bool, optional
+        Whether to use an integer array. By default, float array is used
+        (default False).
+
+    Returns
+    -------
+    numpy.array
+        The newly created shared numpy array.
+
+    Raises
+    ------
+    ValueError
+        If the input `data` (if provided) size is not equal to the created
+        array.
 
     """
-
     size = np.prod(shape)
 
     if integer:
@@ -92,81 +109,95 @@ def shared_np_array(shape, data=None, integer=False):
 
     if data is not None:
         if len(shape) != len(data.shape):
-            raise ValueError(
-                "`data` must have the same dimensions" "as the created array."
-            )
+            raise ValueError("`data` must have the same shape as the created array.")
+
         same = all(x == y for x, y in zip(shape, data.shape))
 
         if not same:
-            raise ValueError("`data` must have the same shape" "as the created array.")
+            raise ValueError("`data` must have the same shape as the created array.")
+
         np_array[:] = data
 
     return np_array
 
 
-class HRSampler(object):
-    """The abstract base class for hit-and-run samplers.
+class HRSampler(ABC):
+    """
+    The abstract base class for hit-and-run samplers.
+
+    New samplers should derive from this class where possible to provide
+    a uniform interface.
 
     Parameters
     ----------
     model : cobra.Model
         The cobra model from which to generate samples.
     thinning : int
-        The thinning factor of the generated sampling chain. A thinning of 10
-        means samples are returned every 10 steps.
+        The thinning factor of the generated sampling chain. A thinning of
+        10 means samples are returned every 10 steps.
     nproj : int > 0, optional
-        How often to reproject the sampling point into the feasibility space.
-        Avoids numerical issues at the cost of lower sampling. If you observe
-        many equality constraint violations with `sampler.validate` you should
-        lower this number.
+        How often to reproject the sampling point into the feasibility
+        space. Avoids numerical issues at the cost of lower sampling. If
+        you observe many equality constraint violations with
+        `sampler.validate` you should lower this number (default None).
     seed : int > 0, optional
-        The random number seed that should be used.
+        Sets the random number seed. Initialized to the current time stamp
+        if None (default None).
 
     Attributes
     ----------
-    model : cobra.Model
-        The cobra model from which the sampes get generated.
     feasibility_tol: float
         The tolerance used for checking equalities feasibility.
     bounds_tol: float
         The tolerance used for checking bounds feasibility.
-    thinning : int
-        The currently used thinning factor.
     n_samples : int
         The total number of samples that have been generated by this
         sampler instance.
     retries : int
         The overall number of sampling retries the sampler has observed.
         Larger values indicate numerical instabilities.
-    problem : collections.namedtuple
-        A python object whose attributes define the entire sampling problem in
-        matrix form. See docstring of `Problem`.
+    problem : Problem
+        A NamedTuple whose attributes define the entire sampling problem in
+        matrix form.
     warmup : numpy.matrix
-        A matrix of with as many columns as reactions in the model and more
-        than 3 rows containing a warmup sample in each row. None if no warmup
-        points have been generated yet.
-    nproj : int
-        How often to reproject the sampling point into the feasibility space.
-    seed : int > 0, optional
-        Sets the random number seed. Initialized to the current time stamp if
-        None.
+        A numpy matrix with as many columns as reactions in the model and
+        more than 3 rows containing a warmup sample in each row. None if no
+        warmup points have been generated yet.
     fwd_idx : numpy.array
-        Has one entry for each reaction in the model containing the index of
-        the respective forward variable.
+        A numpy array having one entry for each reaction in the model,
+        containing the index of the respective forward variable.
     rev_idx : numpy.array
-        Has one entry for each reaction in the model containing the index of
-        the respective reverse variable.
+        A numpy array having one entry for each reaction in the model,
+        containing the index of the respective reverse variable.
 
     """
 
-    def __init__(self, model, thinning, nproj=None, seed=None):
-        """Initialize a new sampler object."""
+    def __init__(
+        self,
+        model: "Model",
+        thinning: int,
+        nproj: Optional[int] = None,
+        seed: Optional[int] = None,
+        **kwargs,
+    ) -> None:
+        """Initialize a new sampler object.
+
+        Other Parameters
+        ----------------
+        kwargs :
+            Further keyword arguments are passed on to the parent class.
+
+        Raises
+        ------
+        TypeError
+            If integer problem is found.
 
+        """
         # This currently has to be done to reset the solver basis which is
         # required to get deterministic warmup point generation
-        # (in turn required for a working `seed` arg)
+        # (in turn required for a working `seed`)
         if model.solver.is_integer:
-            raise TypeError("sampling does not work with integer problems :(")
+            raise TypeError("Sampling does not work with integer problems.")
 
         self.model = model.copy()
         self.feasibility_tol = model.tolerance
@@ -201,9 +232,15 @@ class HRSampler(object):
         # Avoid overflow
         self._seed = self._seed % np.iinfo(np.int32).max
 
-    def __build_problem(self):
-        """Build the matrix representation of the sampling problem."""
+    def __build_problem(self) -> Problem:
+        """Build the matrix representation of the sampling problem.
 
+        Returns
+        -------
+        Problem
+            The matrix representation in the form of a NamedTuple.
+
+        """
         # Set up the mathematical problem
         prob = constraint_matrices(self.model, zero_tol=self.feasibility_tol)
 
@@ -244,7 +281,7 @@ class HRSampler(object):
             homogeneous=homogeneous,
         )
 
-    def generate_fva_warmup(self):
+    def generate_fva_warmup(self) -> None:
         """Generate the warmup points for the sampler.
 
         Generates warmup points by setting each flux as the sole objective
@@ -252,8 +289,13 @@ class HRSampler(object):
         warmup points into the nullspace for non-homogeneous problems (only
         if necessary).
 
-        """
+        Raises
+        ------
+        ValueError
+            If flux cone contains a single point or the problem is
+            inhomogeneous.
 
+        """
         self.n_warmup = 0
         reactions = self.model.reactions
         self.warmup = np.zeros((2 * len(reactions), len(self.model.variables)))
@@ -270,7 +312,7 @@ class HRSampler(object):
 
                 # Omit fixed reactions if they are non-homogeneous
                 if r.upper_bound - r.lower_bound < self.bounds_tol:
-                    LOGGER.info("skipping fixed reaction %s" % r.id)
+                    logger.info(f"Skipping fixed reaction {r.id}")
                     continue
 
                 self.model.objective.set_linear_coefficients(
@@ -280,7 +322,7 @@ class HRSampler(object):
                 self.model.slim_optimize()
 
                 if not self.model.solver.status == OPTIMAL:
-                    LOGGER.info("can not maximize reaction %s, skipping it" % r.id)
+                    logger.info(f"Cannot maximize reaction {r.id}, skipping it.")
                     continue
 
                 primals = self.model.solver.primal_values
@@ -305,14 +347,14 @@ class HRSampler(object):
 
         # Catch some special cases
         if len(self.warmup.shape) == 1 or self.warmup.shape[0] == 1:
-            raise ValueError("Your flux cone consists only of a single point!")
+            raise ValueError("Flux cone only consists a single point.")
         elif self.n_warmup == 2:
             if not self.problem.homogeneous:
                 raise ValueError(
-                    "Can not sample from an inhomogenous problem"
-                    " with only 2 search directions :("
+                    "Cannot sample from an inhomogenous problem "
+                    "with only 2 search directions."
                 )
-            LOGGER.info("All search directions on a line, adding another one.")
+            logger.info("All search directions on a line, adding another one.")
             newdir = self.warmup.T.dot([0.25, 0.25])
             self.warmup = np.vstack([self.warmup, newdir])
             self.n_warmup += 1
@@ -322,11 +364,12 @@ class HRSampler(object):
             (self.n_warmup, len(self.model.variables)), self.warmup
         )
 
-    def _reproject(self, p):
+    def _reproject(self, p: np.ndarray) -> np.ndarray:
         """Reproject a point into the feasibility region.
 
         This function is guaranteed to return a new feasible point. However,
-        no guarantees in terms of proximity to the original point can be made.
+        no guarantee can be made in terms of proximity to the original
+        point.
 
         Parameters
         ----------
@@ -336,10 +379,9 @@ class HRSampler(object):
         Returns
         -------
         numpy.array
-            A new feasible point. If `p` was feasible it wil return p.
+            A new feasible point. If `p` is feasible, it will return `p`.
 
         """
-
         nulls = self.problem.nullspace
         equalities = self.problem.equalities
 
@@ -349,39 +391,34 @@ class HRSampler(object):
         ):
             new = p
         else:
-            LOGGER.info(
-                "feasibility violated in sample"
-                " %d, trying to reproject" % self.n_samples
+            logger.info(
+                f"Feasibility violated in sample {self.n_samples}, trying to reproject."
             )
             new = nulls.dot(nulls.T.dot(p))
 
         # Projections may violate bounds
         # set to random point in space in that case
         if any(new != p):
-            LOGGER.info(
-                "reprojection failed in sample"
-                " %d, using random point in space" % self.n_samples
+            logger.info(
+                f"Re-projection failed in sample {self.n_samples}, "
+                "using random point in space."
             )
             new = self._random_point()
 
         return new
 
-    def _random_point(self):
+    def _random_point(self) -> np.ndarray:
         """Find an approximately random point in the flux cone."""
-
         idx = np.random.randint(
             self.n_warmup, size=min(2, np.ceil(np.sqrt(self.n_warmup)))
         )
         return self.warmup[idx, :].mean(axis=0)
 
-    def _is_redundant(self, matrix, cutoff=None):
-        """Identify rdeundant rows in a matrix that can be removed."""
-
+    def _is_redundant(self, matrix: np.matrix, cutoff: Optional[float] = None) -> bool:
+        """Identify redundant rows in a matrix that can be removed."""
         cutoff = 1.0 - self.feasibility_tol
-
         # Avoid zero variances
         extra_col = matrix[:, 0] + 1
-
         # Avoid zero rows being correlated with constant rows
         extra_col[matrix.sum(axis=1) == 0] = 2
         corr = np.corrcoef(np.c_[matrix, extra_col])
@@ -389,9 +426,8 @@ class HRSampler(object):
 
         return (np.abs(corr) > cutoff).any(axis=1)
 
-    def _bounds_dist(self, p):
+    def _bounds_dist(self, p: np.ndarray) -> np.ndarray:
         """Get the lower and upper bound distances. Negative is bad."""
-
         prob = self.problem
         lb_dist = (
             p
@@ -425,44 +461,65 @@ class HRSampler(object):
 
         return np.array([lb_dist, ub_dist])
 
-    def sample(self, n, fluxes=True):
+    @abstractmethod
+    def sample(self, n: int, fluxes: bool = True) -> pd.DataFrame:
         """Abstract sampling function.
 
         Should be overwritten by child classes.
 
+        Parameters
+        ----------
+        n : int
+            The number of samples that are generated at once.
+        fluxes : bool, optional
+            Whether to return fluxes or the internal solver variables. If
+            set to False, will return a variable for each forward and
+            backward flux as well as all additional variables you might
+            have defined in the model (default True).
+
+        Returns
+        -------
+        pandas.DataFrame
+            Returns a pandas DataFrame with `n` rows, each containing a
+            flux sample.
+
         """
-        pass
+        raise NotImplementedError(
+            "This method needs to be implemented by the subclass."
+        )
 
-    def batch(self, batch_size, batch_num, fluxes=True):
+    def batch(
+        self, batch_size: int, batch_num: int, fluxes: bool = True
+    ) -> pd.DataFrame:
         """Create a batch generator.
 
-        This is useful to generate n batches of m samples each.
+        This is useful to generate `batch_num` batches of `batch_size`
+        samples each.
 
         Parameters
         ----------
         batch_size : int
-            The number of samples contained in each batch (m).
+            The number of samples contained in each batch.
         batch_num : int
-            The number of batches in the generator (n).
-        fluxes : boolean
-            Whether to return fluxes or the internal solver variables. If set
-            to False will return a variable for each forward and backward flux
-            as well as all additional variables you might have defined in the
-            model.
+            The number of batches in the generator.
+        fluxes : bool, optional
+            Whether to return fluxes or the internal solver variables. If
+            set to False, will return a variable for each forward and
+            backward flux as well as all additional variables you might
+            have defined in the model (default True).
 
         Yields
         ------
         pandas.DataFrame
             A DataFrame with dimensions (batch_size x n_r) containing
-            a valid flux sample for a total of n_r reactions (or variables if
-            fluxes=False) in each row.
+            a valid flux sample for a total of n_r reactions (or variables
+            if fluxes=False) in each row.
 
         """
-
-        for i in range(batch_num):
+        for _ in range(batch_num):
             yield self.sample(batch_size, fluxes=fluxes)
 
-    def validate(self, samples):
+    def validate(self, samples: np.matrix) -> np.ndarray:
         """Validate a set of samples for equality and inequality feasibility.
 
         Can be used to check whether the generated samples and warmup points
@@ -471,22 +528,25 @@ class HRSampler(object):
         Parameters
         ----------
         samples : numpy.matrix
-            Must be of dimension (n_samples x n_reactions). Contains the
+            Must be of dimension (samples x n_reactions). Contains the
             samples to be validated. Samples must be from fluxes.
 
         Returns
         -------
         numpy.array
-            A one-dimensional numpy array of length containing
+            A one-dimensional numpy array containing
             a code of 1 to 3 letters denoting the validation result:
-
             - 'v' means feasible in bounds and equality constraints
             - 'l' means a lower bound violation
             - 'u' means an upper bound violation
             - 'e' means an equality constraint violation
 
-        """
+        Raises
+        ------
+        ValueError
+            If wrong number of columns.
 
+        """
         samples = np.atleast_2d(samples)
         prob = self.problem
 
@@ -502,9 +562,9 @@ class HRSampler(object):
             bounds = prob.variable_bounds
         else:
             raise ValueError(
-                "Wrong number of columns. samples must have a "
+                "Wrong number of columns. Samples must have a "
                 "column for each flux or variable defined in the "
-                "model!"
+                "model."
             )
 
         feasibility = np.abs(S.dot(samples.T).T - b).max(axis=1)
@@ -563,10 +623,15 @@ class HRSampler(object):
 
 
 # Required by ACHRSampler and OptGPSampler
-# Has to be declared outside of class to be used for multiprocessing :(
-def step(sampler, x, delta, fraction=None, tries=0):
+# Has to be declared outside of class to be used for multiprocessing
+def step(
+    sampler: HRSampler,
+    x: np.ndarray,
+    delta: np.ndarray,
+    fraction: Optional[float] = None,
+    tries: int = 0,
+) -> np.ndarray:
     """Sample a new feasible point from the point `x` in direction `delta`."""
-
     prob = sampler.problem
     valid = (np.abs(delta) > sampler.feasibility_tol) & np.logical_not(
         prob.variable_fixed
@@ -589,6 +654,7 @@ def step(sampler, x, delta, fraction=None, tries=0):
         alphas = np.hstack([valphas, balphas])
     else:
         alphas = valphas
+
     pos_alphas = alphas[alphas > 0.0]
     neg_alphas = alphas[alphas <= 0.0]
     alpha_range = np.array(
@@ -614,13 +680,12 @@ def step(sampler, x, delta, fraction=None, tries=0):
     ):
         if tries > MAX_TRIES:
             raise RuntimeError(
-                "Can not escape sampling region, model seems"
-                " numerically unstable :( Reporting the "
-                "model to "
+                "Cannot escape sampling region, model seems to be "
+                "numerically unstable. Reporting the model to "
                 "https://github.com/opencobra/cobrapy/issues "
-                "will help us to fix this :)"
+                "will help us to fix this."
             )
-        LOGGER.info("found bounds infeasibility in sample, " "resetting to center")
+        logger.info("Found bounds infeasibility in sample, resetting to center.")
         newdir = sampler.warmup[np.random.randint(sampler.n_warmup)]
         sampler.retries += 1
 



View it on GitLab: https://salsa.debian.org/med-team/python-cobra/-/commit/6a842b8f16fe33da58c32b3c6d9fdc7de26f3990

-- 
View it on GitLab: https://salsa.debian.org/med-team/python-cobra/-/commit/6a842b8f16fe33da58c32b3c6d9fdc7de26f3990
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210207/16f39d32/attachment-0001.html>


More information about the debian-med-commit mailing list