[med-svn] [Git][med-team/python-cobra][upstream] New upstream version 0.21.0
Andreas Tille
gitlab at salsa.debian.org
Sun Feb 7 08:05:16 GMT 2021
Andreas Tille pushed to branch upstream at Debian Med / python-cobra
Commits:
6a842b8f by Andreas Tille at 2021-02-07T08:49:14+01:00
New upstream version 0.21.0
- - - - -
8 changed files:
- INSTALL.rst
- + release-notes/0.21.0.md
- release-notes/next-release.md
- setup.cfg
- setup.py
- src/cobra/__init__.py
- src/cobra/exceptions.py
- src/cobra/sampling/hr_sampler.py
Changes:
=====================================
INSTALL.rst
=====================================
@@ -38,7 +38,7 @@ available from the ``conda-forge`` channel.
Installation for development
============================
-Get the `detailed contribution instructions <CONTRIBUTING.rst>`_ for
+Get the `detailed contribution instructions <.github/CONTRIBUTING.rst>`_ for
contributing to COBRApy.
Solvers
=====================================
release-notes/0.21.0.md
=====================================
@@ -0,0 +1,7 @@
+# Release notes for cobrapy 0.21.0
+
+## Other
+
+* Update libSBML dependency to `python-libsbml==5.19.0`.
+* Docstring and style changes.
+
=====================================
release-notes/next-release.md
=====================================
@@ -4,6 +4,8 @@
## Fixes
+## Other
+
## Deprecated features
## Backwards incompatible changes
=====================================
setup.cfg
=====================================
@@ -1,5 +1,5 @@
[bumpversion]
-current_version = 0.20.0
+current_version = 0.21.0
commit = True
tag = True
parse = (?P<major>\d+)
@@ -62,7 +62,7 @@ install_requires =
optlang <1.4.6
pandas ~=1.0
pydantic ~=1.6
- python-libsbml-experimental ==5.18.3
+ python-libsbml ==5.19.0
rich ~=6.0
ruamel.yaml ~=0.16
six
@@ -107,4 +107,3 @@ replace = version="{new_version}"
[bumpversion:file:src/cobra/__init__.py]
search = __version__ = "{current_version}"
replace = __version__ = "{new_version}"
-
=====================================
setup.py
=====================================
@@ -19,4 +19,4 @@ if version_info[:2] < (3, 6):
# All other arguments are defined in `setup.cfg`.
if __name__ == "__main__":
- setup(version="0.20.0")
+ setup(version="0.21.0")
=====================================
src/cobra/__init__.py
=====================================
@@ -1,5 +1,5 @@
__author__ = "The cobrapy core development team."
-__version__ = "0.20.0"
+__version__ = "0.21.0"
from cobra.core import (
=====================================
src/cobra/exceptions.py
=====================================
@@ -1,28 +1,36 @@
-# -*- coding: utf-8 -*-
-
-from __future__ import absolute_import
-
+"""Module for shared exceptions in the Cobra package."""
import optlang.interface
class OptimizationError(Exception):
+ """Exception for Optimization issues."""
+
def __init__(self, message):
+ """Inherit parent behaviors."""
super(OptimizationError, self).__init__(message)
class Infeasible(OptimizationError):
+ """Exception for Infeasible issues."""
+
pass
class Unbounded(OptimizationError):
+ """Exception for Unbounded issues."""
+
pass
class FeasibleButNotOptimal(OptimizationError):
+ """Exception for Non-Optimal issues."""
+
pass
class UndefinedSolution(OptimizationError):
+ """Exception for Undefined issues."""
+
pass
=====================================
src/cobra/sampling/hr_sampler.py
=====================================
@@ -1,46 +1,47 @@
-# -*- coding: utf-8 -*-
-
-"""Provide base class for Hit-and-Run samplers.
-
-New samplers should derive from the abstract `HRSampler` class
-where possible to provide a uniform interface."""
-
-from __future__ import absolute_import, division
+"""Provide the base class and associated functions for Hit-and-Run samplers."""
import ctypes
-from collections import namedtuple
-from logging import getLogger
+import logging
+from abc import ABC, abstractmethod
from multiprocessing import Array
from time import time
+from typing import TYPE_CHECKING, NamedTuple, Optional, Tuple
import numpy as np
+import pandas as pd
from optlang.interface import OPTIMAL
from optlang.symbolics import Zero
from cobra.util import constraint_matrices, create_stoichiometric_matrix, nullspace
-LOGGER = getLogger(__name__)
+if TYPE_CHECKING:
+ from cobra import Model
+
+
+logger = logging.getLogger(__name__)
# Maximum number of retries for sampling
MAX_TRIES = 100
-Problem = namedtuple(
+Problem = NamedTuple(
"Problem",
[
- "equalities",
- "b",
- "inequalities",
- "bounds",
- "variable_fixed",
- "variable_bounds",
- "nullspace",
- "homogeneous",
+ ("equalities", np.ndarray),
+ ("b", np.ndarray),
+ ("inequalities", np.ndarray),
+ ("bounds", np.ndarray),
+ ("variable_fixed", np.ndarray),
+ ("variable_bounds", np.ndarray),
+ ("nullspace", np.matrix),
+ ("homogeneous", bool),
],
)
-"""Defines the matrix representation of a sampling problem.
+"""Define the matrix representation of a sampling problem.
+
+A named tuple consisting of 6 arrays, 1 matrix and 1 boolean.
Attributes
----------
@@ -52,33 +53,49 @@ inequalities : numpy.array
All inequality constraints in the model.
bounds : numpy.array
The lower and upper bounds for the inequality constraints.
+variable_fixed : numpy.array
+ A boolean vector indicating whether the variable at that index is
+ fixed i.e., whether `variable.lower_bound == variable.upper_bound`.
variable_bounds : numpy.array
The lower and upper bounds for the variables.
-homogeneous: boolean
- Indicates whether the sampling problem is homogenous, e.g. whether there
- exist no non-zero fixed variables or constraints.
nullspace : numpy.matrix
- A matrix containing the nullspace of the equality constraints. Each column
- is one basis vector.
+ A matrix containing the nullspace of the equality constraints.
+ Each column is one basis vector.
+homogeneous: bool
+ Indicates whether the sampling problem is homogeneous, e.g. whether
+ there exist no non-zero fixed variables or constraints.
"""
-def shared_np_array(shape, data=None, integer=False):
+def shared_np_array(
+ shape: Tuple[int, int], data: Optional[np.ndarray] = None, integer: bool = False
+) -> np.ndarray:
"""Create a new numpy array that resides in shared memory.
Parameters
----------
- shape : tuple of ints
+ shape : tuple of int
The shape of the new array.
- data : numpy.array
- Data to copy to the new array. Has to have the same shape.
- integer : boolean
- Whether to use an integer array. Defaults to False which means
- float array.
+ data : numpy.array, optional
+ Data to copy to the new array. Has to have the same shape
+ (default None).
+ integer : bool, optional
+ Whether to use an integer array. By default, float array is used
+ (default False).
+
+ Returns
+ -------
+ numpy.array
+ The newly created shared numpy array.
+
+ Raises
+ ------
+ ValueError
+ If the input `data` (if provided) size is not equal to the created
+ array.
"""
-
size = np.prod(shape)
if integer:
@@ -92,81 +109,95 @@ def shared_np_array(shape, data=None, integer=False):
if data is not None:
if len(shape) != len(data.shape):
- raise ValueError(
- "`data` must have the same dimensions" "as the created array."
- )
+ raise ValueError("`data` must have the same shape as the created array.")
+
same = all(x == y for x, y in zip(shape, data.shape))
if not same:
- raise ValueError("`data` must have the same shape" "as the created array.")
+ raise ValueError("`data` must have the same shape as the created array.")
+
np_array[:] = data
return np_array
-class HRSampler(object):
- """The abstract base class for hit-and-run samplers.
+class HRSampler(ABC):
+ """
+ The abstract base class for hit-and-run samplers.
+
+ New samplers should derive from this class where possible to provide
+ a uniform interface.
Parameters
----------
model : cobra.Model
The cobra model from which to generate samples.
thinning : int
- The thinning factor of the generated sampling chain. A thinning of 10
- means samples are returned every 10 steps.
+ The thinning factor of the generated sampling chain. A thinning of
+ 10 means samples are returned every 10 steps.
nproj : int > 0, optional
- How often to reproject the sampling point into the feasibility space.
- Avoids numerical issues at the cost of lower sampling. If you observe
- many equality constraint violations with `sampler.validate` you should
- lower this number.
+ How often to reproject the sampling point into the feasibility
+ space. Avoids numerical issues at the cost of lower sampling. If
+ you observe many equality constraint violations with
+ `sampler.validate` you should lower this number (default None).
seed : int > 0, optional
- The random number seed that should be used.
+ Sets the random number seed. Initialized to the current time stamp
+ if None (default None).
Attributes
----------
- model : cobra.Model
- The cobra model from which the sampes get generated.
feasibility_tol: float
The tolerance used for checking equalities feasibility.
bounds_tol: float
The tolerance used for checking bounds feasibility.
- thinning : int
- The currently used thinning factor.
n_samples : int
The total number of samples that have been generated by this
sampler instance.
retries : int
The overall number of sampling retries the sampler has observed. Larger
values indicate numerical instabilities.
- problem : collections.namedtuple
- A python object whose attributes define the entire sampling problem in
- matrix form. See docstring of `Problem`.
+ problem : Problem
+ A NamedTuple whose attributes define the entire sampling problem in
+ matrix form.
warmup : numpy.matrix
- A matrix of with as many columns as reactions in the model and more
- than 3 rows containing a warmup sample in each row. None if no warmup
- points have been generated yet.
- nproj : int
- How often to reproject the sampling point into the feasibility space.
- seed : int > 0, optional
- Sets the random number seed. Initialized to the current time stamp if
- None.
+ A numpy matrix with as many columns as reactions in the model and
+ more than 3 rows containing a warmup sample in each row. None if no
+ warmup points have been generated yet.
fwd_idx : numpy.array
- Has one entry for each reaction in the model containing the index of
- the respective forward variable.
+ A numpy array having one entry for each reaction in the model,
+ containing the index of the respective forward variable.
rev_idx : numpy.array
- Has one entry for each reaction in the model containing the index of
- the respective reverse variable.
+ A numpy array having one entry for each reaction in the model,
+ containing the index of the respective reverse variable.
"""
- def __init__(self, model, thinning, nproj=None, seed=None):
- """Initialize a new sampler object."""
+ def __init__(
+ self,
+ model: "Model",
+ thinning: int,
+ nproj: Optional[int] = None,
+ seed: Optional[int] = None,
+ **kwargs,
+ ) -> None:
+ """Initialize a new sampler object.
+
+ Other Parameters
+ ----------------
+ kwargs :
+ Further keyword arguments are passed on to the parent class.
+
+ Raises
+ ------
+ TypeError
+ If integer problem is found.
+ """
# This currently has to be done to reset the solver basis which is
# required to get deterministic warmup point generation
- # (in turn required for a working `seed` arg)
+ # (in turn required for a working `seed`)
if model.solver.is_integer:
- raise TypeError("sampling does not work with integer problems :(")
+ raise TypeError("Sampling does not work with integer problems.")
self.model = model.copy()
self.feasibility_tol = model.tolerance
@@ -201,9 +232,15 @@ class HRSampler(object):
# Avoid overflow
self._seed = self._seed % np.iinfo(np.int32).max
- def __build_problem(self):
- """Build the matrix representation of the sampling problem."""
+ def __build_problem(self) -> Problem:
+ """Build the matrix representation of the sampling problem.
+ Returns
+ -------
+ Problem
+ The matrix representation in the form of a NamedTuple.
+
+ """
# Set up the mathematical problem
prob = constraint_matrices(self.model, zero_tol=self.feasibility_tol)
@@ -244,7 +281,7 @@ class HRSampler(object):
homogeneous=homogeneous,
)
- def generate_fva_warmup(self):
+ def generate_fva_warmup(self) -> None:
"""Generate the warmup points for the sampler.
Generates warmup points by setting each flux as the sole objective
@@ -252,8 +289,13 @@ class HRSampler(object):
warmup points into the nullspace for non-homogeneous problems (only
if necessary).
- """
+ Raises
+ ------
+ ValueError
+ If flux cone contains a single point or the problem is
+ inhomogeneous.
+ """
self.n_warmup = 0
reactions = self.model.reactions
self.warmup = np.zeros((2 * len(reactions), len(self.model.variables)))
@@ -270,7 +312,7 @@ class HRSampler(object):
# Omit fixed reactions if they are non-homogeneous
if r.upper_bound - r.lower_bound < self.bounds_tol:
- LOGGER.info("skipping fixed reaction %s" % r.id)
+ logger.info(f"Skipping fixed reaction {r.id}")
continue
self.model.objective.set_linear_coefficients(
@@ -280,7 +322,7 @@ class HRSampler(object):
self.model.slim_optimize()
if not self.model.solver.status == OPTIMAL:
- LOGGER.info("can not maximize reaction %s, skipping it" % r.id)
+ logger.info(f"Cannot maximize reaction {r.id}, skipping it.")
continue
primals = self.model.solver.primal_values
@@ -305,14 +347,14 @@ class HRSampler(object):
# Catch some special cases
if len(self.warmup.shape) == 1 or self.warmup.shape[0] == 1:
- raise ValueError("Your flux cone consists only of a single point!")
+ raise ValueError("Flux cone only consists a single point.")
elif self.n_warmup == 2:
if not self.problem.homogeneous:
raise ValueError(
- "Can not sample from an inhomogenous problem"
- " with only 2 search directions :("
+ "Cannot sample from an inhomogenous problem "
+ "with only 2 search directions."
)
- LOGGER.info("All search directions on a line, adding another one.")
+ logger.info("All search directions on a line, adding another one.")
newdir = self.warmup.T.dot([0.25, 0.25])
self.warmup = np.vstack([self.warmup, newdir])
self.n_warmup += 1
@@ -322,11 +364,12 @@ class HRSampler(object):
(self.n_warmup, len(self.model.variables)), self.warmup
)
- def _reproject(self, p):
+ def _reproject(self, p: np.ndarray) -> np.ndarray:
"""Reproject a point into the feasibility region.
This function is guaranteed to return a new feasible point. However,
- no guarantees in terms of proximity to the original point can be made.
+ no guarantee can be made in terms of proximity to the original
+ point.
Parameters
----------
@@ -336,10 +379,9 @@ class HRSampler(object):
Returns
-------
numpy.array
- A new feasible point. If `p` was feasible it wil return p.
+ A new feasible point. If `p` is feasible, it will return `p`.
"""
-
nulls = self.problem.nullspace
equalities = self.problem.equalities
@@ -349,39 +391,34 @@ class HRSampler(object):
):
new = p
else:
- LOGGER.info(
- "feasibility violated in sample"
- " %d, trying to reproject" % self.n_samples
+ logger.info(
+ f"Feasibility violated in sample {self.n_samples}, trying to reproject."
)
new = nulls.dot(nulls.T.dot(p))
# Projections may violate bounds
# set to random point in space in that case
if any(new != p):
- LOGGER.info(
- "reprojection failed in sample"
- " %d, using random point in space" % self.n_samples
+ logger.info(
+ f"Re-projection failed in sample {self.n_samples}, "
+ "using random point in space."
)
new = self._random_point()
return new
- def _random_point(self):
+ def _random_point(self) -> np.ndarray:
"""Find an approximately random point in the flux cone."""
-
idx = np.random.randint(
self.n_warmup, size=min(2, np.ceil(np.sqrt(self.n_warmup)))
)
return self.warmup[idx, :].mean(axis=0)
- def _is_redundant(self, matrix, cutoff=None):
- """Identify rdeundant rows in a matrix that can be removed."""
-
+ def _is_redundant(self, matrix: np.matrix, cutoff: Optional[float] = None) -> bool:
+ """Identify redundant rows in a matrix that can be removed."""
cutoff = 1.0 - self.feasibility_tol
-
# Avoid zero variances
extra_col = matrix[:, 0] + 1
-
# Avoid zero rows being correlated with constant rows
extra_col[matrix.sum(axis=1) == 0] = 2
corr = np.corrcoef(np.c_[matrix, extra_col])
@@ -389,9 +426,8 @@ class HRSampler(object):
return (np.abs(corr) > cutoff).any(axis=1)
- def _bounds_dist(self, p):
+ def _bounds_dist(self, p: np.ndarray) -> np.ndarray:
"""Get the lower and upper bound distances. Negative is bad."""
-
prob = self.problem
lb_dist = (
p
@@ -425,44 +461,65 @@ class HRSampler(object):
return np.array([lb_dist, ub_dist])
- def sample(self, n, fluxes=True):
+ @abstractmethod
+ def sample(self, n: int, fluxes: bool = True) -> pd.DataFrame:
"""Abstract sampling function.
Should be overwritten by child classes.
+ Parameters
+ ----------
+ n : int
+ The number of samples that are generated at once.
+ fluxes : bool, optional
+ Whether to return fluxes or the internal solver variables. If
+ set to False, will return a variable for each forward and
+ backward flux as well as all additional variables you might
+ have defined in the model (default True).
+
+ Returns
+ -------
+ pandas.DataFrame
+ Returns a pandas DataFrame with `n` rows, each containing a
+ flux sample.
+
"""
- pass
+ raise NotImplementedError(
+ "This method needs to be implemented by the subclass."
+ )
- def batch(self, batch_size, batch_num, fluxes=True):
+ def batch(
+ self, batch_size: int, batch_num: int, fluxes: bool = True
+ ) -> pd.DataFrame:
"""Create a batch generator.
- This is useful to generate n batches of m samples each.
+ This is useful to generate `batch_num` batches of `batch_size`
+ samples each.
Parameters
----------
batch_size : int
- The number of samples contained in each batch (m).
+ The number of samples contained in each batch.
batch_num : int
- The number of batches in the generator (n).
- fluxes : boolean
- Whether to return fluxes or the internal solver variables. If set
- to False will return a variable for each forward and backward flux
- as well as all additional variables you might have defined in the
- model.
+ The number of batches in the generator.
+ fluxes : bool, optional
+ Whether to return fluxes or the internal solver variables. If
+ set to False, will return a variable for each forward and
+ backward flux as well as all additional variables you might
+ have defined in the model (default True).
Yields
------
pandas.DataFrame
A DataFrame with dimensions (batch_size x n_r) containing
- a valid flux sample for a total of n_r reactions (or variables if
- fluxes=False) in each row.
+ a valid flux sample for a total of n_r reactions (or variables
+ if fluxes=False) in each row.
"""
-
- for i in range(batch_num):
+ for _ in range(batch_num):
yield self.sample(batch_size, fluxes=fluxes)
- def validate(self, samples):
+ def validate(self, samples: np.matrix) -> np.ndarray:
"""Validate a set of samples for equality and inequality feasibility.
Can be used to check whether the generated samples and warmup points
@@ -471,22 +528,25 @@ class HRSampler(object):
Parameters
----------
samples : numpy.matrix
- Must be of dimension (n_samples x n_reactions). Contains the
+ Must be of dimension (samples x n_reactions). Contains the
samples to be validated. Samples must be from fluxes.
Returns
-------
numpy.array
- A one-dimensional numpy array of length containing
+ A one-dimensional numpy array containing
a code of 1 to 3 letters denoting the validation result:
-
- 'v' means feasible in bounds and equality constraints
- 'l' means a lower bound violation
- 'u' means an upper bound violation
- 'e' means an equality constraint violation
- """
+ Raises
+ ------
+ ValueError
+ If wrong number of columns.
+ """
samples = np.atleast_2d(samples)
prob = self.problem
@@ -502,9 +562,9 @@ class HRSampler(object):
bounds = prob.variable_bounds
else:
raise ValueError(
- "Wrong number of columns. samples must have a "
+ "Wrong number of columns. Samples must have a "
"column for each flux or variable defined in the "
- "model!"
+ "model."
)
feasibility = np.abs(S.dot(samples.T).T - b).max(axis=1)
@@ -563,10 +623,15 @@ class HRSampler(object):
# Required by ACHRSampler and OptGPSampler
-# Has to be declared outside of class to be used for multiprocessing :(
-def step(sampler, x, delta, fraction=None, tries=0):
+# Has to be declared outside of class to be used for multiprocessing
+def step(
+ sampler: HRSampler,
+ x: np.ndarray,
+ delta: np.ndarray,
+ fraction: Optional[float] = None,
+ tries: int = 0,
+) -> np.ndarray:
"""Sample a new feasible point from the point `x` in direction `delta`."""
-
prob = sampler.problem
valid = (np.abs(delta) > sampler.feasibility_tol) & np.logical_not(
prob.variable_fixed
@@ -589,6 +654,7 @@ def step(sampler, x, delta, fraction=None, tries=0):
alphas = np.hstack([valphas, balphas])
else:
alphas = valphas
+
pos_alphas = alphas[alphas > 0.0]
neg_alphas = alphas[alphas <= 0.0]
alpha_range = np.array(
@@ -614,13 +680,12 @@ def step(sampler, x, delta, fraction=None, tries=0):
):
if tries > MAX_TRIES:
raise RuntimeError(
- "Can not escape sampling region, model seems"
- " numerically unstable :( Reporting the "
- "model to "
+ "Cannot escape sampling region, model seems to be "
+ "numerically unstable. Reporting the model to "
"https://github.com/opencobra/cobrapy/issues "
- "will help us to fix this :)"
+ "will help us to fix this."
)
- LOGGER.info("found bounds infeasibility in sample, " "resetting to center")
+ logger.info("Found bounds infeasibility in sample, resetting to center.")
newdir = sampler.warmup[np.random.randint(sampler.n_warmup)]
sampler.retries += 1
View it on GitLab: https://salsa.debian.org/med-team/python-cobra/-/commit/6a842b8f16fe33da58c32b3c6d9fdc7de26f3990
--
View it on GitLab: https://salsa.debian.org/med-team/python-cobra/-/commit/6a842b8f16fe33da58c32b3c6d9fdc7de26f3990
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210207/16f39d32/attachment-0001.html>
More information about the debian-med-commit
mailing list