[Python-modules-commits] [python-sparse] 01/05: New upstream version 0.1.1
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Aug 7 08:44:43 UTC 2017
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository python-sparse.
commit dd5978fd4074cc1599048f512463be08d9a986a4
Author: Ghislain Antony Vaillant <ghisvail at gmail.com>
Date: Sat Aug 5 09:39:07 2017 +0100
New upstream version 0.1.1
---
setup.py | 2 +-
sparse/__init__.py | 2 +-
sparse/core.py | 360 +++++++++++++++++++++++++++++++++-------------
sparse/tests/test_core.py | 79 ++++++++--
sparse/utils.py | 13 ++
5 files changed, 347 insertions(+), 109 deletions(-)
diff --git a/setup.py b/setup.py
index 6223933..a9a707a 100755
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@ from setuptools import setup
setup(name='sparse',
- version='0.1.0',
+ version='0.1.1',
description='Sparse',
url='http://github.com/mrocklin/sparse/',
maintainer='Matthew Rocklin',
diff --git a/sparse/__init__.py b/sparse/__init__.py
index 595bebf..579ebea 100644
--- a/sparse/__init__.py
+++ b/sparse/__init__.py
@@ -1,3 +1,3 @@
from .core import COO, tensordot, concatenate, stack, dot
-__version__ = '0.1.0'
+__version__ = '0.1.1'
diff --git a/sparse/core.py b/sparse/core.py
index c8bcaa9..43813bf 100644
--- a/sparse/core.py
+++ b/sparse/core.py
@@ -1,14 +1,20 @@
from __future__ import absolute_import, division, print_function
-from collections import Iterable
+from collections import Iterable, defaultdict, deque
from functools import reduce
-from numbers import Number
+import numbers
import operator
import numpy as np
import scipy.sparse
+try: # Windows compatibility
+ int = long
+except NameError:
+ pass
+
+
class COO(object):
""" A Sparse Multidimensional Array
@@ -29,9 +35,9 @@ class COO(object):
--------
>>> x = np.eye(4)
>>> x[2, 3] = 5
- >>> s = COO.from_numpy(x)
+ >>> s = COO(x)
>>> s
- <COO: shape=(4, 4), dtype=float64, nnz=5>
+ <COO: shape=(4, 4), dtype=float64, nnz=5, sorted=True, duplicates=False>
>>> s.data
array([ 1., 1., 1., 5., 1.])
>>> s.coords
@@ -50,9 +56,9 @@ class COO(object):
>>> data = [1, 2, 3, 4, 5]
>>> y = COO(coords, data, shape=(3, 4, 5))
>>> y
- <COO: shape=(3, 4, 5), dtype=int64, nnz=5>
+ <COO: shape=(3, 4, 5), dtype=int64, nnz=5, sorted=False, duplicates=True>
>>> tensordot(s, y, axes=(0, 1))
- <COO: shape=(4, 3, 5), dtype=float64, nnz=6>
+ <COO: shape=(4, 3, 5), dtype=float64, nnz=6, sorted=False, duplicates=False>
Following scipy.sparse conventions you can also pass these as a tuple with
rows and columns
@@ -73,7 +79,7 @@ class COO(object):
>>> d = {(0, 0, 0): 1, (1, 2, 3): 2, (1, 1, 0): 3}
>>> COO(d)
- <COO: shape=(2, 3, 4), dtype=int64, nnz=3>
+ <COO: shape=(2, 3, 4), dtype=int64, nnz=3, sorted=False, duplicates=False>
>>> L = [((0, 0), 1),
... ((1, 1), 2),
@@ -89,17 +95,23 @@ class COO(object):
"""
__array_priority__ = 12
- def __init__(self, coords, data=None, shape=None, has_duplicates=True):
+ def __init__(self, coords, data=None, shape=None, has_duplicates=True,
+ sorted=False, cache=False):
+ self._cache = None
+ if cache:
+ self.enable_caching()
if data is None:
# {(i, j, k): x, (i, j, k): y, ...}
if isinstance(coords, dict):
coords = list(coords.items())
+ has_duplicates = False
if isinstance(coords, np.ndarray):
result = COO.from_numpy(coords)
self.coords = result.coords
self.data = result.data
self.has_duplicates = result.has_duplicates
+ self.sorted = result.sorted
self.shape = result.shape
return
@@ -127,7 +139,7 @@ class COO(object):
self.coords = self.coords[None, :]
if shape and not np.prod(self.coords.shape):
- self.coords = np.zeros((len(shape), 0), dtype=int)
+ self.coords = np.zeros((len(shape), 0), dtype=np.uint64)
if shape is None:
if self.coords.nbytes:
@@ -143,13 +155,43 @@ class COO(object):
self.coords = self.coords.astype(dtype)
assert not self.shape or len(data) == self.coords.shape[1]
self.has_duplicates = has_duplicates
+ self.sorted = sorted
+
+ def enable_caching(self):
+ """ Enable caching of reshape, transpose, and tocsr/csc operations
+
+ This enables efficient iterative workflows that make heavy use of
+ csr/csc operations, such as tensordot. This maintains a cache of
+ recent results of reshape and transpose so that operations like
+ tensordot (which uses both internally) store efficiently stored
+ representations for repeated use. This can significantly cut down on
+ computational costs in common numeric algorithms.
+
+ However, this also assumes that neither this object, nor the downstream
+ objects will have their data mutated.
+
+ Examples
+ --------
+ >>> x.enable_caching() # doctest: +SKIP
+ >>> csr1 = x.transpose((2, 0, 1)).reshape((100, 120)).tocsr() # doctest: +SKIP
+ >>> csr2 = x.transpose((2, 0, 1)).reshape((100, 120)).tocsr() # doctest: +SKIP
+ >>> csr1 is csr2 # doctest: +SKIP
+ True
+ """
+ self._cache = defaultdict(lambda: deque(maxlen=3))
+ return self
@classmethod
def from_numpy(cls, x):
- coords = np.where(x)
- data = x[coords]
- coords = np.vstack(coords)
- return cls(coords, data, shape=x.shape)
+ if x.shape:
+ coords = np.where(x)
+ data = x[coords]
+ coords = np.vstack(coords)
+ else:
+ coords = []
+ data = x
+ return cls(coords, data, shape=x.shape, has_duplicates=False,
+ sorted=True)
def todense(self):
self = self.sum_duplicates()
@@ -165,7 +207,9 @@ class COO(object):
coords = np.empty((2, x.nnz), dtype=x.row.dtype)
coords[0, :] = x.row
coords[1, :] = x.col
- return COO(coords, x.data, shape=x.shape, has_duplicates=not x.has_canonical_format)
+ return COO(coords, x.data, shape=x.shape,
+ has_duplicates=not x.has_canonical_format,
+ sorted=x.has_canonical_format)
@property
def dtype(self):
@@ -189,8 +233,11 @@ class COO(object):
def __getitem__(self, index):
if not isinstance(index, tuple):
index = (index,)
- index = tuple(ind + self.shape[i] if isinstance(ind, int) and ind < 0 else ind
+ index = tuple(ind + self.shape[i] if isinstance(ind, numbers.Integral) and ind < 0 else ind
for i, ind in enumerate(index))
+ if (all(ind == slice(None) or ind == slice(0, d)
+ for ind, d in zip(index, self.shape))):
+ return self
mask = np.ones(self.nnz, dtype=bool)
for i, ind in enumerate([i for i in index if i is not None]):
if ind == slice(None, None):
@@ -202,7 +249,7 @@ class COO(object):
shape = []
i = 0
for ind in index:
- if isinstance(ind, int):
+ if isinstance(ind, numbers.Integral):
i += 1
continue
elif isinstance(ind, slice):
@@ -231,11 +278,14 @@ class COO(object):
shape = tuple(shape)
data = self.data[mask]
- return COO(coords, data, shape=shape, has_duplicates=self.has_duplicates)
+ return COO(coords, data, shape=shape,
+ has_duplicates=self.has_duplicates,
+ sorted=self.sorted)
def __str__(self):
- return "<COO: shape=%s, dtype=%s, nnz=%d>" % (self.shape, self.dtype,
- self.nnz)
+ return "<COO: shape=%s, dtype=%s, nnz=%d, sorted=%s, duplicates=%s>" % (
+ self.shape, self.dtype, self.nnz, self.sorted,
+ self.has_duplicates)
__repr__ = __str__
@@ -247,7 +297,7 @@ class COO(object):
if dtype:
kwargs['dtype'] = dtype
- if isinstance(axis, int):
+ if isinstance(axis, numbers.Integral):
axis = (axis,)
if set(axis) == set(range(self.ndim)):
@@ -266,7 +316,13 @@ class COO(object):
a = a.to_scipy_sparse()
a = getattr(a, method)(axis=0, **kwargs)
- a = COO.from_scipy_sparse(a)
+ if isinstance(a, scipy.sparse.spmatrix):
+ a = COO.from_scipy_sparse(a)
+ a.sorted = self.sorted
+ a.has_duplicates = False
+ elif isinstance(a, np.matrix):
+ a = np.asarray(a)[0]
+ a = COO.from_numpy(a)
a = a.reshape([self.shape[d] for d in neg_axis])
result = a
@@ -298,19 +354,18 @@ class COO(object):
if axes == tuple(range(self.ndim)):
return self
+ if self._cache is not None:
+ for ax, value in self._cache['transpose']:
+ if ax == axes:
+ return value
+
shape = tuple(self.shape[ax] for ax in axes)
result = COO(self.coords[axes, :], self.data, shape,
- has_duplicates=self.has_duplicates)
+ has_duplicates=self.has_duplicates,
+ cache=self._cache is not None)
- if axes == (1, 0):
- try:
- result._csc = self._csr.T
- except AttributeError:
- pass
- try:
- result._csr = self._csc.T
- except AttributeError:
- pass
+ if self._cache is not None:
+ self._cache['transpose'].append((axes, result))
return result
@property
@@ -320,6 +375,41 @@ class COO(object):
def dot(self, other):
return dot(self, other)
+ def __matmul__(self, other):
+ try:
+ return dot(self, other)
+ except NotImplementedError:
+ return NotImplemented
+
+ def __rmatmul__(self, other):
+ try:
+ return dot(other, self)
+ except NotImplementedError:
+ return NotImplemented
+
+ def __numpy_ufunc__(self, ufunc, method, i, inputs, **kwargs):
+ return NotImplemented
+
+ def linear_loc(self, signed=False):
+ """ Index location of every piece of data in a flattened array
+
+ This is used internally to check for duplicates, re-order, reshape,
+ etc..
+ """
+ n = reduce(operator.mul, self.shape)
+ if signed:
+ n = -n
+ dtype = np.min_scalar_type(n)
+ out = np.zeros(self.nnz, dtype=dtype)
+ tmp = np.zeros(self.nnz, dtype=dtype)
+ strides = 1
+ for i, d in enumerate(self.shape[::-1]):
+ # out += self.coords[-(i + 1), :].astype(dtype) * strides
+ np.multiply(self.coords[-(i + 1), :], strides, out=tmp, dtype=dtype)
+ np.add(tmp, out, out=out)
+ strides *= d
+ return out
+
def reshape(self, shape):
if self.shape == shape:
return self
@@ -327,23 +417,31 @@ class COO(object):
extra = int(np.prod(self.shape) /
np.prod([d for d in shape if d != -1]))
shape = tuple([d if d != -1 else extra for d in shape])
+
if self.shape == shape:
return self
+
+ if self._cache is not None:
+ for sh, value in self._cache['reshape']:
+ if sh == shape:
+ return value
+
# TODO: this np.prod(self.shape) enforces a 2**64 limit to array size
- dtype = np.min_scalar_type(np.prod(self.shape))
- linear_loc = np.zeros(self.nnz, dtype=dtype)
- strides = 1
- for i, d in enumerate(self.shape[::-1]):
- linear_loc += self.coords[-(i + 1), :].astype(dtype) * strides
- strides *= d
+ linear_loc = self.linear_loc()
- coords = np.empty((len(shape), self.nnz), dtype=np.min_scalar_type(max(self.shape)))
+ coords = np.empty((len(shape), self.nnz), dtype=np.min_scalar_type(max(shape)))
strides = 1
for i, d in enumerate(shape[::-1]):
coords[-(i + 1), :] = (linear_loc // strides) % d
strides *= d
- return COO(coords, self.data, shape, has_duplicates=self.has_duplicates)
+ result = COO(coords, self.data, shape,
+ has_duplicates=self.has_duplicates,
+ sorted=self.sorted, cache=self._cache is not None)
+
+ if self._cache is not None:
+ self._cache['reshape'].append((shape, result))
+ return result
def to_scipy_sparse(self):
assert self.ndim == 2
@@ -351,40 +449,76 @@ class COO(object):
(self.coords[0],
self.coords[1])),
shape=self.shape)
- result.has_canonical_format = not self.has_duplicates
+ result.has_canonical_format = (not self.has_duplicates and self.sorted)
return result
+ def _tocsr(self):
+ assert self.ndim == 2
+
+ # Pass 1: sum duplicates
+ self.sum_duplicates()
+
+ # Pass 2: sort indices
+ self.sort_indices()
+ row, col = self.coords
+
+ # Pass 3: count nonzeros in each row
+ indptr = np.zeros(self.shape[0] + 1, dtype=np.int64)
+ np.cumsum(np.bincount(row, minlength=self.shape[0]), out=indptr[1:])
+
+ return scipy.sparse.csr_matrix((self.data, col, indptr), shape=self.shape)
+
def tocsr(self):
- try:
- return self._csr
- except AttributeError:
- pass
- try:
- self._csr = self._csc.tocsr()
- return self._csr
- except AttributeError:
- pass
-
- coo = self.to_scipy_sparse()
- csr = coo.tocsr()
- self._csr = csr
+ if self._cache is not None:
+ try:
+ return self._csr
+ except AttributeError:
+ pass
+ try:
+ self._csr = self._csc.tocsr()
+ return self._csr
+ except AttributeError:
+ pass
+
+ self._csr = csr = self._tocsr()
+ else:
+ csr = self._tocsr()
return csr
def tocsc(self):
- try:
- return self._csc
- except AttributeError:
- pass
- try:
- self._csc = self._csr.tocsc()
- return self._csc
- except AttributeError:
- pass
- coo = self.to_scipy_sparse()
- csc = coo.tocsc()
- self._csc = csc
+ if self._cache is not None:
+ try:
+ return self._csc
+ except AttributeError:
+ pass
+ try:
+ self._csc = self._csr.tocsc()
+ return self._csc
+ except AttributeError:
+ pass
+
+ self._csc = csc = self.tocsr().tocsc()
+ else:
+ csc = self.tocsr().tocsc()
+
return csc
+ def sort_indices(self):
+ if self.sorted:
+ return
+
+ linear = self.linear_loc(signed=True)
+
+ if (np.diff(linear) > 0).all(): # already sorted
+ self.sorted = True
+ return self
+
+ order = np.argsort(linear)
+ self.coords = self.coords[:, order]
+ self.data = self.data[order]
+ self.sorted = True
+ return self
+
def sum_duplicates(self):
# Inspired by scipy/sparse/coo.py::sum_duplicates
# See https://github.com/scipy/scipy/blob/master/LICENSE.txt
@@ -392,15 +526,21 @@ class COO(object):
return self
if not np.prod(self.coords.shape):
return self
- order = np.lexsort(self.coords)
- coords = self.coords[:, order]
- data = self.data[order]
- unique_mask = (coords[:, 1:] != coords[:, :-1]).any(axis=0)
+
+ self.sort_indices()
+
+ linear = self.linear_loc()
+ unique_mask = np.diff(linear) != 0
+
+ if unique_mask.sum() == len(unique_mask): # already unique
+ self.has_duplicates = False
+ return self
+
unique_mask = np.append(True, unique_mask)
- coords = coords[:, unique_mask]
+ coords = self.coords[:, unique_mask]
(unique_inds,) = np.nonzero(unique_mask)
- data = np.add.reduceat(data, unique_inds, dtype=data.dtype)
+ data = np.add.reduceat(self.data, unique_inds, dtype=self.data.dtype)
self.data = data
self.coords = coords
@@ -409,17 +549,21 @@ class COO(object):
return self
def __add__(self, other):
+ if isinstance(other, numbers.Number) and other == 0:
+ return self
if not isinstance(other, COO):
return self.maybe_densify() + other
if self.shape == other.shape:
- return COO(np.concatenate([self.coords, other.coords], axis=1),
- np.concatenate([self.data, other.data]),
- self.shape, has_duplicates=True)
+ return self.elemwise_binary(operator.add, other)
else:
raise NotImplementedError("Broadcasting not yet supported")
+ def __radd__(self, other):
+ return self + other
+
def __neg__(self):
- return COO(self.coords, -self.data, self.shape, self.has_duplicates)
+ return COO(self.coords, -self.data, self.shape, self.has_duplicates,
+ self.sorted)
def __sub__(self, other):
return self + (-other)
@@ -451,7 +595,9 @@ class COO(object):
raise ValueError("Performing this operation would produce "
"a dense result: %s" % str(func))
return COO(self.coords, func(self.data, *args, **kwargs),
- shape=self.shape, has_duplicates=self.has_duplicates)
+ shape=self.shape,
+ has_duplicates=self.has_duplicates,
+ sorted=self.sorted)
def elemwise_binary(self, func, other, *args, **kwargs):
assert isinstance(other, COO)
@@ -510,53 +656,68 @@ class COO(object):
def __abs__(self):
return self.elemwise(abs)
- def exp(self):
+ def exp(self, out=None):
+ assert out is None
return np.exp(self.maybe_densify())
- def expm1(self):
+ def expm1(self, out=None):
+ assert out is None
return self.elemwise(np.expm1)
- def log1p(self):
+ def log1p(self, out=None):
+ assert out is None
return self.elemwise(np.log1p)
- def sin(self):
+ def sin(self, out=None):
+ assert out is None
return self.elemwise(np.sin)
- def sinh(self):
+ def sinh(self, out=None):
+ assert out is None
return self.elemwise(np.sinh)
- def tan(self):
+ def tan(self, out=None):
+ assert out is None
return self.elemwise(np.tan)
- def tanh(self):
+ def tanh(self, out=None):
+ assert out is None
return self.elemwise(np.tanh)
- def sqrt(self):
+ def sqrt(self, out=None):
+ assert out is None
return self.elemwise(np.sqrt)
- def ceil(self):
+ def ceil(self, out=None):
+ assert out is None
return self.elemwise(np.ceil)
- def floor(self):
+ def floor(self, out=None):
+ assert out is None
return self.elemwise(np.floor)
- def round(self, decimals=0):
+ def round(self, decimals=0, out=None):
+ assert out is None
return self.elemwise(np.round, decimals)
- def rint(self):
+ def rint(self, out=None):
+ assert out is None
return self.elemwise(np.rint)
- def conj(self):
+ def conj(self, out=None):
+ assert out is None
return self.elemwise(np.conj)
- def conjugate(self):
+ def conjugate(self, out=None):
+ assert out is None
return self.elemwise(np.conjugate)
- def astype(self, dtype):
+ def astype(self, dtype, out=None):
+ assert out is None
return self.elemwise(np.ndarray.astype, dtype, check=False)
def __gt__(self, other):
- if not isinstance(other, Number):
+ if not isinstance(other, numbers.Number):
raise NotImplementedError("Only scalars supported")
if other < 0:
raise ValueError("Comparison with negative number would produce "
@@ -564,7 +725,7 @@ class COO(object):
return self.elemwise(operator.gt, other)
def __ge__(self, other):
- if not isinstance(other, Number):
+ if not isinstance(other, numbers.Number):
raise NotImplementedError("Only scalars supported")
if other <= 0:
raise ValueError("Comparison with negative number would produce "
@@ -649,12 +810,17 @@ def tensordot(a, b, axes=2):
res = res.todense()
else:
res = COO.from_scipy_sparse(res) # <--- modified
+ res.has_duplicates = False
if isinstance(res, np.matrix):
res = np.asarray(res)
return res.reshape(olda + oldb)
def dot(a, b):
+ if not hasattr(a, 'ndim') or not hasattr(b, 'ndim'):
+ raise NotImplementedError(
+ "Cannot perform dot product on types %s, %s" %
+ (type(a), type(b)))
return tensordot(a, b, axes=((a.ndim - 1,), (b.ndim - 2,)))
@@ -680,7 +846,7 @@ def _keepdims(original, new, axis):
def _mask(coords, idx):
- if isinstance(idx, int):
+ if isinstance(idx, numbers.Integral):
return coords == idx
elif isinstance(idx, slice):
if idx.step not in (1, None):
@@ -717,7 +883,8 @@ def concatenate(arrays, axis=0):
shape[axis] = dim
has_duplicates = any(x.has_duplicates for x in arrays)
- return COO(coords, data, shape=shape, has_duplicates=has_duplicates)
+ return COO(coords, data, shape=shape, has_duplicates=has_duplicates,
+ sorted=(axis == 0) and all(a.sorted for a in arrays))
def stack(arrays, axis=0):
@@ -743,4 +910,5 @@ def stack(arrays, axis=0):
coords.insert(axis, new)
coords = np.stack(coords, axis=0)
- return COO(coords, data, shape=shape, has_duplicates=has_duplicates)
+ return COO(coords, data, shape=shape, has_duplicates=has_duplicates,
+ sorted=(axis == 0) and all(a.sorted for a in arrays))
diff --git a/sparse/tests/test_core.py b/sparse/tests/test_core.py
index 5cc5558..51af459 100644
--- a/sparse/tests/test_core.py
+++ b/sparse/tests/test_core.py
@@ -55,6 +55,7 @@ def test_transpose(axis):
[(2, 3, 4, 5), (8, 15)],
[(2, 3, 4, 5), (24, 5)],
[(2, 3, 4, 5), (20, 6)],
+ [(), ()],
])
def test_reshape(a, b):
x = random_x(a)
@@ -70,7 +71,7 @@ def test_large_reshape():
col = row % m # np.random.randint(0, m, size=n, dtype=np.uint16)
data = np.ones(n, dtype=np.uint8)
- x = COO((data, (row, col)))
+ x = COO((data, (row, col)), sorted=True, has_duplicates=False)
assert_eq(x, x.reshape(x.shape))
@@ -126,6 +127,7 @@ def test_tensordot(a_shape, b_shape, axes):
def test_dot():
+ import operator
a = random_x((3, 4, 5))
b = random_x((5, 6))
@@ -135,6 +137,33 @@ def test_dot():
assert_eq(a.dot(b), sa.dot(sb))
assert_eq(np.dot(a, b), sparse.dot(sa, sb))
+ if hasattr(operator, 'matmul'):
+ # Basic equivalences
+ assert_eq(eval("a @ b"), eval("sa @ sb"))
+ assert_eq(eval("sa @ sb"), sparse.dot(sa, sb))
+
+ # Test that SOO's and np.array's combine correctly
+ assert_eq(eval("a @ sb"), eval("sa @ b"))
+
+
+@pytest.mark.xfail
+def test_dot_nocoercion():
+ a = random_x((3, 4, 5))
+ b = random_x((5, 6))
+
+ la = a.tolist()
+ lb = b.tolist()
+ la, lb # silencing flake8
+
+ sa = COO.from_numpy(a)
+ sb = COO.from_numpy(b)
+ sa, sb # silencing flake8
+
+ if hasattr(operator, 'matmul'):
+ # Operations with naive collection (list)
+ assert_eq(eval("la @ b"), eval("la @ sb"))
+ assert_eq(eval("a @ lb"), eval("sa @ lb"))
+
@pytest.mark.parametrize('func', [np.expm1, np.log1p, np.sin, np.tan,
np.sinh, np.tanh, np.floor, np.ceil,
@@ -365,8 +394,8 @@ def test_scalar_exponentiation():
def test_create_with_lists_of_tuples():
L = [((0, 0, 0), 1),
- ((1, 1, 1), 2),
((1, 2, 1), 1),
+ ((1, 1, 1), 2),
((1, 3, 2), 3)]
s = COO(L)
@@ -410,21 +439,13 @@ def test_scipy_sparse_interface():
def test_cache_csr():
x = random_x((10, 5))
- s = COO.from_numpy(x)
+ s = COO(x, cache=True)
assert isinstance(s.tocsr(), scipy.sparse.csr_matrix)
assert isinstance(s.tocsc(), scipy.sparse.csc_matrix)
assert s.tocsr() is s.tocsr()
assert s.tocsc() is s.tocsc()
- st = s.T
-
- assert_eq(st._csr, st)
- assert_eq(st._csc, st)
-
- assert isinstance(st.tocsr(), scipy.sparse.csr_matrix)
- assert isinstance(st.tocsc(), scipy.sparse.csc_matrix)
-
def test_empty_shape():
x = COO([], [1.0])
@@ -447,3 +468,39 @@ def test_raise_dense():
with pytest.raises((ValueError, NotImplementedError)):
x + 1
+
+
+def test_large_sum():
+ n = 500000
+ x = np.random.randint(0, 10000, size=(n,))
+ y = np.random.randint(0, 1000, size=(n,))
+ z = np.random.randint(0, 3, size=(n,))
+
+ data = np.random.random(n)
+
+ a = COO((x, y, z), data)
+ assert a.shape == (10000, 1000, 3)
+
+ b = a.sum(axis=2)
+ assert b.nnz > 100000
+
+
+def test_add_many_sparse_arrays():
+ x = COO({(1, 1): 1})
+ y = sum([x] * 100)
+ assert y.nnz < np.prod(y.shape)
+
+
+def test_caching():
+ x = COO({(10, 10, 10): 1})
+ assert x[:].reshape((100, 10)).transpose().tocsr() is not x[:].reshape((100, 10)).transpose().tocsr()
+
+ x = COO({(10, 10, 10): 1}, cache=True)
+ assert x[:].reshape((100, 10)).transpose().tocsr() is x[:].reshape((100, 10)).transpose().tocsr()
+
+ x = COO({(1, 1, 1, 1, 1, 1, 1, 2): 1}, cache=True)
+
+ for i in range(x.ndim):
+ x.reshape((1,) * i + (2,) + (1,) * (x.ndim - i - 1))
+
+ assert len(x._cache['reshape']) < 5
diff --git a/sparse/utils.py b/sparse/utils.py
index 76af90c..cf58a22 100644
--- a/sparse/utils.py
+++ b/sparse/utils.py
@@ -1,9 +1,18 @@
import numpy as np
+from .core import COO
def assert_eq(x, y):
assert x.shape == y.shape
assert x.dtype == y.dtype
+
+ if isinstance(x, COO):
+ if x.sorted:
+ assert is_lexsorted(x)
+ if isinstance(y, COO):
+ if y.sorted:
+ assert is_lexsorted(y)
+
if hasattr(x, 'todense'):
xx = x.todense()
else:
@@ -13,3 +22,7 @@ def assert_eq(x, y):
else:
yy = y
assert np.allclose(xx, yy)
+
+
+def is_lexsorted(x):
+ return not x.shape or (np.diff(x.linear_loc()) > 0).all()
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-sparse.git
More information about the Python-modules-commits
mailing list