[Python-modules-commits] [python-sparse] 01/05: New upstream version 0.1.1

Mon Aug 7 08:44:43 UTC 2017

This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository python-sparse.

commit dd5978fd4074cc1599048f512463be08d9a986a4
Author: Ghislain Antony Vaillant <ghisvail at gmail.com>
Date:   Sat Aug 5 09:39:07 2017 +0100

    New upstream version 0.1.1
---
 setup.py                  |   2 +-
 sparse/__init__.py        |   2 +-
 sparse/core.py            | 360 +++++++++++++++++++++++++++++++++-------------
 sparse/tests/test_core.py |  79 ++++++++--
 sparse/utils.py           |  13 ++
 5 files changed, 347 insertions(+), 109 deletions(-)

diff --git a/setup.py b/setup.py
index 6223933..a9a707a 100755
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@ from setuptools import setup
 
 
 setup(name='sparse',
-      version='0.1.0',
+      version='0.1.1',
       description='Sparse',
       url='http://github.com/mrocklin/sparse/',
       maintainer='Matthew Rocklin',
diff --git a/sparse/__init__.py b/sparse/__init__.py
index 595bebf..579ebea 100644
--- a/sparse/__init__.py
+++ b/sparse/__init__.py
@@ -1,3 +1,3 @@
 from .core import COO, tensordot, concatenate, stack, dot
 
-__version__ = '0.1.0'
+__version__ = '0.1.1'
diff --git a/sparse/core.py b/sparse/core.py
index c8bcaa9..43813bf 100644
--- a/sparse/core.py
+++ b/sparse/core.py
@@ -1,14 +1,20 @@
 from __future__ import absolute_import, division, print_function
 
-from collections import Iterable
+from collections import Iterable, defaultdict, deque
 from functools import reduce
-from numbers import Number
+import numbers
 import operator
 
 import numpy as np
 import scipy.sparse
 
 
+try:  # Windows compatibility
+    int = long
+except NameError:
+    pass
+
+
 class COO(object):
     """ A Sparse Multidimensional Array
 
@@ -29,9 +35,9 @@ class COO(object):
     --------
     >>> x = np.eye(4)
     >>> x[2, 3] = 5
-    >>> s = COO.from_numpy(x)
+    >>> s = COO(x)
     >>> s
-    <COO: shape=(4, 4), dtype=float64, nnz=5>
+    <COO: shape=(4, 4), dtype=float64, nnz=5, sorted=True, duplicates=False>
     >>> s.data
     array([ 1.,  1.,  1.,  5.,  1.])
     >>> s.coords
@@ -50,9 +56,9 @@ class COO(object):
     >>> data = [1, 2, 3, 4, 5]
     >>> y = COO(coords, data, shape=(3, 4, 5))
     >>> y
-    <COO: shape=(3, 4, 5), dtype=int64, nnz=5>
+    <COO: shape=(3, 4, 5), dtype=int64, nnz=5, sorted=False, duplicates=True>
     >>> tensordot(s, y, axes=(0, 1))
-    <COO: shape=(4, 3, 5), dtype=float64, nnz=6>
+    <COO: shape=(4, 3, 5), dtype=float64, nnz=6, sorted=False, duplicates=False>
 
     Following scipy.sparse conventions you can also pass these as a tuple with
     rows and columns
@@ -73,7 +79,7 @@ class COO(object):
 
     >>> d = {(0, 0, 0): 1, (1, 2, 3): 2, (1, 1, 0): 3}
     >>> COO(d)
-    <COO: shape=(2, 3, 4), dtype=int64, nnz=3>
+    <COO: shape=(2, 3, 4), dtype=int64, nnz=3, sorted=False, duplicates=False>
 
     >>> L = [((0, 0), 1),
     ...      ((1, 1), 2),
@@ -89,17 +95,23 @@ class COO(object):
     """
     __array_priority__ = 12
 
-    def __init__(self, coords, data=None, shape=None, has_duplicates=True):
+    def __init__(self, coords, data=None, shape=None, has_duplicates=True,
+                 sorted=False, cache=False):
+        self._cache = None
+        if cache:
+            self.enable_caching()
         if data is None:
             # {(i, j, k): x, (i, j, k): y, ...}
             if isinstance(coords, dict):
                 coords = list(coords.items())
+                has_duplicates = False
 
             if isinstance(coords, np.ndarray):
                 result = COO.from_numpy(coords)
                 self.coords = result.coords
                 self.data = result.data
                 self.has_duplicates = result.has_duplicates
+                self.sorted = result.sorted
                 self.shape = result.shape
                 return
 
@@ -127,7 +139,7 @@ class COO(object):
             self.coords = self.coords[None, :]
 
         if shape and not np.prod(self.coords.shape):
-            self.coords = np.zeros((len(shape), 0), dtype=int)
+            self.coords = np.zeros((len(shape), 0), dtype=np.uint64)
 
         if shape is None:
             if self.coords.nbytes:
@@ -143,13 +155,43 @@ class COO(object):
         self.coords = self.coords.astype(dtype)
         assert not self.shape or len(data) == self.coords.shape[1]
         self.has_duplicates = has_duplicates
+        self.sorted = sorted
+
+    def enable_caching(self):
+        """ Enable caching of reshape, transpose, and tocsr/csc operations
+
+        This enables efficient iterative workflows that make heavy use of
+        csr/csc operations, such as tensordot.  This maintains a cache of
+        recent results of reshape and transpose so that operations like
+        tensordot (which uses both internally) store efficiently stored
+        representations for repeated use.  This can significantly cut down on
+        computational costs in common numeric algorithms.
+
+        However, this also assumes that neither this object, nor the downstream
+        objects will have their data mutated.
+
+        Examples
+        --------
+        >>> x.enable_caching()  # doctest: +SKIP
+        >>> csr1 = x.transpose((2, 0, 1)).reshape((100, 120)).tocsr()  # doctest: +SKIP
+        >>> csr2 = x.transpose((2, 0, 1)).reshape((100, 120)).tocsr()  # doctest: +SKIP
+        >>> csr1 is csr2  # doctest: +SKIP
+        True
+        """
+        self._cache = defaultdict(lambda: deque(maxlen=3))
+        return self
 
     @classmethod
     def from_numpy(cls, x):
-        coords = np.where(x)
-        data = x[coords]
-        coords = np.vstack(coords)
-        return cls(coords, data, shape=x.shape)
+        if x.shape:
+            coords = np.where(x)
+            data = x[coords]
+            coords = np.vstack(coords)
+        else:
+            coords = []
+            data = x
+        return cls(coords, data, shape=x.shape, has_duplicates=False,
+                   sorted=True)
 
     def todense(self):
         self = self.sum_duplicates()
@@ -165,7 +207,9 @@ class COO(object):
         coords = np.empty((2, x.nnz), dtype=x.row.dtype)
         coords[0, :] = x.row
         coords[1, :] = x.col
-        return COO(coords, x.data, shape=x.shape, has_duplicates=not x.has_canonical_format)
+        return COO(coords, x.data, shape=x.shape,
+                   has_duplicates=not x.has_canonical_format,
+                   sorted=x.has_canonical_format)
 
     @property
     def dtype(self):
@@ -189,8 +233,11 @@ class COO(object):
     def __getitem__(self, index):
         if not isinstance(index, tuple):
             index = (index,)
-        index = tuple(ind + self.shape[i] if isinstance(ind, int) and ind < 0 else ind
+        index = tuple(ind + self.shape[i] if isinstance(ind, numbers.Integral) and ind < 0 else ind
                       for i, ind in enumerate(index))
+        if (all(ind == slice(None) or ind == slice(0, d)
+                for ind, d in zip(index, self.shape))):
+            return self
         mask = np.ones(self.nnz, dtype=bool)
         for i, ind in enumerate([i for i in index if i is not None]):
             if ind == slice(None, None):
@@ -202,7 +249,7 @@ class COO(object):
         shape = []
         i = 0
         for ind in index:
-            if isinstance(ind, int):
+            if isinstance(ind, numbers.Integral):
                 i += 1
                 continue
             elif isinstance(ind, slice):
@@ -231,11 +278,14 @@ class COO(object):
         shape = tuple(shape)
         data = self.data[mask]
 
-        return COO(coords, data, shape=shape, has_duplicates=self.has_duplicates)
+        return COO(coords, data, shape=shape,
+                   has_duplicates=self.has_duplicates,
+                   sorted=self.sorted)
 
     def __str__(self):
-        return "<COO: shape=%s, dtype=%s, nnz=%d>" % (self.shape, self.dtype,
-                self.nnz)
+        return "<COO: shape=%s, dtype=%s, nnz=%d, sorted=%s, duplicates=%s>" % (
+                self.shape, self.dtype, self.nnz, self.sorted,
+                self.has_duplicates)
 
     __repr__ = __str__
 
@@ -247,7 +297,7 @@ class COO(object):
         if dtype:
             kwargs['dtype'] = dtype
 
-        if isinstance(axis, int):
+        if isinstance(axis, numbers.Integral):
             axis = (axis,)
 
         if set(axis) == set(range(self.ndim)):
@@ -266,7 +316,13 @@ class COO(object):
 
             a = a.to_scipy_sparse()
             a = getattr(a, method)(axis=0, **kwargs)
-            a = COO.from_scipy_sparse(a)
+            if isinstance(a, scipy.sparse.spmatrix):
+                a = COO.from_scipy_sparse(a)
+                a.sorted = self.sorted
+                a.has_duplicates = False
+            elif isinstance(a, np.matrix):
+                a = np.asarray(a)[0]
+                a = COO.from_numpy(a)
             a = a.reshape([self.shape[d] for d in neg_axis])
             result = a
 
@@ -298,19 +354,18 @@ class COO(object):
         if axes == tuple(range(self.ndim)):
             return self
 
+        if self._cache is not None:
+            for ax, value in self._cache['transpose']:
+                if ax == axes:
+                    return value
+
         shape = tuple(self.shape[ax] for ax in axes)
         result = COO(self.coords[axes, :], self.data, shape,
-                     has_duplicates=self.has_duplicates)
+                     has_duplicates=self.has_duplicates,
+                     cache=self._cache is not None)
 
-        if axes == (1, 0):
-            try:
-                result._csc = self._csr.T
-            except AttributeError:
-                pass
-            try:
-                result._csr = self._csc.T
-            except AttributeError:
-                pass
+        if self._cache is not None:
+            self._cache['transpose'].append((axes, result))
         return result
 
     @property
@@ -320,6 +375,41 @@ class COO(object):
     def dot(self, other):
         return dot(self, other)
 
+    def __matmul__(self, other):
+        try:
+            return dot(self, other)
+        except NotImplementedError:
+            return NotImplemented
+
+    def __rmatmul__(self, other):
+        try:
+            return dot(other, self)
+        except NotImplementedError:
+            return NotImplemented
+
+    def __numpy_ufunc__(self, ufunc, method, i, inputs, **kwargs):
+        return NotImplemented
+
+    def linear_loc(self, signed=False):
+        """ Index location of every piece of data in a flattened array
+
+        This is used internally to check for duplicates, re-order, reshape,
+        etc..
+        """
+        n = reduce(operator.mul, self.shape)
+        if signed:
+            n = -n
+        dtype = np.min_scalar_type(n)
+        out = np.zeros(self.nnz, dtype=dtype)
+        tmp = np.zeros(self.nnz, dtype=dtype)
+        strides = 1
+        for i, d in enumerate(self.shape[::-1]):
+            # out += self.coords[-(i + 1), :].astype(dtype) * strides
+            np.multiply(self.coords[-(i + 1), :], strides, out=tmp, dtype=dtype)
+            np.add(tmp, out, out=out)
+            strides *= d
+        return out
+
     def reshape(self, shape):
         if self.shape == shape:
             return self
@@ -327,23 +417,31 @@ class COO(object):
             extra = int(np.prod(self.shape) /
                         np.prod([d for d in shape if d != -1]))
             shape = tuple([d if d != -1 else extra for d in shape])
+
         if self.shape == shape:
             return self
+
+        if self._cache is not None:
+            for sh, value in self._cache['reshape']:
+                if sh == shape:
+                    return value
+
         # TODO: this np.prod(self.shape) enforces a 2**64 limit to array size
-        dtype = np.min_scalar_type(np.prod(self.shape))
-        linear_loc = np.zeros(self.nnz, dtype=dtype)
-        strides = 1
-        for i, d in enumerate(self.shape[::-1]):
-            linear_loc += self.coords[-(i + 1), :].astype(dtype) * strides
-            strides *= d
+        linear_loc = self.linear_loc()
 
-        coords = np.empty((len(shape), self.nnz), dtype=np.min_scalar_type(max(self.shape)))
+        coords = np.empty((len(shape), self.nnz), dtype=np.min_scalar_type(max(shape)))
         strides = 1
         for i, d in enumerate(shape[::-1]):
             coords[-(i + 1), :] = (linear_loc // strides) % d
             strides *= d
 
-        return COO(coords, self.data, shape, has_duplicates=self.has_duplicates)
+        result = COO(coords, self.data, shape,
+                     has_duplicates=self.has_duplicates,
+                     sorted=self.sorted, cache=self._cache is not None)
+
+        if self._cache is not None:
+            self._cache['reshape'].append((shape, result))
+        return result
 
     def to_scipy_sparse(self):
         assert self.ndim == 2
@@ -351,40 +449,76 @@ class COO(object):
                                           (self.coords[0],
                                            self.coords[1])),
                                           shape=self.shape)
-        result.has_canonical_format = not self.has_duplicates
+        result.has_canonical_format = (not self.has_duplicates and self.sorted)
         return result
 
+    def _tocsr(self):
+        assert self.ndim == 2
+
+        # Pass 1: sum duplicates
+        self.sum_duplicates()
+
+        # Pass 2: sort indices
+        self.sort_indices()
+        row, col = self.coords
+
+        # Pass 3: count nonzeros in each row
+        indptr = np.zeros(self.shape[0] + 1, dtype=np.int64)
+        np.cumsum(np.bincount(row, minlength=self.shape[0]), out=indptr[1:])
+
+        return scipy.sparse.csr_matrix((self.data, col, indptr), shape=self.shape)
+
     def tocsr(self):
-        try:
-            return self._csr
-        except AttributeError:
-            pass
-        try:
-            self._csr = self._csc.tocsr()
-            return self._csr
-        except AttributeError:
-            pass
-
-        coo = self.to_scipy_sparse()
-        csr = coo.tocsr()
-        self._csr = csr
+        if self._cache is not None:
+            try:
+                return self._csr
+            except AttributeError:
+                pass
+            try:
+                self._csr = self._csc.tocsr()
+                return self._csr
+            except AttributeError:
+                pass
+
+            self._csr = csr = self._tocsr()
+        else:
+            csr = self._tocsr()
         return csr
 
     def tocsc(self):
-        try:
-            return self._csc
-        except AttributeError:
-            pass
-        try:
-            self._csc = self._csr.tocsc()
-            return self._csc
-        except AttributeError:
-            pass
-        coo = self.to_scipy_sparse()
-        csc = coo.tocsc()
-        self._csc = csc
+        if self._cache is not None:
+            try:
+                return self._csc
+            except AttributeError:
+                pass
+            try:
+                self._csc = self._csr.tocsc()
+                return self._csc
+            except AttributeError:
+                pass
+
+            self._csc = csc = self.tocsr().tocsc()
+        else:
+            csc = self.tocsr().tocsc()
+
         return csc
 
+    def sort_indices(self):
+        if self.sorted:
+            return
+
+        linear = self.linear_loc(signed=True)
+
+        if (np.diff(linear) > 0).all():  # already sorted
+            self.sorted = True
+            return self
+
+        order = np.argsort(linear)
+        self.coords = self.coords[:, order]
+        self.data = self.data[order]
+        self.sorted = True
+        return self
+
     def sum_duplicates(self):
         # Inspired by scipy/sparse/coo.py::sum_duplicates
         # See https://github.com/scipy/scipy/blob/master/LICENSE.txt
@@ -392,15 +526,21 @@ class COO(object):
             return self
         if not np.prod(self.coords.shape):
             return self
-        order = np.lexsort(self.coords)
-        coords = self.coords[:, order]
-        data = self.data[order]
-        unique_mask = (coords[:, 1:] != coords[:, :-1]).any(axis=0)
+
+        self.sort_indices()
+
+        linear = self.linear_loc()
+        unique_mask = np.diff(linear) != 0
+
+        if unique_mask.sum() == len(unique_mask):  # already unique
+            self.has_duplicates = False
+            return self
+
         unique_mask = np.append(True, unique_mask)
 
-        coords = coords[:, unique_mask]
+        coords = self.coords[:, unique_mask]
         (unique_inds,) = np.nonzero(unique_mask)
-        data = np.add.reduceat(data, unique_inds, dtype=data.dtype)
+        data = np.add.reduceat(self.data, unique_inds, dtype=self.data.dtype)
 
         self.data = data
         self.coords = coords
@@ -409,17 +549,21 @@ class COO(object):
         return self
 
     def __add__(self, other):
+        if isinstance(other, numbers.Number) and other == 0:
+            return self
         if not isinstance(other, COO):
             return self.maybe_densify() + other
         if self.shape == other.shape:
-            return COO(np.concatenate([self.coords, other.coords], axis=1),
-                       np.concatenate([self.data, other.data]),
-                       self.shape, has_duplicates=True)
+            return self.elemwise_binary(operator.add, other)
         else:
             raise NotImplementedError("Broadcasting not yet supported")
 
+    def __radd__(self, other):
+        return self + other
+
     def __neg__(self):
-        return COO(self.coords, -self.data, self.shape, self.has_duplicates)
+        return COO(self.coords, -self.data, self.shape, self.has_duplicates,
+                   self.sorted)
 
     def __sub__(self, other):
         return self + (-other)
@@ -451,7 +595,9 @@ class COO(object):
             raise ValueError("Performing this operation would produce "
                     "a dense result: %s" % str(func))
         return COO(self.coords, func(self.data, *args, **kwargs),
-                   shape=self.shape, has_duplicates=self.has_duplicates)
+                   shape=self.shape,
+                   has_duplicates=self.has_duplicates,
+                   sorted=self.sorted)
 
     def elemwise_binary(self, func, other, *args, **kwargs):
         assert isinstance(other, COO)
@@ -510,53 +656,68 @@ class COO(object):
     def __abs__(self):
         return self.elemwise(abs)
 
-    def exp(self):
+    def exp(self, out=None):
+        assert out is None
         return np.exp(self.maybe_densify())
 
-    def expm1(self):
+    def expm1(self, out=None):
+        assert out is None
         return self.elemwise(np.expm1)
 
-    def log1p(self):
+    def log1p(self, out=None):
+        assert out is None
         return self.elemwise(np.log1p)
 
-    def sin(self):
+    def sin(self, out=None):
+        assert out is None
         return self.elemwise(np.sin)
 
-    def sinh(self):
+    def sinh(self, out=None):
+        assert out is None
         return self.elemwise(np.sinh)
 
-    def tan(self):
+    def tan(self, out=None):
+        assert out is None
         return self.elemwise(np.tan)
 
-    def tanh(self):
+    def tanh(self, out=None):
+        assert out is None
         return self.elemwise(np.tanh)
 
-    def sqrt(self):
+    def sqrt(self, out=None):
+        assert out is None
         return self.elemwise(np.sqrt)
 
-    def ceil(self):
+    def ceil(self, out=None):
+        assert out is None
         return self.elemwise(np.ceil)
 
-    def floor(self):
+    def floor(self, out=None):
+        assert out is None
         return self.elemwise(np.floor)
 
-    def round(self, decimals=0):
+    def round(self, decimals=0, out=None):
+        assert out is None
         return self.elemwise(np.round, decimals)
 
-    def rint(self):
+    def rint(self, out=None):
+        assert out is None
         return self.elemwise(np.rint)
 
-    def conj(self):
+    def conj(self, out=None):
+        assert out is None
         return self.elemwise(np.conj)
 
-    def conjugate(self):
+    def conjugate(self, out=None):
+        assert out is None
         return self.elemwise(np.conjugate)
 
-    def astype(self, dtype):
+    def astype(self, dtype, out=None):
+        assert out is None
         return self.elemwise(np.ndarray.astype, dtype, check=False)
 
     def __gt__(self, other):
-        if not isinstance(other, Number):
+        if not isinstance(other, numbers.Number):
             raise NotImplementedError("Only scalars supported")
         if other < 0:
             raise ValueError("Comparison with negative number would produce "
@@ -564,7 +725,7 @@ class COO(object):
         return self.elemwise(operator.gt, other)
 
     def __ge__(self, other):
-        if not isinstance(other, Number):
+        if not isinstance(other, numbers.Number):
             raise NotImplementedError("Only scalars supported")
         if other <= 0:
             raise ValueError("Comparison with negative number would produce "
@@ -649,12 +810,17 @@ def tensordot(a, b, axes=2):
             res = res.todense()
         else:
             res = COO.from_scipy_sparse(res)  # <--- modified
+            res.has_duplicates = False
     if isinstance(res, np.matrix):
         res = np.asarray(res)
     return res.reshape(olda + oldb)
 
 
 def dot(a, b):
+    if not hasattr(a, 'ndim') or not hasattr(b, 'ndim'):
+        raise NotImplementedError(
+                "Cannot perform dot product on types %s, %s" %
+                (type(a), type(b)))
     return tensordot(a, b, axes=((a.ndim - 1,), (b.ndim - 2,)))
 
 
@@ -680,7 +846,7 @@ def _keepdims(original, new, axis):
 
 
 def _mask(coords, idx):
-    if isinstance(idx, int):
+    if isinstance(idx, numbers.Integral):
         return coords == idx
     elif isinstance(idx, slice):
         if idx.step not in (1, None):
@@ -717,7 +883,8 @@ def concatenate(arrays, axis=0):
     shape[axis] = dim
     has_duplicates = any(x.has_duplicates for x in arrays)
 
-    return COO(coords, data, shape=shape, has_duplicates=has_duplicates)
+    return COO(coords, data, shape=shape, has_duplicates=has_duplicates,
+               sorted=(axis == 0) and all(a.sorted for a in arrays))
 
 
 def stack(arrays, axis=0):
@@ -743,4 +910,5 @@ def stack(arrays, axis=0):
     coords.insert(axis, new)
     coords = np.stack(coords, axis=0)
 
-    return COO(coords, data, shape=shape, has_duplicates=has_duplicates)
+    return COO(coords, data, shape=shape, has_duplicates=has_duplicates,
+               sorted=(axis == 0) and all(a.sorted for a in arrays))
diff --git a/sparse/tests/test_core.py b/sparse/tests/test_core.py
index 5cc5558..51af459 100644
--- a/sparse/tests/test_core.py
+++ b/sparse/tests/test_core.py
@@ -55,6 +55,7 @@ def test_transpose(axis):
     [(2, 3, 4, 5), (8, 15)],
     [(2, 3, 4, 5), (24, 5)],
     [(2, 3, 4, 5), (20, 6)],
+    [(), ()],
 ])
 def test_reshape(a, b):
     x = random_x(a)
@@ -70,7 +71,7 @@ def test_large_reshape():
     col = row % m # np.random.randint(0, m, size=n, dtype=np.uint16)
     data = np.ones(n, dtype=np.uint8)
 
-    x = COO((data, (row, col)))
+    x = COO((data, (row, col)), sorted=True, has_duplicates=False)
 
     assert_eq(x, x.reshape(x.shape))
 
@@ -126,6 +127,7 @@ def test_tensordot(a_shape, b_shape, axes):
 
 
 def test_dot():
+    import operator
     a = random_x((3, 4, 5))
     b = random_x((5, 6))
 
@@ -135,6 +137,33 @@ def test_dot():
     assert_eq(a.dot(b), sa.dot(sb))
     assert_eq(np.dot(a, b), sparse.dot(sa, sb))
 
+    if hasattr(operator, 'matmul'):
+        # Basic equivalences
+        assert_eq(eval("a @ b"), eval("sa @ sb"))
+        assert_eq(eval("sa @ sb"), sparse.dot(sa, sb))
+
+        # Test that SOO's and np.array's combine correctly
+        assert_eq(eval("a @ sb"), eval("sa @ b"))
+
+
+@pytest.mark.xfail
+def test_dot_nocoercion():
+    a = random_x((3, 4, 5))
+    b = random_x((5, 6))
+
+    la = a.tolist()
+    lb = b.tolist()
+    la, lb          # silencing flake8
+
+    sa = COO.from_numpy(a)
+    sb = COO.from_numpy(b)
+    sa, sb          # silencing flake8
+
+    if hasattr(operator, 'matmul'):
+        # Operations with naive collection (list)
+        assert_eq(eval("la @ b"), eval("la @ sb"))
+        assert_eq(eval("a @ lb"), eval("sa @ lb"))
+
 
 @pytest.mark.parametrize('func', [np.expm1, np.log1p, np.sin, np.tan,
                                    np.sinh,  np.tanh, np.floor, np.ceil,
@@ -365,8 +394,8 @@ def test_scalar_exponentiation():
 
 def test_create_with_lists_of_tuples():
     L = [((0, 0, 0), 1),
-         ((1, 1, 1), 2),
          ((1, 2, 1), 1),
+         ((1, 1, 1), 2),
          ((1, 3, 2), 3)]
 
     s = COO(L)
@@ -410,21 +439,13 @@ def test_scipy_sparse_interface():
 
 def test_cache_csr():
     x = random_x((10, 5))
-    s = COO.from_numpy(x)
+    s = COO(x, cache=True)
 
     assert isinstance(s.tocsr(), scipy.sparse.csr_matrix)
     assert isinstance(s.tocsc(), scipy.sparse.csc_matrix)
     assert s.tocsr() is s.tocsr()
     assert s.tocsc() is s.tocsc()
 
-    st = s.T
-
-    assert_eq(st._csr, st)
-    assert_eq(st._csc, st)
-
-    assert isinstance(st.tocsr(), scipy.sparse.csr_matrix)
-    assert isinstance(st.tocsc(), scipy.sparse.csc_matrix)
-
 
 def test_empty_shape():
     x = COO([], [1.0])
@@ -447,3 +468,39 @@ def test_raise_dense():
 
     with pytest.raises((ValueError, NotImplementedError)):
         x + 1
+
+
+def test_large_sum():
+    n = 500000
+    x = np.random.randint(0, 10000, size=(n,))
+    y = np.random.randint(0, 1000, size=(n,))
+    z = np.random.randint(0, 3, size=(n,))
+
+    data = np.random.random(n)
+
+    a = COO((x, y, z), data)
+    assert a.shape == (10000, 1000, 3)
+
+    b = a.sum(axis=2)
+    assert b.nnz > 100000
+
+
+def test_add_many_sparse_arrays():
+    x = COO({(1, 1): 1})
+    y = sum([x] * 100)
+    assert y.nnz < np.prod(y.shape)
+
+
+def test_caching():
+    x = COO({(10, 10, 10): 1})
+    assert x[:].reshape((100, 10)).transpose().tocsr() is not x[:].reshape((100, 10)).transpose().tocsr()
+
+    x = COO({(10, 10, 10): 1}, cache=True)
+    assert x[:].reshape((100, 10)).transpose().tocsr() is x[:].reshape((100, 10)).transpose().tocsr()
+
+    x = COO({(1, 1, 1, 1, 1, 1, 1, 2): 1}, cache=True)
+
+    for i in range(x.ndim):
+        x.reshape((1,) * i + (2,) + (1,) * (x.ndim - i - 1))
+
+    assert len(x._cache['reshape']) < 5
diff --git a/sparse/utils.py b/sparse/utils.py
index 76af90c..cf58a22 100644
--- a/sparse/utils.py
+++ b/sparse/utils.py
@@ -1,9 +1,18 @@
 import numpy as np
+from .core import COO
 
 
 def assert_eq(x, y):
     assert x.shape == y.shape
     assert x.dtype == y.dtype
+
+    if isinstance(x, COO):
+        if x.sorted:
+            assert is_lexsorted(x)
+    if isinstance(y, COO):
+        if y.sorted:
+            assert is_lexsorted(y)
+
     if hasattr(x, 'todense'):
         xx = x.todense()
     else:
@@ -13,3 +22,7 @@ def assert_eq(x, y):
     else:
         yy = y
     assert np.allclose(xx, yy)
+
+
+def is_lexsorted(x):
+    return not x.shape or (np.diff(x.linear_loc()) > 0).all()

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-sparse.git