[med-svn] [Git][med-team/python-pynndescent][upstream] 2 commits: New upstream version 0.5.2

Andreas Tille gitlab at salsa.debian.org
Thu Feb 18 19:27:54 GMT 2021



Andreas Tille pushed to branch upstream at Debian Med / python-pynndescent


Commits:
0c865cba by Andreas Tille at 2021-02-18T20:13:48+01:00
New upstream version 0.5.2
- - - - -
e6eb7a83 by Andreas Tille at 2021-02-18T20:23:24+01:00
New upstream version 0.5.2+dfsg
- - - - -


17 changed files:

- PKG-INFO
- README.rst
- pynndescent.egg-info/PKG-INFO
- pynndescent.egg-info/SOURCES.txt
- pynndescent/distances.py
- + pynndescent/graph_utils.py
- pynndescent/pynndescent_.py
- pynndescent/rp_trees.py
- pynndescent/sparse.py
- − pynndescent/tests/__pycache__/__init__.cpython-37.pyc
- − pynndescent/tests/__pycache__/test_distances.cpython-37.pyc
- − pynndescent/tests/__pycache__/test_pynndescent_.cpython-37.pyc
- − pynndescent/tests/__pycache__/test_rank.cpython-37.pyc
- pynndescent/tests/test_distances.py
- pynndescent/tests/test_pynndescent_.py
- pynndescent/utils.py
- setup.py


Changes:

=====================================
PKG-INFO
=====================================
@@ -1,6 +1,6 @@
 Metadata-Version: 1.2
 Name: pynndescent
-Version: 0.5.1
+Version: 0.5.2
 Summary: Nearest Neighbor Descent
 Home-page: http://github.com/lmcinnes/pynndescent
 Author: Leland McInnes
@@ -91,8 +91,11 @@ Description: .. image:: https://travis-ci.org/lmcinnes/pynndescent.svg
         **Angular and correlation metrics**
         
         - cosine
+        - dot
         - correlation
         - spearmanr
+        - tsss
+        - true_angular
         
         **Probability metrics**
         


=====================================
README.rst
=====================================
@@ -81,8 +81,11 @@ supporting a wide variety of distance metrics by default:
 **Angular and correlation metrics**
 
 - cosine
+- dot
 - correlation
 - spearmanr
+- tsss
+- true_angular
 
 **Probability metrics**
 
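
For reference, the two metrics added to this list ("tsss" and "true_angular")
can be selected by name like any other built-in metric once 0.5.2 is
installed. A minimal usage sketch (the data and parameter values here are
illustrative, not part of this changeset):

    import numpy as np
    from pynndescent import NNDescent

    data = np.random.random((1000, 20)).astype(np.float32)

    # "tsss" and "true_angular" are new in 0.5.2 (see distances.py below)
    index = NNDescent(data, metric="tsss", n_neighbors=15)
    neighbor_indices, neighbor_distances = index.neighbor_graph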


=====================================
pynndescent.egg-info/PKG-INFO
=====================================
@@ -1,6 +1,6 @@
 Metadata-Version: 1.2
 Name: pynndescent
-Version: 0.5.1
+Version: 0.5.2
 Summary: Nearest Neighbor Descent
 Home-page: http://github.com/lmcinnes/pynndescent
 Author: Leland McInnes
@@ -91,8 +91,11 @@ Description: .. image:: https://travis-ci.org/lmcinnes/pynndescent.svg
         **Angular and correlation metrics**
         
         - cosine
+        - dot
         - correlation
         - spearmanr
+        - tsss
+        - true_angular
         
         **Probability metrics**
         


=====================================
pynndescent.egg-info/SOURCES.txt
=====================================
@@ -7,6 +7,7 @@ requirements.txt
 setup.py
 pynndescent/__init__.py
 pynndescent/distances.py
+pynndescent/graph_utils.py
 pynndescent/optimal_transport.py
 pynndescent/pynndescent_.py
 pynndescent/rp_trees.py
@@ -25,7 +26,11 @@ pynndescent/tests/test_distances.py
 pynndescent/tests/test_pynndescent_.py
 pynndescent/tests/test_rank.py
 pynndescent/tests/__pycache__/__init__.cpython-37.pyc
+pynndescent/tests/__pycache__/__init__.cpython-38.pyc
 pynndescent/tests/__pycache__/test_distances.cpython-37.pyc
+pynndescent/tests/__pycache__/test_distances.cpython-38-pytest-6.2.2.pyc
 pynndescent/tests/__pycache__/test_pynndescent_.cpython-37.pyc
+pynndescent/tests/__pycache__/test_pynndescent_.cpython-38-pytest-6.2.2.pyc
 pynndescent/tests/__pycache__/test_rank.cpython-37.pyc
+pynndescent/tests/__pycache__/test_rank.cpython-38-pytest-6.2.2.pyc
 pynndescent/tests/test_data/cosine_hang.npy
\ No newline at end of file


=====================================
pynndescent/distances.py
=====================================
@@ -47,7 +47,7 @@ def euclidean(x, y):
     locals={
         "result": numba.types.float32,
         "diff": numba.types.float32,
-        "dim": numba.types.uint32,
+        "dim": numba.types.intp,
         "i": numba.types.uint16,
     },
 )
@@ -230,7 +230,7 @@ def jaccard(x, y):
         "num_equal": numba.types.float32,
         "x_true": numba.types.uint8,
         "y_true": numba.types.uint8,
-        "dim": numba.types.uint32,
+        "dim": numba.types.intp,
         "i": numba.types.uint16,
     },
 )
@@ -415,7 +415,7 @@ def cosine(x, y):
         "result": numba.types.float32,
         "norm_x": numba.types.float32,
         "norm_y": numba.types.float32,
-        "dim": numba.types.uint32,
+        "dim": numba.types.intp,
         "i": numba.types.uint16,
     },
 )
@@ -445,7 +445,7 @@ def alternative_cosine(x, y):
     fastmath=True,
     locals={
         "result": numba.types.float32,
-        "dim": numba.types.uint32,
+        "dim": numba.types.intp,
         "i": numba.types.uint16,
     },
 )
@@ -472,7 +472,7 @@ def dot(x, y):
     fastmath=True,
     locals={
         "result": numba.types.float32,
-        "dim": numba.types.uint32,
+        "dim": numba.types.intp,
         "i": numba.types.uint16,
     },
 )
@@ -493,6 +493,59 @@ def correct_alternative_cosine(d):
     return 1.0 - pow(2.0, -d)
 
 
+@numba.njit(fastmath=True)
+def tsss(x, y):
+    d_euc_squared = 0.0
+    d_cos = 0.0
+    norm_x = 0.0
+    norm_y = 0.0
+    dim = x.shape[0]
+
+    for i in range(dim):
+        diff = x[i] - y[i]
+        d_euc_squared += diff * diff
+        d_cos += x[i] * y[i]
+        norm_x += x[i] * x[i]
+        norm_y += y[i] * y[i]
+
+    norm_x = np.sqrt(norm_x)
+    norm_y = np.sqrt(norm_y)
+    magnitude_difference = np.abs(norm_x - norm_y)
+    d_cos /= norm_x * norm_y
+    theta = np.arccos(d_cos) + np.radians(10)  # Add 10 degrees as an "epsilon" to
+    # avoid problems
+    sector = ((np.sqrt(d_euc_squared) + magnitude_difference) ** 2) * theta
+    triangle = norm_x * norm_y * np.sin(theta) / 2.0
+    return triangle * sector
+
+
+@numba.njit(fastmath=True)
+def true_angular(x, y):
+    result = 0.0
+    norm_x = 0.0
+    norm_y = 0.0
+    dim = x.shape[0]
+    for i in range(dim):
+        result += x[i] * y[i]
+        norm_x += x[i] * x[i]
+        norm_y += y[i] * y[i]
+
+    if norm_x == 0.0 and norm_y == 0.0:
+        return 0.0
+    elif norm_x == 0.0 or norm_y == 0.0:
+        return FLOAT32_MAX
+    elif result <= 0.0:
+        return FLOAT32_MAX
+    else:
+        result = result / np.sqrt(norm_x * norm_y)
+        return 1.0 - (np.arccos(result) / np.pi)
+
+
+@numba.vectorize(fastmath=True)
+def true_angular_from_alt_cosine(d):
+    return 1.0 - (np.arccos(pow(2.0, -d)) / np.pi)
+
+
 @numba.njit(fastmath=True, cache=True)
 def correlation(x, y):
     mu_x = 0.0
@@ -536,7 +589,7 @@ def correlation(x, y):
         "result": numba.types.float32,
         "l1_norm_x": numba.types.float32,
         "l1_norm_y": numba.types.float32,
-        "dim": numba.types.uint32,
+        "dim": numba.types.intp,
         "i": numba.types.uint16,
     },
 )
@@ -572,7 +625,7 @@ def hellinger(x, y):
         "result": numba.types.float32,
         "l1_norm_x": numba.types.float32,
         "l1_norm_y": numba.types.float32,
-        "dim": numba.types.uint32,
+        "dim": numba.types.intp,
         "i": numba.types.uint16,
     },
 )
@@ -738,6 +791,8 @@ named_distances = {
     "spearmanr": spearmanr,
     "kantorovich": kantorovich,
     "wasserstein": kantorovich,
+    "tsss": tsss,
+    "true_angular": true_angular,
     # Binary distances
     "hamming": hamming,
     "jaccard": jaccard,
@@ -762,6 +817,10 @@ fast_distance_alternatives = {
     "l2": {"dist": squared_euclidean, "correction": np.sqrt},
     "cosine": {"dist": alternative_cosine, "correction": correct_alternative_cosine},
     "dot": {"dist": alternative_dot, "correction": correct_alternative_cosine},
+    "true_angular": {
+        "dist": alternative_cosine,
+        "correction": true_angular_from_alt_cosine,
+    },
     "hellinger": {
         "dist": alternative_hellinger,
         "correction": correct_alternative_hellinger,


=====================================
pynndescent/graph_utils.py
=====================================
@@ -0,0 +1,242 @@
+import numba
+import numpy as np
+import heapq
+
+from scipy.sparse import coo_matrix
+from scipy.sparse.csgraph import connected_components
+from itertools import combinations
+
+import pynndescent.distances as pynnd_dist
+import joblib
+
+from pynndescent.utils import (
+    rejection_sample,
+    make_heap,
+    deheap_sort,
+    simple_heap_push,
+    has_been_visited,
+    mark_visited,
+)
+
+FLOAT32_EPS = np.finfo(np.float32).eps
+
+
+def create_component_search(index):
+    alternative_dot = pynnd_dist.alternative_dot
+    alternative_cosine = pynnd_dist.alternative_cosine
+
+    data = index._raw_data
+    indptr = index._search_graph.indptr
+    indices = index._search_graph.indices
+    dist = index._distance_func
+
+    @numba.njit(
+        fastmath=True,
+        nogil=True,
+        locals={
+            "current_query": numba.types.float32[::1],
+            "i": numba.types.uint32,
+            "j": numba.types.uint32,
+            "heap_priorities": numba.types.float32[::1],
+            "heap_indices": numba.types.int32[::1],
+            "candidate": numba.types.int32,
+            "vertex": numba.types.int32,
+            "d": numba.types.float32,
+            "d_vertex": numba.types.float32,
+            "visited": numba.types.uint8[::1],
+            "indices": numba.types.int32[::1],
+            "indptr": numba.types.int32[::1],
+            "data": numba.types.float32[:, ::1],
+            "heap_size": numba.types.int16,
+            "distance_scale": numba.types.float32,
+            "distance_bound": numba.types.float32,
+            "seed_scale": numba.types.float32,
+        },
+    )
+    def custom_search_closure(
+        query_points,
+        candidate_indices,
+        k,
+        epsilon,
+        visited,
+    ):
+        result = make_heap(query_points.shape[0], k)
+        distance_scale = 1.0 + epsilon
+
+        for i in range(query_points.shape[0]):
+            visited[:] = 0
+            if dist == alternative_dot or dist == alternative_cosine:
+                norm = np.sqrt((query_points[i] ** 2).sum())
+                if norm > 0.0:
+                    current_query = query_points[i] / norm
+                else:
+                    continue
+            else:
+                current_query = query_points[i]
+
+            heap_priorities = result[1][i]
+            heap_indices = result[0][i]
+            seed_set = [(np.float32(np.inf), np.int32(-1)) for j in range(0)]
+
+            ############ Init ################
+            n_initial_points = candidate_indices.shape[0]
+
+            for j in range(n_initial_points):
+                candidate = np.int32(candidate_indices[j])
+                d = dist(data[candidate], current_query)
+                # indices are guaranteed different
+                simple_heap_push(heap_priorities, heap_indices, d, candidate)
+                heapq.heappush(seed_set, (d, candidate))
+                mark_visited(visited, candidate)
+
+            ############ Search ##############
+            distance_bound = distance_scale * heap_priorities[0]
+
+            # Find smallest seed point
+            d_vertex, vertex = heapq.heappop(seed_set)
+
+            while d_vertex < distance_bound:
+
+                for j in range(indptr[vertex], indptr[vertex + 1]):
+
+                    candidate = indices[j]
+
+                    if has_been_visited(visited, candidate) == 0:
+                        mark_visited(visited, candidate)
+
+                        d = dist(data[candidate], current_query)
+
+                        if d < distance_bound:
+                            simple_heap_push(
+                                heap_priorities, heap_indices, d, candidate
+                            )
+                            heapq.heappush(seed_set, (d, candidate))
+                            # Update bound
+                            distance_bound = distance_scale * heap_priorities[0]
+
+                # find new smallest seed point
+                if len(seed_set) == 0:
+                    break
+                else:
+                    d_vertex, vertex = heapq.heappop(seed_set)
+
+        return result
+
+    return custom_search_closure
+
+
+# @numba.njit(nogil=True)
+def find_component_connection_edge(
+    component1,
+    component2,
+    search_closure,
+    raw_data,
+    visited,
+    rng_state,
+    search_size=10,
+    epsilon=0.0,
+):
+    indices = [np.zeros(1, dtype=np.int64) for i in range(2)]
+    indices[0] = component1[
+        rejection_sample(np.int64(search_size), component1.shape[0], rng_state)
+    ]
+    indices[1] = component2[
+        rejection_sample(np.int64(search_size), component2.shape[0], rng_state)
+    ]
+    query_side = 0
+    query_points = raw_data[indices[query_side]]
+    candidate_indices = indices[1 - query_side].copy()
+    changed = [True, True]
+    best_dist = np.inf
+    best_edge = (indices[0][0], indices[1][0])
+
+    while changed[0] or changed[1]:
+        result = search_closure(
+            query_points, candidate_indices, search_size, epsilon, visited
+        )
+        inds, dists = deheap_sort(result)
+        for i in range(dists.shape[0]):
+            for j in range(dists.shape[1]):
+                if dists[i, j] < best_dist:
+                    best_dist = dists[i, j]
+                    best_edge = (indices[query_side][i], inds[i, j])
+        candidate_indices = indices[query_side]
+        new_indices = np.unique(inds[:, 0])
+        if indices[1 - query_side].shape[0] == new_indices.shape[0]:
+            changed[1 - query_side] = np.any(indices[1 - query_side] != new_indices)
+        indices[1 - query_side] = new_indices
+        query_points = raw_data[indices[1 - query_side]]
+        query_side = 1 - query_side
+
+    return best_edge[0], best_edge[1], best_dist
+
+
+def adjacency_matrix_representation(neighbor_indices, neighbor_distances):
+    result = coo_matrix(
+        (neighbor_indices.shape[0], neighbor_indices.shape[0]), dtype=np.float32
+    )
+
+    # Preserve any distance 0 points
+    neighbor_distances[neighbor_distances == 0.0] = FLOAT32_EPS
+
+    result.row = np.repeat(
+        np.arange(neighbor_indices.shape[0], dtype=np.int32),
+        neighbor_indices.shape[1],
+    )
+    result.col = neighbor_indices.ravel()
+    result.data = neighbor_distances.ravel()
+
+    # Get rid of any -1 index entries
+    result = result.tocsr()
+    result.data[result.indices == -1] = 0.0
+    result.eliminate_zeros()
+
+    # Symmetrize
+    result = result.maximum(result.T)
+
+    return result
+
+
+def connect_graph(graph, index, search_size=10, n_jobs=None):
+
+    search_closure = create_component_search(index)
+    n_components, component_ids = connected_components(graph)
+    result = graph.tolil()
+
+    # Translate component ids into internal vertex order
+    component_ids = component_ids[index._vertex_order]
+
+    def new_edge(c1, c2):
+        component1 = np.where(component_ids == c1)[0]
+        component2 = np.where(component_ids == c2)[0]
+
+        i, j, d = find_component_connection_edge(
+            component1,
+            component2,
+            search_closure,
+            index._raw_data,
+            index._visited,
+            index.rng_state,
+            search_size=search_size,
+        )
+
+        # Correct the distance if required
+        if index._distance_correction is not None:
+            d = index._distance_correction(d)
+
+        # Convert indices to original data order
+        i = index._vertex_order[i]
+        j = index._vertex_order[j]
+
+        return i, j, d
+
+    new_edges = joblib.Parallel(n_jobs=n_jobs, prefer="threads")(
+        joblib.delayed(new_edge)(c1, c2)
+        for c1, c2 in combinations(range(n_components), 2)
+    )
+
+    for i, j, d in new_edges:
+        result[i, j] = d
+        result[j, i] = d
+
+    return result.tocsr()
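
The new module's entry point is connect_graph, which runs a bounded heuristic
search to find a connecting edge between each pair of connected components,
so the k-neighbor graph becomes a single component. A hedged usage sketch:
this workflow is inferred from the code above rather than from upstream
documentation, and it assumes a prepared index, since the component search
reads the index's internal search graph:

    import numpy as np
    from pynndescent import NNDescent
    from pynndescent.graph_utils import (
        adjacency_matrix_representation,
        connect_graph,
    )

    # Two well separated blobs: the 10-NN graph will likely be disconnected.
    rng = np.random.RandomState(42)
    data = np.vstack(
        [rng.normal(0.0, 1.0, (500, 10)), rng.normal(100.0, 1.0, (500, 10))]
    ).astype(np.float32)

    index = NNDescent(data, n_neighbors=10, random_state=42)
    index.prepare()  # connect_graph uses the index's search graph internally

    inds, dists = index.neighbor_graph
    graph = adjacency_matrix_representation(inds, dists)
    connected = connect_graph(graph, index, search_size=10)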


=====================================
pynndescent/pynndescent_.py
=====================================
@@ -855,7 +855,9 @@ class NNDescent(object):
             if init_graph is None:
                 _init_graph = EMPTY_GRAPH
             else:
-                _init_graph = make_heap(init_graph.shape[0], init_graph.shape[1])
+                if init_graph.shape[0] != self._raw_data.shape[0]:
+                    raise ValueError("Init graph size does not match dataset size!")
+                _init_graph = make_heap(init_graph.shape[0], self.n_neighbors)
                 _init_graph = sparse_initalize_heap_from_graph_indices(
                     _init_graph,
                     init_graph,
@@ -892,7 +894,9 @@ class NNDescent(object):
             if init_graph is None:
                 _init_graph = EMPTY_GRAPH
             else:
-                _init_graph = make_heap(init_graph.shape[0], init_graph.shape[1])
+                if init_graph.shape[0] != self._raw_data.shape[0]:
+                    raise ValueError("Init graph size does not match dataset size!")
+                _init_graph = make_heap(init_graph.shape[0], self.n_neighbors)
                 _init_graph = initalize_heap_from_graph_indices(
                     _init_graph, init_graph, data, self._distance_func
                 )
@@ -952,21 +956,40 @@ class NNDescent(object):
             numba.set_num_threads(self.n_jobs)
 
         if not hasattr(self, "_search_forest"):
-            tree_scores = [
-                score_linked_tree(tree, self._neighbor_graph[0])
-                for tree in self._rp_forest
-            ]
-            if self.verbose:
-                print(ts(), "Worst tree score: {:.8f}".format(np.min(tree_scores)))
-                print(ts(), "Mean tree score: {:.8f}".format(np.mean(tree_scores)))
-                print(ts(), "Best tree score: {:.8f}".format(np.max(tree_scores)))
-            best_tree_indices = np.argsort(tree_scores)[: self.n_search_trees]
-            best_trees = [self._rp_forest[idx] for idx in best_tree_indices]
-            del self._rp_forest
-            self._search_forest = [
-                convert_tree_format(tree, self._raw_data.shape[0])
-                for tree in best_trees
-            ]
+            if self._rp_forest is None:
+                # We don't have a forest, so make a small search forest
+                current_random_state = check_random_state(self.random_state)
+                rp_forest = make_forest(
+                    self._raw_data,
+                    self.n_neighbors,
+                    self.n_search_trees,
+                    self.leaf_size,
+                    self.rng_state,
+                    current_random_state,
+                    self.n_jobs,
+                    self._angular_trees,
+                )
+                self._search_forest = [
+                    convert_tree_format(tree, self._raw_data.shape[0])
+                    for tree in rp_forest
+                ]
+            else:
+                # convert the best trees into a search forest
+                tree_scores = [
+                    score_linked_tree(tree, self._neighbor_graph[0])
+                    for tree in self._rp_forest
+                ]
+                if self.verbose:
+                    print(ts(), "Worst tree score: {:.8f}".format(np.min(tree_scores)))
+                    print(ts(), "Mean tree score: {:.8f}".format(np.mean(tree_scores)))
+                    print(ts(), "Best tree score: {:.8f}".format(np.max(tree_scores)))
+                best_tree_indices = np.argsort(tree_scores)[: self.n_search_trees]
+                best_trees = [self._rp_forest[idx] for idx in best_tree_indices]
+                del self._rp_forest
+                self._search_forest = [
+                    convert_tree_format(tree, self._raw_data.shape[0])
+                    for tree in best_trees
+                ]
 
         nnz_pre_diversify = np.sum(self._neighbor_graph[0] >= 0)
         if self._is_sparse:
@@ -1079,7 +1102,7 @@ class NNDescent(object):
         self._search_graph.sort_indices()
         self._search_graph = self._search_graph.maximum(reverse_graph).tocsr()
 
-        # Eliminate the diagonal0]
+        # Eliminate the diagonal
         self._search_graph.setdiag(0.0)
         self._search_graph.eliminate_zeros()
 
@@ -1531,6 +1554,7 @@ class NNDescent(object):
     def compress_index(self):
         import gc
 
+        self.prepare()
         self.compressed = True
 
         if hasattr(self, "_rp_forest"):
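
Three behavioural changes above are worth noting: an init_graph whose first
dimension does not match the data now raises immediately instead of building
a mis-sized heap (and the heap is sized by n_neighbors rather than by the
width of the supplied graph); prepare() now builds a small search forest on
demand when no NN-descent forest was kept; and compress_index() calls
prepare() first so compression always operates on a fully prepared index.
A sketch of the new validation (illustrative values, using the init_graph
keyword of NNDescent that these code paths consume):

    import numpy as np
    from pynndescent import NNDescent

    rng = np.random.RandomState(42)
    data = rng.normal(size=(1000, 20)).astype(np.float32)

    # One row of candidate neighbor indices per data point is expected.
    init = rng.randint(0, 1000, size=(1000, 5))
    index = NNDescent(data, n_neighbors=10, init_graph=init)

    # A mismatched first dimension now fails fast:
    try:
        NNDescent(data, n_neighbors=10, init_graph=init[:500])
    except ValueError as err:
        print(err)  # Init graph size does not match dataset size!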


=====================================
pynndescent/rp_trees.py
=====================================
@@ -853,7 +853,7 @@ def make_sparse_tree(inds, indptr, spdata, rng_state, leaf_size=30, angular=Fals
     fastmath=True,
     locals={
         "margin": numba.types.float32,
-        "dim": numba.types.uint16,
+        "dim": numba.types.intp,
         "d": numba.types.uint16,
     },
 )
@@ -984,7 +984,7 @@ def make_forest(
     )
     try:
         if scipy.sparse.isspmatrix_csr(data):
-            result = joblib.Parallel(n_jobs=n_jobs, prefer="threads")(
+            result = joblib.Parallel(n_jobs=n_jobs, require="sharedmem")(
                 joblib.delayed(make_sparse_tree)(
                     data.indices,
                     data.indptr,
@@ -996,7 +996,7 @@ def make_forest(
                 for i in range(n_trees)
             )
         else:
-            result = joblib.Parallel(n_jobs=n_jobs, prefer="threads")(
+            result = joblib.Parallel(n_jobs=n_jobs, require="sharedmem")(
                 joblib.delayed(make_dense_tree)(data, rng_states[i], leaf_size, angular)
                 for i in range(n_trees)
             )
@@ -1029,10 +1029,9 @@ def get_leaves_from_tree(tree):
 
 
 def rptree_leaf_array_parallel(rp_forest):
-    result = joblib.Parallel(n_jobs=-1, prefer="threads")(
+    result = joblib.Parallel(n_jobs=-1, require="sharedmem")(
         joblib.delayed(get_leaves_from_tree)(rp_tree) for rp_tree in rp_forest
     )
-    # result = [get_leaves_from_tree(rp_tree) for rp_tree in rp_forest]
     return result
 
 
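
The switch from prefer="threads" to require="sharedmem" tightens joblib's
backend selection: prefer is only a soft hint that an enclosing
parallel_backend() context can override, while require="sharedmem" is a hard
constraint that keeps tree construction in threads, sharing the data arrays,
even under a process-based backend. A small illustration of the difference
(toy function, not from this changeset):

    import joblib

    def square(i):
        return i * i

    with joblib.parallel_backend("loky"):  # process-based backend requested
        # Soft hint: the enclosing loky context wins; work may run in processes.
        joblib.Parallel(n_jobs=2, prefer="threads")(
            joblib.delayed(square)(i) for i in range(4)
        )
        # Hard constraint: runs in threads regardless of the enclosing context.
        joblib.Parallel(n_jobs=2, require="sharedmem")(
            joblib.delayed(square)(i) for i in range(4)
        )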


=====================================
pynndescent/sparse.py
=====================================
@@ -218,7 +218,7 @@ def sparse_euclidean(ind1, data1, ind2, data2):
         "aux_data": numba.types.float32[::1],
         "result": numba.types.float32,
         "diff": numba.types.float32,
-        "dim": numba.types.uint32,
+        "dim": numba.types.intp,
         "i": numba.types.uint16,
     },
 )
@@ -281,17 +281,30 @@ def sparse_canberra(ind1, data1, ind2, data2):
     return result
 
 
-@numba.njit()
+@numba.njit(
+    [
+        "f4(i4[::1],f4[::1],i4[::1],f4[::1])",
+        numba.types.float32(
+            numba.types.Array(numba.types.int32, 1, "C", readonly=True),
+            numba.types.Array(numba.types.float32, 1, "C", readonly=True),
+            numba.types.Array(numba.types.int32, 1, "C", readonly=True),
+            numba.types.Array(numba.types.float32, 1, "C", readonly=True),
+        ),
+    ],
+    fastmath=True,
+)
 def sparse_bray_curtis(ind1, data1, ind2, data2):  # pragma: no cover
-    abs_data1 = np.abs(data1)
-    abs_data2 = np.abs(data2)
-    _, denom_data = sparse_sum(ind1, abs_data1, ind2, abs_data2)
+    _, denom_data = sparse_sum(ind1, data1, ind2, data2)
+    denom_data = np.abs(denom_data)
 
     if denom_data.shape[0] == 0:
         return 0.0
 
     denominator = np.sum(denom_data)
 
+    if denominator == 0.0:
+        return 0.0
+
     _, numer_data = sparse_diff(ind1, data1, ind2, data2)
     numer_data = np.abs(numer_data)
 
@@ -323,8 +336,8 @@ def sparse_jaccard(ind1, data1, ind2, data2):
     ],
     fastmath=True,
     locals={
-        "num_non_zero": numba.types.float32,
-        "num_equal": numba.types.float32,
+        "num_non_zero": numba.types.intp,
+        "num_equal": numba.types.intp,
     },
 )
 def sparse_alternative_jaccard(ind1, data1, ind2, data2):
@@ -445,7 +458,7 @@ def sparse_cosine(ind1, data1, ind2, data2):
         "result": numba.types.float32,
         "norm_x": numba.types.float32,
         "norm_y": numba.types.float32,
-        "dim": numba.types.int32,
+        "dim": numba.types.intp,
         "i": numba.types.uint16,
     },
 )
@@ -492,7 +505,7 @@ def sparse_dot(ind1, data1, ind2, data2):
     fastmath=True,
     locals={
         "result": numba.types.float32,
-        "dim": numba.types.int32,
+        "dim": numba.types.intp,
         "i": numba.types.uint16,
     },
 )
@@ -598,7 +611,7 @@ def sparse_hellinger(ind1, data1, ind2, data2):
         "result": numba.types.float32,
         "l1_norm_x": numba.types.float32,
         "l1_norm_y": numba.types.float32,
-        "dim": numba.types.uint32,
+        "dim": numba.types.intp,
         "i": numba.types.uint16,
     },
 )
@@ -810,7 +823,7 @@ sparse_named_distances = {
     "canberra": sparse_canberra,
     "kantorovich": sparse_kantorovich,
     "wasserstein": sparse_kantorovich,
-    # 'braycurtis': sparse_bray_curtis,
+    "braycurtis": sparse_bray_curtis,
     # Binary distances
     "hamming": sparse_hamming,
     "jaccard": sparse_jaccard,


=====================================
pynndescent/tests/__pycache__/__init__.cpython-37.pyc deleted
=====================================
Binary files a/pynndescent/tests/__pycache__/__init__.cpython-37.pyc and /dev/null differ


=====================================
pynndescent/tests/__pycache__/test_distances.cpython-37.pyc deleted
=====================================
Binary files a/pynndescent/tests/__pycache__/test_distances.cpython-37.pyc and /dev/null differ


=====================================
pynndescent/tests/__pycache__/test_pynndescent_.cpython-37.pyc deleted
=====================================
Binary files a/pynndescent/tests/__pycache__/test_pynndescent_.cpython-37.pyc and /dev/null differ


=====================================
pynndescent/tests/__pycache__/test_rank.cpython-37.pyc deleted
=====================================
Binary files a/pynndescent/tests/__pycache__/test_rank.cpython-37.pyc and /dev/null differ


=====================================
pynndescent/tests/test_distances.py
=====================================
@@ -1,11 +1,10 @@
 import numpy as np
-from numpy.testing import assert_array_equal
+from numpy.testing import assert_array_equal, assert_array_almost_equal
 import pynndescent.distances as dist
 import pynndescent.sparse as spdist
 from scipy import sparse, stats
 from sklearn.metrics import pairwise_distances
 from sklearn.neighbors import BallTree
-from sklearn.utils.testing import assert_array_almost_equal
 
 np.random.seed(42)
 spatial_data = np.random.randn(10, 20)
@@ -315,6 +314,10 @@ def test_sparse_sokalsneath():
     sparse_binary_check("sokalsneath")
 
 
+def test_sparse_braycurtis():
+    sparse_spatial_check("braycurtis")
+
+
 def test_seuclidean():
     v = np.abs(np.random.randn(spatial_data.shape[1]))
     dist_matrix = pairwise_distances(spatial_data, metric="seuclidean", V=v)


=====================================
pynndescent/tests/test_pynndescent_.py
=====================================
@@ -413,6 +413,51 @@ def test_pickle_unpickle():
     np.testing.assert_equal(distances1, distances2)
 
 
+def test_compressed_pickle_unpickle():
+    seed = np.random.RandomState(42)
+
+    x1 = seed.normal(0, 100, (1000, 50))
+    x2 = seed.normal(0, 100, (1000, 50))
+
+    index1 = NNDescent(
+        x1,
+        "euclidean",
+        {},
+        10,
+        random_state=None,
+        compressed=True,
+    )
+    neighbors1, distances1 = index1.query(x2)
+
+    pickle.dump(index1, open("test_tmp.pkl", "wb"))
+    index2 = pickle.load(open("test_tmp.pkl", "rb"))
+    os.remove("test_tmp.pkl")
+
+    neighbors2, distances2 = index2.query(x2)
+
+    np.testing.assert_equal(neighbors1, neighbors2)
+    np.testing.assert_equal(distances1, distances2)
+
+
+def test_transformer_pickle_unpickle():
+    seed = np.random.RandomState(42)
+
+    x1 = seed.normal(0, 100, (1000, 50))
+    x2 = seed.normal(0, 100, (1000, 50))
+
+    index1 = PyNNDescentTransformer(n_neighbors=10).fit(x1)
+    result1 = index1.transform(x2)
+
+    pickle.dump(index1, open("test_tmp.pkl", "wb"))
+    index2 = pickle.load(open("test_tmp.pkl", "rb"))
+    os.remove("test_tmp.pkl")
+
+    result2 = index2.transform(x2)
+
+    np.testing.assert_equal(result1.indices, result2.indices)
+    np.testing.assert_equal(result1.data, result2.data)
+
+
 def test_joblib_dump():
     seed = np.random.RandomState(42)
 


=====================================
pynndescent/utils.py
=====================================
@@ -67,7 +67,7 @@ def tau_rand(state):
         ),
     ],
     locals={
-        "dim": numba.types.uint32,
+        "dim": numba.types.intp,
         "i": numba.types.uint32,
         "result": numba.types.float32,
     },
@@ -620,7 +620,7 @@ def mark_visited(table, candidate):
     "i4(f4[::1],i4[::1],f4,i4)",
     fastmath=True,
     locals={
-        "size": numba.types.uint16,
+        "size": numba.types.intp,
         "i": numba.types.uint16,
         "ic1": numba.types.uint16,
         "ic2": numba.types.uint16,
@@ -676,7 +676,7 @@ def simple_heap_push(priorities, indices, p, n):
     "i4(f4[::1],i4[::1],f4,i4)",
     fastmath=True,
     locals={
-        "size": numba.types.uint16,
+        "size": numba.types.intp,
         "i": numba.types.uint16,
         "ic1": numba.types.uint16,
         "ic2": numba.types.uint16,
@@ -737,7 +737,7 @@ def checked_heap_push(priorities, indices, p, n):
     "i4(f4[::1],i4[::1],u1[::1],f4,i4,u1)",
     fastmath=True,
     locals={
-        "size": numba.types.uint16,
+        "size": numba.types.intp,
         "i": numba.types.uint16,
         "ic1": numba.types.uint16,
         "ic2": numba.types.uint16,
@@ -796,7 +796,7 @@ def flagged_heap_push(priorities, indices, flags, p, n, f):
     "i4(f4[::1],i4[::1],u1[::1],f4,i4,u1)",
     fastmath=True,
     locals={
-        "size": numba.types.uint16,
+        "size": numba.types.intp,
         "i": numba.types.uint16,
         "ic1": numba.types.uint16,
         "ic2": numba.types.uint16,


=====================================
setup.py
=====================================
@@ -8,7 +8,7 @@ def readme():
 
 configuration = {
     "name": "pynndescent",
-    "version": "0.5.1",
+    "version": "0.5.2",
     "description": "Nearest Neighbor Descent",
     "long_description": readme(),
     "classifiers": [



View it on GitLab: https://salsa.debian.org/med-team/python-pynndescent/-/compare/870d8598e8ef50ed13138755e469c277637c04f3...e6eb7a837d9954c09c153da53b54d53e8190238f


