[med-svn] [Git][med-team/python-pynndescent][upstream] 2 commits: New upstream version 0.5.2
Andreas Tille
gitlab at salsa.debian.org
Thu Feb 18 19:27:54 GMT 2021
Andreas Tille pushed to branch upstream at Debian Med / python-pynndescent
Commits:
0c865cba by Andreas Tille at 2021-02-18T20:13:48+01:00
New upstream version 0.5.2
- - - - -
e6eb7a83 by Andreas Tille at 2021-02-18T20:23:24+01:00
New upstream version 0.5.2+dfsg
- - - - -
17 changed files:
- PKG-INFO
- README.rst
- pynndescent.egg-info/PKG-INFO
- pynndescent.egg-info/SOURCES.txt
- pynndescent/distances.py
- + pynndescent/graph_utils.py
- pynndescent/pynndescent_.py
- pynndescent/rp_trees.py
- pynndescent/sparse.py
- − pynndescent/tests/__pycache__/__init__.cpython-37.pyc
- − pynndescent/tests/__pycache__/test_distances.cpython-37.pyc
- − pynndescent/tests/__pycache__/test_pynndescent_.cpython-37.pyc
- − pynndescent/tests/__pycache__/test_rank.cpython-37.pyc
- pynndescent/tests/test_distances.py
- pynndescent/tests/test_pynndescent_.py
- pynndescent/utils.py
- setup.py
Changes:
=====================================
PKG-INFO
=====================================
@@ -1,6 +1,6 @@
Metadata-Version: 1.2
Name: pynndescent
-Version: 0.5.1
+Version: 0.5.2
Summary: Nearest Neighbor Descent
Home-page: http://github.com/lmcinnes/pynndescent
Author: Leland McInnes
@@ -91,8 +91,11 @@ Description: .. image:: https://travis-ci.org/lmcinnes/pynndescent.svg
**Angular and correlation metrics**
- cosine
+ - dot
- correlation
- spearmanr
+ - tsss
+ - true_angular
**Probability metrics**
=====================================
README.rst
=====================================
@@ -81,8 +81,11 @@ supporting a wide variety of distance metrics by default:
**Angular and correlation metrics**
- cosine
+- dot
- correlation
- spearmanr
+- tsss
+- true_angular
**Probability metrics**
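
A minimal usage sketch (not part of this commit; the data and parameter values are illustrative) showing the newly listed metrics being selected by name:

    import numpy as np
    from pynndescent import NNDescent

    data = np.random.random((1000, 32)).astype(np.float32)

    # "dot", "tsss", and "true_angular" join the angular/correlation
    # metrics selectable by name in 0.5.2.
    index = NNDescent(data, metric="true_angular", n_neighbors=15)
    neighbors, distances = index.neighbor_graph
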
=====================================
pynndescent.egg-info/PKG-INFO
=====================================
@@ -1,6 +1,6 @@
Metadata-Version: 1.2
Name: pynndescent
-Version: 0.5.1
+Version: 0.5.2
Summary: Nearest Neighbor Descent
Home-page: http://github.com/lmcinnes/pynndescent
Author: Leland McInnes
@@ -91,8 +91,11 @@ Description: .. image:: https://travis-ci.org/lmcinnes/pynndescent.svg
**Angular and correlation metrics**
- cosine
+ - dot
- correlation
- spearmanr
+ - tsss
+ - true_angular
**Probability metrics**
=====================================
pynndescent.egg-info/SOURCES.txt
=====================================
@@ -7,6 +7,7 @@ requirements.txt
setup.py
pynndescent/__init__.py
pynndescent/distances.py
+pynndescent/graph_utils.py
pynndescent/optimal_transport.py
pynndescent/pynndescent_.py
pynndescent/rp_trees.py
@@ -25,7 +26,11 @@ pynndescent/tests/test_distances.py
pynndescent/tests/test_pynndescent_.py
pynndescent/tests/test_rank.py
pynndescent/tests/__pycache__/__init__.cpython-37.pyc
+pynndescent/tests/__pycache__/__init__.cpython-38.pyc
pynndescent/tests/__pycache__/test_distances.cpython-37.pyc
+pynndescent/tests/__pycache__/test_distances.cpython-38-pytest-6.2.2.pyc
pynndescent/tests/__pycache__/test_pynndescent_.cpython-37.pyc
+pynndescent/tests/__pycache__/test_pynndescent_.cpython-38-pytest-6.2.2.pyc
pynndescent/tests/__pycache__/test_rank.cpython-37.pyc
+pynndescent/tests/__pycache__/test_rank.cpython-38-pytest-6.2.2.pyc
pynndescent/tests/test_data/cosine_hang.npy
\ No newline at end of file
=====================================
pynndescent/distances.py
=====================================
@@ -47,7 +47,7 @@ def euclidean(x, y):
locals={
"result": numba.types.float32,
"diff": numba.types.float32,
- "dim": numba.types.uint32,
+ "dim": numba.types.intp,
"i": numba.types.uint16,
},
)
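
An aside on the uint32/uint16 → intp changes repeated throughout this diff: a fixed-width unsigned local silently wraps once an array dimension exceeds the type's range, whereas numba.types.intp is the pointer-sized integer NumPy itself uses for indexing. A two-line illustration (not from the commit):

    import numpy as np

    n = 70000
    assert np.uint16(n) == n - 65536  # wraps silently to 4464
    assert np.intp(n) == n            # pointer-sized; safe for any array length
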
@@ -230,7 +230,7 @@ def jaccard(x, y):
"num_equal": numba.types.float32,
"x_true": numba.types.uint8,
"y_true": numba.types.uint8,
- "dim": numba.types.uint32,
+ "dim": numba.types.intp,
"i": numba.types.uint16,
},
)
@@ -415,7 +415,7 @@ def cosine(x, y):
"result": numba.types.float32,
"norm_x": numba.types.float32,
"norm_y": numba.types.float32,
- "dim": numba.types.uint32,
+ "dim": numba.types.intp,
"i": numba.types.uint16,
},
)
@@ -445,7 +445,7 @@ def alternative_cosine(x, y):
fastmath=True,
locals={
"result": numba.types.float32,
- "dim": numba.types.uint32,
+ "dim": numba.types.intp,
"i": numba.types.uint16,
},
)
@@ -472,7 +472,7 @@ def dot(x, y):
fastmath=True,
locals={
"result": numba.types.float32,
- "dim": numba.types.uint32,
+ "dim": numba.types.intp,
"i": numba.types.uint16,
},
)
@@ -493,6 +493,59 @@ def correct_alternative_cosine(d):
return 1.0 - pow(2.0, -d)
+@numba.njit(fastmath=True)
+def tsss(x, y):
+ d_euc_squared = 0.0
+ d_cos = 0.0
+ norm_x = 0.0
+ norm_y = 0.0
+ dim = x.shape[0]
+
+ for i in range(dim):
+ diff = x[i] - y[i]
+ d_euc_squared += diff * diff
+ d_cos += x[i] * y[i]
+ norm_x += x[i] * x[i]
+ norm_y += y[i] * y[i]
+
+ norm_x = np.sqrt(norm_x)
+ norm_y = np.sqrt(norm_y)
+ magnitude_difference = np.abs(norm_x - norm_y)
+ d_cos /= norm_x * norm_y
+ theta = np.arccos(d_cos) + np.radians(10) # Add 10 degrees as an "epsilon" to
+ # avoid problems
+ sector = ((np.sqrt(d_euc_squared) + magnitude_difference) ** 2) * theta
+ triangle = norm_x * norm_y * np.sin(theta) / 2.0
+ return triangle * sector
+
+
+@numba.njit(fastmath=True)
+def true_angular(x, y):
+ result = 0.0
+ norm_x = 0.0
+ norm_y = 0.0
+ dim = x.shape[0]
+ for i in range(dim):
+ result += x[i] * y[i]
+ norm_x += x[i] * x[i]
+ norm_y += y[i] * y[i]
+
+ if norm_x == 0.0 and norm_y == 0.0:
+ return 0.0
+ elif norm_x == 0.0 or norm_y == 0.0:
+ return FLOAT32_MAX
+ elif result <= 0.0:
+ return FLOAT32_MAX
+ else:
+ result = result / np.sqrt(norm_x * norm_y)
+ return 1.0 - (np.arccos(result) / np.pi)
+
+
+@numba.vectorize(fastmath=True)
+def true_angular_from_alt_cosine(d):
+ return 1.0 - (np.arccos(pow(2.0, -d)) / np.pi)
+
+
@numba.njit(fastmath=True, cache=True)
def correlation(x, y):
mu_x = 0.0
@@ -536,7 +589,7 @@ def correlation(x, y):
"result": numba.types.float32,
"l1_norm_x": numba.types.float32,
"l1_norm_y": numba.types.float32,
- "dim": numba.types.uint32,
+ "dim": numba.types.intp,
"i": numba.types.uint16,
},
)
@@ -572,7 +625,7 @@ def hellinger(x, y):
"result": numba.types.float32,
"l1_norm_x": numba.types.float32,
"l1_norm_y": numba.types.float32,
- "dim": numba.types.uint32,
+ "dim": numba.types.intp,
"i": numba.types.uint16,
},
)
@@ -738,6 +791,8 @@ named_distances = {
"spearmanr": spearmanr,
"kantorovich": kantorovich,
"wasserstein": kantorovich,
+ "tsss": tsss,
+ "true_angular": true_angular,
# Binary distances
"hamming": hamming,
"jaccard": jaccard,
@@ -762,6 +817,10 @@ fast_distance_alternatives = {
"l2": {"dist": squared_euclidean, "correction": np.sqrt},
"cosine": {"dist": alternative_cosine, "correction": correct_alternative_cosine},
"dot": {"dist": alternative_dot, "correction": correct_alternative_cosine},
+ "true_angular": {
+ "dist": alternative_cosine,
+ "correction": true_angular_from_alt_cosine,
+ },
"hellinger": {
"dist": alternative_hellinger,
"correction": correct_alternative_hellinger,
=====================================
pynndescent/graph_utils.py
=====================================
@@ -0,0 +1,242 @@
+import numba
+import numpy as np
+import heapq
+
+from scipy.sparse import coo_matrix
+from scipy.sparse.csgraph import connected_components
+from itertools import combinations
+
+import pynndescent.distances as pynnd_dist
+import joblib
+
+from pynndescent.utils import (
+ rejection_sample,
+ make_heap,
+ deheap_sort,
+ simple_heap_push,
+ has_been_visited,
+ mark_visited,
+)
+
+FLOAT32_EPS = np.finfo(np.float32).eps
+
+
+def create_component_search(index):
+ alternative_dot = pynnd_dist.alternative_dot
+ alternative_cosine = pynnd_dist.alternative_cosine
+
+ data = index._raw_data
+ indptr = index._search_graph.indptr
+ indices = index._search_graph.indices
+ dist = index._distance_func
+
+ @numba.njit(
+ fastmath=True,
+ nogil=True,
+ locals={
+ "current_query": numba.types.float32[::1],
+ "i": numba.types.uint32,
+ "j": numba.types.uint32,
+ "heap_priorities": numba.types.float32[::1],
+ "heap_indices": numba.types.int32[::1],
+ "candidate": numba.types.int32,
+ "vertex": numba.types.int32,
+ "d": numba.types.float32,
+ "d_vertex": numba.types.float32,
+ "visited": numba.types.uint8[::1],
+ "indices": numba.types.int32[::1],
+ "indptr": numba.types.int32[::1],
+ "data": numba.types.float32[:, ::1],
+ "heap_size": numba.types.int16,
+ "distance_scale": numba.types.float32,
+ "distance_bound": numba.types.float32,
+ "seed_scale": numba.types.float32,
+ },
+ )
+ def custom_search_closure(
+ query_points,
+ candidate_indices,
+ k,
+ epsilon,
+ visited,
+ ):
+ result = make_heap(query_points.shape[0], k)
+ distance_scale = 1.0 + epsilon
+
+ for i in range(query_points.shape[0]):
+ visited[:] = 0
+ if dist == alternative_dot or dist == alternative_cosine:
+ norm = np.sqrt((query_points[i] ** 2).sum())
+ if norm > 0.0:
+ current_query = query_points[i] / norm
+ else:
+ continue
+ else:
+ current_query = query_points[i]
+
+ heap_priorities = result[1][i]
+ heap_indices = result[0][i]
+ seed_set = [(np.float32(np.inf), np.int32(-1)) for j in range(0)]
+
+ ############ Init ################
+ n_initial_points = candidate_indices.shape[0]
+
+ for j in range(n_initial_points):
+ candidate = np.int32(candidate_indices[j])
+ d = dist(data[candidate], current_query)
+ # indices are guaranteed different
+ simple_heap_push(heap_priorities, heap_indices, d, candidate)
+ heapq.heappush(seed_set, (d, candidate))
+ mark_visited(visited, candidate)
+
+ ############ Search ##############
+ distance_bound = distance_scale * heap_priorities[0]
+
+ # Find smallest seed point
+ d_vertex, vertex = heapq.heappop(seed_set)
+
+ while d_vertex < distance_bound:
+
+ for j in range(indptr[vertex], indptr[vertex + 1]):
+
+ candidate = indices[j]
+
+ if has_been_visited(visited, candidate) == 0:
+ mark_visited(visited, candidate)
+
+ d = dist(data[candidate], current_query)
+
+ if d < distance_bound:
+ simple_heap_push(
+ heap_priorities, heap_indices, d, candidate
+ )
+ heapq.heappush(seed_set, (d, candidate))
+ # Update bound
+ distance_bound = distance_scale * heap_priorities[0]
+
+ # find new smallest seed point
+ if len(seed_set) == 0:
+ break
+ else:
+ d_vertex, vertex = heapq.heappop(seed_set)
+
+ return result
+
+ return custom_search_closure
+
+
+# @numba.njit(nogil=True)
+def find_component_connection_edge(
+ component1,
+ component2,
+ search_closure,
+ raw_data,
+ visited,
+ rng_state,
+ search_size=10,
+ epsilon=0.0,
+):
+ indices = [np.zeros(1, dtype=np.int64) for i in range(2)]
+ indices[0] = component1[
+ rejection_sample(np.int64(search_size), component1.shape[0], rng_state)
+ ]
+ indices[1] = component2[
+ rejection_sample(np.int64(search_size), component2.shape[0], rng_state)
+ ]
+ query_side = 0
+ query_points = raw_data[indices[query_side]]
+ candidate_indices = indices[1 - query_side].copy()
+ changed = [True, True]
+ best_dist = np.inf
+ best_edge = (indices[0][0], indices[1][0])
+
+ while changed[0] or changed[1]:
+ result = search_closure(
+ query_points, candidate_indices, search_size, epsilon, visited
+ )
+ inds, dists = deheap_sort(result)
+ for i in range(dists.shape[0]):
+ for j in range(dists.shape[1]):
+ if dists[i, j] < best_dist:
+ best_dist = dists[i, j]
+ best_edge = (indices[query_side][i], inds[i, j])
+ candidate_indices = indices[query_side]
+ new_indices = np.unique(inds[:, 0])
+ if indices[1 - query_side].shape[0] == new_indices.shape[0]:
+ changed[1 - query_side] = np.any(indices[1 - query_side] != new_indices)
+ indices[1 - query_side] = new_indices
+ query_points = raw_data[indices[1 - query_side]]
+ query_side = 1 - query_side
+
+ return best_edge[0], best_edge[1], best_dist
+
+
+def adjacency_matrix_representation(neighbor_indices, neighbor_distances):
+ result = coo_matrix(
+ (neighbor_indices.shape[0], neighbor_indices.shape[0]), dtype=np.float32
+ )
+
+ # Preserve any distance 0 points
+ neighbor_distances[neighbor_distances == 0.0] = FLOAT32_EPS
+
+ result.row = np.repeat(
+ np.arange(neighbor_indices.shape[0], dtype=np.int32),
+ neighbor_indices.shape[1],
+ )
+ result.col = neighbor_indices.ravel()
+ result.data = neighbor_distances.ravel()
+
+ # Get rid of any -1 index entries
+ result = result.tocsr()
+ result.data[result.indices == -1] = 0.0
+ result.eliminate_zeros()
+
+ # Symmetrize
+ result = result.maximum(result.T)
+
+ return result
+
+
+def connect_graph(graph, index, search_size=10, n_jobs=None):
+
+ search_closure = create_component_search(index)
+ n_components, component_ids = connected_components(graph)
+ result = graph.tolil()
+
+ # Translate component ids into internal vertex order
+ component_ids = component_ids[index._vertex_order]
+
+ def new_edge(c1, c2):
+ component1 = np.where(component_ids == c1)[0]
+ component2 = np.where(component_ids == c2)[0]
+
+ i, j, d = find_component_connection_edge(
+ component1,
+ component2,
+ search_closure,
+ index._raw_data,
+ index._visited,
+ index.rng_state,
+ search_size=search_size,
+ )
+
+ # Correct the distance if required
+ if index._distance_correction is not None:
+ d = index._distance_correction(d)
+
+ # Convert indices to original data order
+ i = index._vertex_order[i]
+ j = index._vertex_order[j]
+
+ return i, j, d
+
+ new_edges = joblib.Parallel(n_jobs=n_jobs, prefer="threads")(
+ joblib.delayed(new_edge)(c1, c2)
+ for c1, c2 in combinations(range(n_components), 2)
+ )
+
+ for i, j, d in new_edges:
+ result[i, j] = d
+ result[j, i] = d
+
+ return result.tocsr()
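
The new graph_utils module stitches the connected components of a k-NN graph together by searching for cheap bridging edges. A hypothetical usage sketch (not from this commit; it assumes a prepared NNDescent index, since connect_graph reads the index's search graph and vertex order):

    import numpy as np
    from pynndescent import NNDescent
    from pynndescent.graph_utils import (
        adjacency_matrix_representation,
        connect_graph,
    )

    data = np.random.random((500, 8)).astype(np.float32)
    index = NNDescent(data, n_neighbors=5)
    index.prepare()  # build the search structures connect_graph relies on

    inds, dists = index.neighbor_graph
    graph = adjacency_matrix_representation(inds, dists)

    # Adds one bridging edge per pair of connected components.
    connected = connect_graph(graph, index, search_size=10)
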
=====================================
pynndescent/pynndescent_.py
=====================================
@@ -855,7 +855,9 @@ class NNDescent(object):
if init_graph is None:
_init_graph = EMPTY_GRAPH
else:
- _init_graph = make_heap(init_graph.shape[0], init_graph.shape[1])
+ if init_graph.shape[0] != self._raw_data.shape[0]:
+ raise ValueError("Init graph size does not match dataset size!")
+ _init_graph = make_heap(init_graph.shape[0], self.n_neighbors)
_init_graph = sparse_initalize_heap_from_graph_indices(
_init_graph,
init_graph,
@@ -892,7 +894,9 @@ class NNDescent(object):
if init_graph is None:
_init_graph = EMPTY_GRAPH
else:
- _init_graph = make_heap(init_graph.shape[0], init_graph.shape[1])
+ if init_graph.shape[0] != self._raw_data.shape[0]:
+ raise ValueError("Init graph size does not match dataset size!")
+ _init_graph = make_heap(init_graph.shape[0], self.n_neighbors)
_init_graph = initalize_heap_from_graph_indices(
_init_graph, init_graph, data, self._distance_func
)
@@ -952,21 +956,40 @@ class NNDescent(object):
numba.set_num_threads(self.n_jobs)
if not hasattr(self, "_search_forest"):
- tree_scores = [
- score_linked_tree(tree, self._neighbor_graph[0])
- for tree in self._rp_forest
- ]
- if self.verbose:
- print(ts(), "Worst tree score: {:.8f}".format(np.min(tree_scores)))
- print(ts(), "Mean tree score: {:.8f}".format(np.mean(tree_scores)))
- print(ts(), "Best tree score: {:.8f}".format(np.max(tree_scores)))
- best_tree_indices = np.argsort(tree_scores)[: self.n_search_trees]
- best_trees = [self._rp_forest[idx] for idx in best_tree_indices]
- del self._rp_forest
- self._search_forest = [
- convert_tree_format(tree, self._raw_data.shape[0])
- for tree in best_trees
- ]
+ if self._rp_forest is None:
+ # We don't have a forest, so make a small search forest
+ current_random_state = check_random_state(self.random_state)
+ rp_forest = make_forest(
+ self._raw_data,
+ self.n_neighbors,
+ self.n_search_trees,
+ self.leaf_size,
+ self.rng_state,
+ current_random_state,
+ self.n_jobs,
+ self._angular_trees,
+ )
+ self._search_forest = [
+ convert_tree_format(tree, self._raw_data.shape[0])
+ for tree in rp_forest
+ ]
+ else:
+ # convert the best trees into a search forest
+ tree_scores = [
+ score_linked_tree(tree, self._neighbor_graph[0])
+ for tree in self._rp_forest
+ ]
+ if self.verbose:
+ print(ts(), "Worst tree score: {:.8f}".format(np.min(tree_scores)))
+ print(ts(), "Mean tree score: {:.8f}".format(np.mean(tree_scores)))
+ print(ts(), "Best tree score: {:.8f}".format(np.max(tree_scores)))
+ best_tree_indices = np.argsort(tree_scores)[: self.n_search_trees]
+ best_trees = [self._rp_forest[idx] for idx in best_tree_indices]
+ del self._rp_forest
+ self._search_forest = [
+ convert_tree_format(tree, self._raw_data.shape[0])
+ for tree in best_trees
+ ]
nnz_pre_diversify = np.sum(self._neighbor_graph[0] >= 0)
if self._is_sparse:
@@ -1079,7 +1102,7 @@ class NNDescent(object):
self._search_graph.sort_indices()
self._search_graph = self._search_graph.maximum(reverse_graph).tocsr()
- # Eliminate the diagonal0]
+ # Eliminate the diagonal
self._search_graph.setdiag(0.0)
self._search_graph.eliminate_zeros()
@@ -1531,6 +1554,7 @@ class NNDescent(object):
def compress_index(self):
import gc
+ self.prepare()
self.compressed = True
if hasattr(self, "_rp_forest"):
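
With the init_graph change above, a candidate graph whose row count differs from the data is now rejected up front, and the heap is sized by n_neighbors instead of the init graph's width. A sketch of the new behaviour (assuming init_graph is passed as an integer array of neighbor indices, one row per data point):

    import numpy as np
    from pynndescent import NNDescent

    data = np.random.random((200, 10)).astype(np.float32)
    bad_init = np.zeros((100, 5), dtype=np.int32)  # wrong number of rows

    try:
        NNDescent(data, n_neighbors=5, init_graph=bad_init)
    except ValueError as err:
        print(err)  # Init graph size does not match dataset size!
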
=====================================
pynndescent/rp_trees.py
=====================================
@@ -853,7 +853,7 @@ def make_sparse_tree(inds, indptr, spdata, rng_state, leaf_size=30, angular=Fals
fastmath=True,
locals={
"margin": numba.types.float32,
- "dim": numba.types.uint16,
+ "dim": numba.types.intp,
"d": numba.types.uint16,
},
)
@@ -984,7 +984,7 @@ def make_forest(
)
try:
if scipy.sparse.isspmatrix_csr(data):
- result = joblib.Parallel(n_jobs=n_jobs, prefer="threads")(
+ result = joblib.Parallel(n_jobs=n_jobs, require="sharedmem")(
joblib.delayed(make_sparse_tree)(
data.indices,
data.indptr,
@@ -996,7 +996,7 @@ def make_forest(
for i in range(n_trees)
)
else:
- result = joblib.Parallel(n_jobs=n_jobs, prefer="threads")(
+ result = joblib.Parallel(n_jobs=n_jobs, require="sharedmem")(
joblib.delayed(make_dense_tree)(data, rng_states[i], leaf_size, angular)
for i in range(n_trees)
)
@@ -1029,10 +1029,9 @@ def get_leaves_from_tree(tree):
def rptree_leaf_array_parallel(rp_forest):
- result = joblib.Parallel(n_jobs=-1, prefer="threads")(
+ result = joblib.Parallel(n_jobs=-1, require="sharedmem")(
joblib.delayed(get_leaves_from_tree)(rp_tree) for rp_tree in rp_forest
)
- # result = [get_leaves_from_tree(rp_tree) for rp_tree in rp_forest]
return result
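
The joblib changes above tighten the backend guarantee: prefer="threads" is only a soft hint that an outer parallel context may override, while require="sharedmem" forces a shared-memory (threading) backend, presumably because the workers share the rng state and data arrays. A toy comparison of the two settings (not from the commit):

    import joblib

    def square(i):
        return i * i

    # Soft hint: joblib may still choose a process-based backend.
    hinted = joblib.Parallel(n_jobs=2, prefer="threads")(
        joblib.delayed(square)(i) for i in range(4)
    )

    # Hard constraint: always a shared-memory (threading) backend.
    shared = joblib.Parallel(n_jobs=2, require="sharedmem")(
        joblib.delayed(square)(i) for i in range(4)
    )

    assert hinted == shared == [0, 1, 4, 9]
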
=====================================
pynndescent/sparse.py
=====================================
@@ -218,7 +218,7 @@ def sparse_euclidean(ind1, data1, ind2, data2):
"aux_data": numba.types.float32[::1],
"result": numba.types.float32,
"diff": numba.types.float32,
- "dim": numba.types.uint32,
+ "dim": numba.types.intp,
"i": numba.types.uint16,
},
)
@@ -281,17 +281,30 @@ def sparse_canberra(ind1, data1, ind2, data2):
return result
-@numba.njit()
+@numba.njit(
+ [
+ "f4(i4[::1],f4[::1],i4[::1],f4[::1])",
+ numba.types.float32(
+ numba.types.Array(numba.types.int32, 1, "C", readonly=True),
+ numba.types.Array(numba.types.float32, 1, "C", readonly=True),
+ numba.types.Array(numba.types.int32, 1, "C", readonly=True),
+ numba.types.Array(numba.types.float32, 1, "C", readonly=True),
+ ),
+ ],
+ fastmath=True,
+)
def sparse_bray_curtis(ind1, data1, ind2, data2): # pragma: no cover
- abs_data1 = np.abs(data1)
- abs_data2 = np.abs(data2)
- _, denom_data = sparse_sum(ind1, abs_data1, ind2, abs_data2)
+ _, denom_data = sparse_sum(ind1, data1, ind2, data2)
+ denom_data = np.abs(denom_data)
if denom_data.shape[0] == 0:
return 0.0
denominator = np.sum(denom_data)
+ if denominator == 0.0:
+ return 0.0
+
_, numer_data = sparse_diff(ind1, data1, ind2, data2)
numer_data = np.abs(numer_data)
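
With the rewrite above (and the metric re-enabled in sparse_named_distances further down), the sparse Bray-Curtis distance should agree with SciPy's dense implementation for non-negative data. A small illustrative check (values assumed):

    import numpy as np
    from scipy import sparse
    from scipy.spatial.distance import braycurtis
    import pynndescent.sparse as spdist

    x = np.array([0.0, 1.0, 2.0, 0.0], dtype=np.float32)
    y = np.array([1.0, 0.0, 2.0, 3.0], dtype=np.float32)
    sx = sparse.csr_matrix(x)
    sy = sparse.csr_matrix(y)

    d = spdist.sparse_bray_curtis(sx.indices, sx.data, sy.indices, sy.data)
    assert np.isclose(d, braycurtis(x, y))  # both 5/9 here
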
@@ -323,8 +336,8 @@ def sparse_jaccard(ind1, data1, ind2, data2):
],
fastmath=True,
locals={
- "num_non_zero": numba.types.float32,
- "num_equal": numba.types.float32,
+ "num_non_zero": numba.types.intp,
+ "num_equal": numba.types.intp,
},
)
def sparse_alternative_jaccard(ind1, data1, ind2, data2):
@@ -445,7 +458,7 @@ def sparse_cosine(ind1, data1, ind2, data2):
"result": numba.types.float32,
"norm_x": numba.types.float32,
"norm_y": numba.types.float32,
- "dim": numba.types.int32,
+ "dim": numba.types.intp,
"i": numba.types.uint16,
},
)
@@ -492,7 +505,7 @@ def sparse_dot(ind1, data1, ind2, data2):
fastmath=True,
locals={
"result": numba.types.float32,
- "dim": numba.types.int32,
+ "dim": numba.types.intp,
"i": numba.types.uint16,
},
)
@@ -598,7 +611,7 @@ def sparse_hellinger(ind1, data1, ind2, data2):
"result": numba.types.float32,
"l1_norm_x": numba.types.float32,
"l1_norm_y": numba.types.float32,
- "dim": numba.types.uint32,
+ "dim": numba.types.intp,
"i": numba.types.uint16,
},
)
@@ -810,7 +823,7 @@ sparse_named_distances = {
"canberra": sparse_canberra,
"kantorovich": sparse_kantorovich,
"wasserstein": sparse_kantorovich,
- # 'braycurtis': sparse_bray_curtis,
+ "braycurtis": sparse_bray_curtis,
# Binary distances
"hamming": sparse_hamming,
"jaccard": sparse_jaccard,
=====================================
pynndescent/tests/__pycache__/__init__.cpython-37.pyc deleted
=====================================
Binary files a/pynndescent/tests/__pycache__/__init__.cpython-37.pyc and /dev/null differ
=====================================
pynndescent/tests/__pycache__/test_distances.cpython-37.pyc deleted
=====================================
Binary files a/pynndescent/tests/__pycache__/test_distances.cpython-37.pyc and /dev/null differ
=====================================
pynndescent/tests/__pycache__/test_pynndescent_.cpython-37.pyc deleted
=====================================
Binary files a/pynndescent/tests/__pycache__/test_pynndescent_.cpython-37.pyc and /dev/null differ
=====================================
pynndescent/tests/__pycache__/test_rank.cpython-37.pyc deleted
=====================================
Binary files a/pynndescent/tests/__pycache__/test_rank.cpython-37.pyc and /dev/null differ
=====================================
pynndescent/tests/test_distances.py
=====================================
@@ -1,11 +1,10 @@
import numpy as np
-from numpy.testing import assert_array_equal
+from numpy.testing import assert_array_equal, assert_array_almost_equal
import pynndescent.distances as dist
import pynndescent.sparse as spdist
from scipy import sparse, stats
from sklearn.metrics import pairwise_distances
from sklearn.neighbors import BallTree
-from sklearn.utils.testing import assert_array_almost_equal
np.random.seed(42)
spatial_data = np.random.randn(10, 20)
@@ -315,6 +314,10 @@ def test_sparse_sokalsneath():
sparse_binary_check("sokalsneath")
+def test_sparse_braycurtis():
+ sparse_spatial_check("braycurtis")
+
+
def test_seuclidean():
v = np.abs(np.random.randn(spatial_data.shape[1]))
dist_matrix = pairwise_distances(spatial_data, metric="seuclidean", V=v)
=====================================
pynndescent/tests/test_pynndescent_.py
=====================================
@@ -413,6 +413,51 @@ def test_pickle_unpickle():
np.testing.assert_equal(distances1, distances2)
+def test_compressed_pickle_unpickle():
+ seed = np.random.RandomState(42)
+
+ x1 = seed.normal(0, 100, (1000, 50))
+ x2 = seed.normal(0, 100, (1000, 50))
+
+ index1 = NNDescent(
+ x1,
+ "euclidean",
+ {},
+ 10,
+ random_state=None,
+ compressed=True,
+ )
+ neighbors1, distances1 = index1.query(x2)
+
+ pickle.dump(index1, open("test_tmp.pkl", "wb"))
+ index2 = pickle.load(open("test_tmp.pkl", "rb"))
+ os.remove("test_tmp.pkl")
+
+ neighbors2, distances2 = index2.query(x2)
+
+ np.testing.assert_equal(neighbors1, neighbors2)
+ np.testing.assert_equal(distances1, distances2)
+
+
+def test_transformer_pickle_unpickle():
+ seed = np.random.RandomState(42)
+
+ x1 = seed.normal(0, 100, (1000, 50))
+ x2 = seed.normal(0, 100, (1000, 50))
+
+ index1 = PyNNDescentTransformer(n_neighbors=10).fit(x1)
+ result1 = index1.transform(x2)
+
+ pickle.dump(index1, open("test_tmp.pkl", "wb"))
+ index2 = pickle.load(open("test_tmp.pkl", "rb"))
+ os.remove("test_tmp.pkl")
+
+ result2 = index2.transform(x2)
+
+ np.testing.assert_equal(result1.indices, result2.indices)
+ np.testing.assert_equal(result1.data, result2.data)
+
+
def test_joblib_dump():
seed = np.random.RandomState(42)
=====================================
pynndescent/utils.py
=====================================
@@ -67,7 +67,7 @@ def tau_rand(state):
),
],
locals={
- "dim": numba.types.uint32,
+ "dim": numba.types.intp,
"i": numba.types.uint32,
"result": numba.types.float32,
},
@@ -620,7 +620,7 @@ def mark_visited(table, candidate):
"i4(f4[::1],i4[::1],f4,i4)",
fastmath=True,
locals={
- "size": numba.types.uint16,
+ "size": numba.types.intp,
"i": numba.types.uint16,
"ic1": numba.types.uint16,
"ic2": numba.types.uint16,
@@ -676,7 +676,7 @@ def simple_heap_push(priorities, indices, p, n):
"i4(f4[::1],i4[::1],f4,i4)",
fastmath=True,
locals={
- "size": numba.types.uint16,
+ "size": numba.types.intp,
"i": numba.types.uint16,
"ic1": numba.types.uint16,
"ic2": numba.types.uint16,
@@ -737,7 +737,7 @@ def checked_heap_push(priorities, indices, p, n):
"i4(f4[::1],i4[::1],u1[::1],f4,i4,u1)",
fastmath=True,
locals={
- "size": numba.types.uint16,
+ "size": numba.types.intp,
"i": numba.types.uint16,
"ic1": numba.types.uint16,
"ic2": numba.types.uint16,
@@ -796,7 +796,7 @@ def flagged_heap_push(priorities, indices, flags, p, n, f):
"i4(f4[::1],i4[::1],u1[::1],f4,i4,u1)",
fastmath=True,
locals={
- "size": numba.types.uint16,
+ "size": numba.types.intp,
"i": numba.types.uint16,
"ic1": numba.types.uint16,
"ic2": numba.types.uint16,
=====================================
setup.py
=====================================
@@ -8,7 +8,7 @@ def readme():
configuration = {
"name": "pynndescent",
- "version": "0.5.1",
+ "version": "0.5.2",
"description": "Nearest Neighbor Descent",
"long_description": readme(),
"classifiers": [
View it on GitLab: https://salsa.debian.org/med-team/python-pynndescent/-/compare/870d8598e8ef50ed13138755e469c277637c04f3...e6eb7a837d9954c09c153da53b54d53e8190238f