[med-svn] [Git][python-team/packages/python-pynndescent][upstream] New upstream version 0.5.7
Andreas Tille (@tille)
gitlab at salsa.debian.org
Sun Jul 17 20:42:11 BST 2022
Andreas Tille pushed to branch upstream at Debian Python Team / packages / python-pynndescent
Commits:
d3f6540a by Andreas Tille at 2022-07-17T16:57:11+02:00
New upstream version 0.5.7
- - - - -
13 changed files:
- PKG-INFO
- pynndescent.egg-info/PKG-INFO
- pynndescent/distances.py
- pynndescent/graph_utils.py
- pynndescent/optimal_transport.py
- pynndescent/pynndescent_.py
- pynndescent/rp_trees.py
- pynndescent/sparse.py
- pynndescent/sparse_nndescent.py
- pynndescent/tests/test_distances.py
- pynndescent/tests/test_pynndescent_.py
- pynndescent/utils.py
- setup.py
Changes:
=====================================
PKG-INFO
=====================================
@@ -1,6 +1,6 @@
Metadata-Version: 1.2
Name: pynndescent
-Version: 0.5.6
+Version: 0.5.7
Summary: Nearest Neighbor Descent
Home-page: http://github.com/lmcinnes/pynndescent
Author: Leland McInnes
=====================================
pynndescent.egg-info/PKG-INFO
=====================================
@@ -1,6 +1,6 @@
Metadata-Version: 1.2
Name: pynndescent
-Version: 0.5.6
+Version: 0.5.7
Summary: Nearest Neighbor Descent
Home-page: http://github.com/lmcinnes/pynndescent
Author: Leland McInnes
=====================================
pynndescent/distances.py
=====================================
@@ -142,7 +142,7 @@ def weighted_minkowski(x, y, w=_mock_ones, p=2):
"""
result = 0.0
for i in range(x.shape[0]):
- result += (w[i] * np.abs(x[i] - y[i])) ** p
+ result += w[i] * np.abs(x[i] - y[i]) ** p
return result ** (1.0 / p)
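
For context: the fix moves the weight outside the p-th power, so the metric now matches the usual weighted Minkowski definition (sum_i w_i * |x_i - y_i|^p)^(1/p). A minimal NumPy sketch of the difference, using toy values rather than anything from the library:

import numpy as np

x = np.array([1.0, 2.0, 3.0])
y = np.array([2.0, 4.0, 6.0])
w = np.array([0.5, 1.0, 2.0])
p = 3

old = np.sum((w * np.abs(x - y)) ** p) ** (1.0 / p)  # weights also raised to p
new = np.sum(w * np.abs(x - y) ** p) ** (1.0 / p)    # weights applied once
# The two agree only when w is all ones or p == 1.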
=====================================
pynndescent/graph_utils.py
=====================================
@@ -145,10 +145,10 @@ def find_component_connection_edge(
best_edge = (indices[0][0], indices[1][0])
while changed[0] or changed[1]:
- result = search_closure(
+ inds, dists, _ = search_closure(
query_points, candidate_indices, search_size, epsilon, visited
)
- inds, dists = deheap_sort(result)
+ inds, dists = deheap_sort(inds, dists)
for i in range(dists.shape[0]):
for j in range(dists.shape[1]):
if dists[i, j] < best_dist:
=====================================
pynndescent/optimal_transport.py
=====================================
@@ -935,7 +935,7 @@ def network_simplex_core(node_arc_data, spanning_tree, graph, max_iter):
fastmath=True,
parallel=True,
locals={"diff": numba.float32, "result": numba.float32},
- cache=True,
+ cache=False,
)
def right_marginal_error(u, K, v, y):
uK = u @ K
@@ -950,7 +950,7 @@ def right_marginal_error(u, K, v, y):
fastmath=True,
parallel=True,
locals={"diff": numba.float32, "result": numba.float32},
- cache=True,
+ cache=False,
)
def right_marginal_error_batch(u, K, v, y):
uK = K.T @ u
@@ -962,7 +962,7 @@ def right_marginal_error_batch(u, K, v, y):
return np.sqrt(result)
-@numba.njit(fastmath=True, parallel=True, cache=True)
+@numba.njit(fastmath=True, parallel=True, cache=False)
def transport_plan(K, u, v):
i_dim = K.shape[0]
j_dim = K.shape[1]
@@ -974,7 +974,7 @@ def transport_plan(K, u, v):
return result
-@numba.njit(fastmath=True, parallel=True, locals={"result": numba.float32}, cache=True)
+@numba.njit(fastmath=True, parallel=True, locals={"result": numba.float32}, cache=False)
def relative_change_in_plan(old_u, old_v, new_u, new_v):
i_dim = old_u.shape[0]
j_dim = old_v.shape[0]
@@ -987,7 +987,7 @@ def relative_change_in_plan(old_u, old_v, new_u, new_v):
return result / (i_dim * j_dim)
-@numba.njit(fastmath=True, parallel=True, cache=True)
+@numba.njit(fastmath=True, parallel=True, cache=False)
def precompute_K_prime(K, x):
i_dim = K.shape[0]
j_dim = K.shape[1]
@@ -1003,7 +1003,7 @@ def precompute_K_prime(K, x):
return result
-@numba.njit(fastmath=True, parallel=True, cache=True)
+@numba.njit(fastmath=True, parallel=True, cache=False)
def K_from_cost(cost, regularization):
i_dim = cost.shape[0]
j_dim = cost.shape[1]
@@ -1131,7 +1131,7 @@ def sinkhorn_distance(x, y, cost=_dummy_cost, regularization=1.0):
return result
-@numba.njit(fastmath=True, parallel=True, cache=True)
+@numba.njit(fastmath=True, parallel=True, cache=False)
def sinkhorn_distance_batch(x, y, cost=_dummy_cost, regularization=1.0):
dim_x = x.shape[0]
dim_y = y.shape[0]
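
For context: cache=True asks Numba to persist compiled machine code on disk between runs; this release switches the optimal-transport kernels (and the other njit functions further down) to cache=False, at the cost of recompiling per process. On-disk caching of parallel=True functions has been fragile across Numba versions, which is presumably the motivation. A minimal sketch of the decorator pattern involved, with a hypothetical kernel that is not part of pynndescent:

import numba
import numpy as np

@numba.njit(fastmath=True, parallel=True, cache=False)
def scale_rows(K, u):
    # Hypothetical example: scale each row of K by the matching entry of u.
    result = np.empty_like(K)
    for i in numba.prange(K.shape[0]):  # parallel row loop
        result[i] = u[i] * K[i]
    return result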
=====================================
pynndescent/pynndescent_.py
=====================================
@@ -63,7 +63,7 @@ def is_c_contiguous(array_like):
return flags is not None and flags["C_CONTIGUOUS"]
-@numba.njit(parallel=True, cache=True)
+@numba.njit(parallel=True, cache=False)
def generate_leaf_updates(leaf_block, dist_thresholds, data, dist):
updates = [[(-1, -1, np.inf)] for i in range(leaf_block.shape[0])]
@@ -156,7 +156,7 @@ def init_from_neighbor_graph(heap, indices, distances):
return
-@numba.njit(parallel=True, cache=True)
+@numba.njit(parallel=True, cache=False)
def generate_graph_updates(
new_candidate_block, old_candidate_block, dist_thresholds, data, dist
):
@@ -372,7 +372,7 @@ def nn_descent(
verbose=verbose,
)
- return deheap_sort(current_graph)
+ return deheap_sort(current_graph[0], current_graph[1])
@numba.njit(parallel=True)
@@ -946,22 +946,25 @@ class NNDescent:
if not hasattr(self, "_search_forest"):
if self._rp_forest is None:
- # We don't have a forest, so make a small search forest
- current_random_state = check_random_state(self.random_state)
- rp_forest = make_forest(
- self._raw_data,
- self.n_neighbors,
- self.n_search_trees,
- self.leaf_size,
- self.rng_state,
- current_random_state,
- self.n_jobs,
- self._angular_trees,
- )
- self._search_forest = [
- convert_tree_format(tree, self._raw_data.shape[0])
- for tree in rp_forest
- ]
+ if self.tree_init:
+ # We don't have a forest, so make a small search forest
+ current_random_state = check_random_state(self.random_state)
+ rp_forest = make_forest(
+ self._raw_data,
+ self.n_neighbors,
+ self.n_search_trees,
+ self.leaf_size,
+ self.rng_state,
+ current_random_state,
+ self.n_jobs,
+ self._angular_trees,
+ )
+ self._search_forest = [
+ convert_tree_format(tree, self._raw_data.shape[0])
+ for tree in rp_forest
+ ]
+ else:
+ self._search_forest = []
else:
# convert the best trees into a search forest
tree_scores = [
@@ -1118,22 +1121,26 @@ class NNDescent:
# reorder according to the search tree leaf order
if self.verbose:
print(ts(), "Resorting data and graph based on tree order")
- self._vertex_order = self._search_forest[0].indices
- row_ordered_graph = self._search_graph[self._vertex_order, :].tocsc()
- self._search_graph = row_ordered_graph[:, self._vertex_order]
- self._search_graph = self._search_graph.tocsr()
- self._search_graph.sort_indices()
- if self._is_sparse:
- self._raw_data = self._raw_data[self._vertex_order, :]
- else:
- self._raw_data = np.ascontiguousarray(self._raw_data[self._vertex_order, :])
+ if self.tree_init:
+ self._vertex_order = self._search_forest[0].indices
+ row_ordered_graph = self._search_graph[self._vertex_order, :].tocsc()
+ self._search_graph = row_ordered_graph[:, self._vertex_order]
+ self._search_graph = self._search_graph.tocsr()
+ self._search_graph.sort_indices()
- tree_order = np.argsort(self._vertex_order)
- self._search_forest = tuple(
- resort_tree_indices(tree, tree_order)
- for tree in self._search_forest[: self.n_search_trees]
- )
+ if self._is_sparse:
+ self._raw_data = self._raw_data[self._vertex_order, :]
+ else:
+ self._raw_data = np.ascontiguousarray(self._raw_data[self._vertex_order, :])
+
+ tree_order = np.argsort(self._vertex_order)
+ self._search_forest = tuple(
+ resort_tree_indices(tree, tree_order)
+ for tree in self._search_forest[: self.n_search_trees]
+ )
+ else:
+ self._vertex_order = np.arange(self._raw_data.shape[0])
if self.compressed:
if self.verbose:
@@ -1149,34 +1156,42 @@ class NNDescent:
if self.verbose:
print(ts(), "Building and compiling search function")
- tree_hyperplanes = self._search_forest[0].hyperplanes
- tree_offsets = self._search_forest[0].offsets
- tree_indices = self._search_forest[0].indices
- tree_children = self._search_forest[0].children
+ if self.tree_init:
+ tree_hyperplanes = self._search_forest[0].hyperplanes
+ tree_offsets = self._search_forest[0].offsets
+ tree_indices = self._search_forest[0].indices
+ tree_children = self._search_forest[0].children
+
+ @numba.njit(
+ [
+ numba.types.Array(numba.types.int32, 1, "C", readonly=True)(
+ numba.types.Array(numba.types.float32, 1, "C", readonly=True),
+ numba.types.Array(numba.types.int64, 1, "C", readonly=False),
+ )
+ ],
+ locals={"node": numba.types.uint32, "side": numba.types.boolean},
+ )
+ def tree_search_closure(point, rng_state):
+ node = 0
+ while tree_children[node, 0] > 0:
+ side = select_side(
+ tree_hyperplanes[node], tree_offsets[node], point, rng_state
+ )
+ if side == 0:
+ node = tree_children[node, 0]
+ else:
+ node = tree_children[node, 1]
- @numba.njit(
- [
- numba.types.Array(numba.types.int32, 1, "C", readonly=True)(
- numba.types.Array(numba.types.float32, 1, "C", readonly=True),
- numba.types.Array(numba.types.int64, 1, "C", readonly=False),
- )
- ],
- locals={"node": numba.types.uint32, "side": numba.types.boolean},
- )
- def tree_search_closure(point, rng_state):
- node = 0
- while tree_children[node, 0] > 0:
- side = select_side(
- tree_hyperplanes[node], tree_offsets[node], point, rng_state
- )
- if side == 0:
- node = tree_children[node, 0]
- else:
- node = tree_children[node, 1]
+ return -tree_children[node]
- return -tree_children[node]
+ self._tree_search = tree_search_closure
+ else:
+ @numba.njit()
+ def tree_search_closure(point, rng_state):
+ return (0, 0)
- self._tree_search = tree_search_closure
+ self._tree_search = tree_search_closure
+ tree_indices = np.zeros(1, dtype=np.int64)
alternative_dot = pynnd_dist.alternative_dot
alternative_cosine = pynnd_dist.alternative_cosine
@@ -1301,50 +1316,63 @@ class NNDescent:
return result
self._search_function = search_closure
+ self._deheap_function = numba.njit(parallel=self.parallel_batch_queries)(
+ deheap_sort.py_func
+ )
+
# Force compilation of the search function (hardcoded k, epsilon)
query_data = self._raw_data[:1]
- _ = self._search_function(
+ inds, dists, _ = self._search_function(
query_data, 5, 0.0, self._visited, self.search_rng_state
)
+ _ = self._deheap_function(inds, dists)
def _init_sparse_search_function(self):
if self.verbose:
print(ts(), "Building and compiling sparse search function")
- tree_hyperplanes = self._search_forest[0].hyperplanes
- tree_offsets = self._search_forest[0].offsets
- tree_indices = self._search_forest[0].indices
- tree_children = self._search_forest[0].children
+ if self.tree_init:
+ tree_hyperplanes = self._search_forest[0].hyperplanes
+ tree_offsets = self._search_forest[0].offsets
+ tree_indices = self._search_forest[0].indices
+ tree_children = self._search_forest[0].children
+
+ @numba.njit(
+ [
+ numba.types.Array(numba.types.int32, 1, "C", readonly=True)(
+ numba.types.Array(numba.types.int32, 1, "C", readonly=True),
+ numba.types.Array(numba.types.float32, 1, "C", readonly=True),
+ numba.types.Array(numba.types.int64, 1, "C", readonly=False),
+ )
+ ],
+ locals={"node": numba.types.uint32, "side": numba.types.boolean},
+ )
+ def sparse_tree_search_closure(point_inds, point_data, rng_state):
+ node = 0
+ while tree_children[node, 0] > 0:
+ side = sparse_select_side(
+ tree_hyperplanes[node],
+ tree_offsets[node],
+ point_inds,
+ point_data,
+ rng_state,
+ )
+ if side == 0:
+ node = tree_children[node, 0]
+ else:
+ node = tree_children[node, 1]
- @numba.njit(
- [
- numba.types.Array(numba.types.int32, 1, "C", readonly=True)(
- numba.types.Array(numba.types.int32, 1, "C", readonly=True),
- numba.types.Array(numba.types.float32, 1, "C", readonly=True),
- numba.types.Array(numba.types.int64, 1, "C", readonly=False),
- )
- ],
- locals={"node": numba.types.uint32, "side": numba.types.boolean},
- )
- def sparse_tree_search_closure(point_inds, point_data, rng_state):
- node = 0
- while tree_children[node, 0] > 0:
- side = sparse_select_side(
- tree_hyperplanes[node],
- tree_offsets[node],
- point_inds,
- point_data,
- rng_state,
- )
- if side == 0:
- node = tree_children[node, 0]
- else:
- node = tree_children[node, 1]
+ return -tree_children[node]
- return -tree_children[node]
+ self._tree_search = sparse_tree_search_closure
+ else:
+ @numba.njit()
+ def sparse_tree_search_closure(point_inds, point_data, rng_state):
+ return (0, 0)
- self._tree_search = sparse_tree_search_closure
+ self._tree_search = sparse_tree_search_closure
+ tree_indices = np.zeros(1, dtype=np.int64)
from pynndescent.distances import alternative_dot, alternative_cosine
@@ -1508,10 +1536,13 @@ class NNDescent:
return result
self._search_function = search_closure
+ self._deheap_function = numba.njit(parallel=self.parallel_batch_queries)(
+ deheap_sort.py_func
+ )
# Force compilation of the search function (hardcoded k, epsilon)
query_data = self._raw_data[:1]
- _ = self._search_function(
+ inds, dists, _ = self._search_function(
query_data.indices,
query_data.indptr,
query_data.data,
@@ -1520,6 +1551,7 @@ class NNDescent:
self._visited,
self.search_rng_state,
)
+ _ = self._deheap_function(inds, dists)
@property
def neighbor_graph(self):
@@ -1600,7 +1632,7 @@ class NNDescent:
self._init_search_function()
query_data = np.asarray(query_data).astype(np.float32, order="C")
- result = self._search_function(
+ indices, dists, _ = self._search_function(
query_data, k, epsilon, self._visited, self.search_rng_state
)
else:
@@ -1614,7 +1646,7 @@ class NNDescent:
if not query_data.has_sorted_indices:
query_data.sort_indices()
- result = self._search_function(
+ indices, dists, _ = self._search_function(
query_data.indices,
query_data.indptr,
query_data.data,
@@ -1624,7 +1656,7 @@ class NNDescent:
self.search_rng_state,
)
- indices, dists = deheap_sort(result)
+ indices, dists = self._deheap_function(indices, dists)
# Sort to input graph_data order
indices = self._vertex_order[indices]
@@ -1634,16 +1666,16 @@ class NNDescent:
return indices, dists
def update(self, X):
- if not hasattr(self, "_search_graph"):
- self._init_search_graph()
-
current_random_state = check_random_state(self.random_state)
rng_state = current_random_state.randint(INT32_MIN, INT32_MAX, 3).astype(
np.int64
)
X = check_array(X, dtype=np.float32, accept_sparse="csr", order="C")
- original_order = np.argsort(self._vertex_order)
+ if hasattr(self, "_vertex_order"):
+ original_order = np.argsort(self._vertex_order)
+ else:
+ original_order = np.ones(self._raw_data.shape[0], dtype=np.bool_)
if self._is_sparse:
self._raw_data = sparse_vstack([self._raw_data, X])
@@ -1693,6 +1725,24 @@ class NNDescent:
verbose=self.verbose,
)
+ # Remove search graph and search function
+ # and rerun prepare if it was run previously
+ if (
+ hasattr(self, "_search_graph") or
+ hasattr(self, "_search_function") or
+ hasattr(self, "_search_forest")
+ ):
+ if hasattr(self, "_search_graph"):
+ del self._search_graph
+
+ if hasattr(self, "_search_forest"):
+ del self._search_forest
+
+ if hasattr(self, "_search_function"):
+ del self._search_function
+
+ self.prepare()
+
class PyNNDescentTransformer(BaseEstimator, TransformerMixin):
"""PyNNDescentTransformer for fast approximate nearest neighbor transformer.
=====================================
pynndescent/rp_trees.py
=====================================
@@ -1219,7 +1219,7 @@ def renumbaify_tree(tree):
"result": numba.float32,
"i": numba.uint32,
},
- cache=True,
+ cache=False,
)
def score_tree(tree, neighbor_indices, data, rng_state):
result = 0.0
@@ -1237,11 +1237,11 @@ def score_tree(tree, neighbor_indices, data, rng_state):
return result / numba.float32(neighbor_indices.shape[0])
-@numba.njit(nogil=True, parallel=True, locals={"node": numba.int32}, cache=True)
+@numba.njit(nogil=True, locals={"node": numba.int32}, cache=False)
def score_linked_tree(tree, neighbor_indices):
result = 0.0
n_nodes = len(tree.children)
- for i in numba.prange(n_nodes):
+ for i in range(n_nodes):
node = numba.int32(i)
left_child = tree.children[node][0]
right_child = tree.children[node][1]
=====================================
pynndescent/sparse.py
=====================================
@@ -938,7 +938,7 @@ def sparse_symmetric_kl_divergence(ind1, data1, ind2, data2):
return symmetric_kl_divergence(dense_data1, dense_data2)
-@numba.njit(parallel=True, cache=True)
+@numba.njit(parallel=True, cache=False)
def diversify(
indices,
distances,
@@ -993,7 +993,7 @@ def diversify(
return indices, distances
-@numba.njit(parallel=True, cache=True)
+@numba.njit(parallel=True, cache=False)
def diversify_csr(
graph_indptr,
graph_indices,
=====================================
pynndescent/sparse_nndescent.py
=====================================
@@ -24,7 +24,7 @@ locale.setlocale(locale.LC_NUMERIC, "C")
EMPTY_GRAPH = make_heap(1, 1)
-@numba.njit(parallel=True, cache=True)
+@numba.njit(parallel=True, cache=False)
def generate_leaf_updates(leaf_block, dist_thresholds, inds, indptr, data, dist):
updates = [[(-1, -1, np.inf)] for i in range(leaf_block.shape[0])]
@@ -122,7 +122,7 @@ def init_random(n_neighbors, inds, indptr, data, heap, dist, rng_state):
return
-@numba.njit(parallel=True, cache=True)
+@numba.njit(parallel=True, cache=False)
def generate_graph_updates(
new_candidate_block, old_candidate_block, dist_thresholds, inds, indptr, data, dist
):
@@ -343,4 +343,4 @@ def nn_descent(
verbose=verbose,
)
- return deheap_sort(current_graph)
+ return deheap_sort(current_graph[0], current_graph[1])
=====================================
pynndescent/tests/test_distances.py
=====================================
@@ -5,6 +5,7 @@ import pynndescent.distances as dist
import pynndescent.sparse as spdist
from scipy import stats
from scipy.sparse import csr_matrix
+from scipy.version import full_version as scipy_full_version
from sklearn.metrics import pairwise_distances
from sklearn.neighbors import BallTree
from sklearn.preprocessing import normalize
@@ -241,9 +242,12 @@ def test_seuclidean(spatial_data):
)
+@pytest.mark.skipif(
+ scipy_full_version < "1.8", reason="incorrect function in scipy<1.8"
+)
def test_weighted_minkowski(spatial_data):
v = np.abs(np.random.randn(spatial_data.shape[1]))
- dist_matrix = pairwise_distances(spatial_data, metric="wminkowski", w=v, p=3)
+ dist_matrix = pairwise_distances(spatial_data, metric="minkowski", w=v, p=3)
test_matrix = np.array(
[
[
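
For context: SciPy 1.8 removed the long-deprecated "wminkowski" metric; the weighted form is now spelled as plain "minkowski" with a w keyword argument, which is what the test switches to (and why older SciPy, where the weighted behaviour differed, is skipped). A minimal sketch of the updated call, assuming scipy>=1.8 and hypothetical data:

import numpy as np
from sklearn.metrics import pairwise_distances

X = np.random.random((10, 4))
w = np.abs(np.random.randn(X.shape[1]))
D = pairwise_distances(X, metric="minkowski", w=w, p=3)  # weighted Minkowski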
=====================================
pynndescent/tests/test_pynndescent_.py
=====================================
@@ -435,3 +435,65 @@ def test_joblib_dump():
np.testing.assert_equal(neighbors1, neighbors2)
np.testing.assert_equal(distances1, distances2)
+
+@pytest.mark.parametrize("metric", ["euclidean", "cosine"])
+def test_update_no_prepare_query_accuracy(nn_data, metric):
+ nnd = NNDescent(nn_data[200:800], metric=metric, n_neighbors=10, random_state=None)
+ nnd.update(nn_data[800:])
+
+ knn_indices, _ = nnd.query(nn_data[:200], k=10, epsilon=0.2)
+
+ true_nnd = NearestNeighbors(metric=metric).fit(nn_data[200:])
+ true_indices = true_nnd.kneighbors(nn_data[:200], 10, return_distance=False)
+
+ num_correct = 0.0
+ for i in range(true_indices.shape[0]):
+ num_correct += np.sum(np.in1d(true_indices[i], knn_indices[i]))
+
+ percent_correct = num_correct / (true_indices.shape[0] * 10)
+ assert percent_correct >= 0.95, (
+ "NN-descent query did not get 95% " "accuracy on nearest neighbors"
+ )
+
+@pytest.mark.parametrize("metric", ["euclidean", "cosine"])
+def test_update_w_prepare_query_accuracy(nn_data, metric):
+ nnd = NNDescent(nn_data[200:800], metric=metric, n_neighbors=10, random_state=None, compressed=False)
+ nnd.prepare()
+
+ nnd.update(nn_data[800:])
+ nnd.prepare()
+
+ knn_indices, _ = nnd.query(nn_data[:200], k=10, epsilon=0.2)
+
+ true_nnd = NearestNeighbors(metric=metric).fit(nn_data[200:])
+ true_indices = true_nnd.kneighbors(nn_data[:200], 10, return_distance=False)
+
+ num_correct = 0.0
+ for i in range(true_indices.shape[0]):
+ num_correct += np.sum(np.in1d(true_indices[i], knn_indices[i]))
+
+ percent_correct = num_correct / (true_indices.shape[0] * 10)
+ assert percent_correct >= 0.95, (
+ "NN-descent query did not get 95% " "accuracy on nearest neighbors"
+ )
+
+@pytest.mark.parametrize("metric", ["euclidean", "cosine"])
+def test_tree_init_false(nn_data, metric):
+ nnd = NNDescent(
+ nn_data[200:], metric=metric, n_neighbors=10, random_state=None, tree_init=False
+ )
+ nnd.prepare()
+
+ knn_indices, _ = nnd.query(nn_data[:200], k=10, epsilon=0.2)
+
+ true_nnd = NearestNeighbors(metric=metric).fit(nn_data[200:])
+ true_indices = true_nnd.kneighbors(nn_data[:200], 10, return_distance=False)
+
+ num_correct = 0.0
+ for i in range(true_indices.shape[0]):
+ num_correct += np.sum(np.in1d(true_indices[i], knn_indices[i]))
+
+ percent_correct = num_correct / (true_indices.shape[0] * 10)
+ assert percent_correct >= 0.95, (
+ "NN-descent query did not get 95% " "accuracy on nearest neighbors"
+ )
=====================================
pynndescent/utils.py
=====================================
@@ -223,48 +223,36 @@ def siftdown(heap1, heap2, elt):
elt = swap
-@numba.njit(cache=True)
-def deheap_sort(heap):
- """Given an array of heaps (of graph_indices and weights), unpack the heap
- out to give and array of sorted lists of graph_indices and weights by increasing
- weight. This is effectively just the second half of heap sort (the first
- half not being required since we already have the graph_data in a heap).
+@numba.njit(parallel=True, cache=False)
+def deheap_sort(indices, distances):
+ """Given two arrays representing a heap (indices and distances), reorder the
+ arrays by increasing distance. This is effectively just the second half of
+ heap sort (the first half not being required since we already have the
+ graph_data in a heap).
+
+ Note that this is done in-place.
Parameters
----------
- heap : array of shape (3, n_samples, n_neighbors)
- The heap to turn into sorted lists.
+ indices : array of shape (n_samples, n_neighbors)
+ The graph indices to sort by distance.
+ distances : array of shape (n_samples, n_neighbors)
+ The corresponding edge distance.
Returns
-------
- graph_indices, weights: arrays of shape (n_samples, n_neighbors)
- The graph_indices and weights sorted by increasing weight.
+ indices, distances: arrays of shape (n_samples, n_neighbors)
+ The indices and distances sorted by increasing distance.
"""
- indices = heap[0]
- weights = heap[1]
-
- for i in range(indices.shape[0]):
+ for i in numba.prange(indices.shape[0]):
+ # starting from the end of the array and moving back
+ for j in range(indices.shape[1] - 1, 0, -1):
+ indices[i, 0], indices[i, j] = indices[i, j], indices[i, 0]
+ distances[i, 0], distances[i, j] = distances[i, j], distances[i, 0]
- ind_heap = indices[i]
- dist_heap = weights[i]
+ siftdown(distances[i, :j], indices[i, :j], 0)
- for j in range(ind_heap.shape[0] - 1):
- ind_heap[0], ind_heap[ind_heap.shape[0] - j - 1] = (
- ind_heap[ind_heap.shape[0] - j - 1],
- ind_heap[0],
- )
- dist_heap[0], dist_heap[dist_heap.shape[0] - j - 1] = (
- dist_heap[dist_heap.shape[0] - j - 1],
- dist_heap[0],
- )
-
- siftdown(
- dist_heap[: dist_heap.shape[0] - j - 1],
- ind_heap[: ind_heap.shape[0] - j - 1],
- 0,
- )
-
- return indices.astype(np.int64), weights
+ return indices, distances
# @numba.njit()
@@ -306,7 +294,7 @@ def deheap_sort(heap):
# return -1
-@numba.njit(parallel=True, locals={"idx": numba.types.int64}, cache=True)
+@numba.njit(parallel=True, locals={"idx": numba.types.int64}, cache=False)
def new_build_candidates(current_graph, max_candidates, rng_state, n_threads):
"""Build a heap of candidate neighbors for nearest neighbor descent. For
each vertex the candidate neighbors are any current neighbors, and any
@@ -606,7 +594,7 @@ def checked_flagged_heap_push(priorities, indices, flags, p, n, f):
"i": numba.uint32,
"j": numba.uint32,
},
- cache=True,
+ cache=False,
)
def apply_graph_updates_low_memory(current_graph, updates, n_threads):
@@ -701,7 +689,7 @@ def initalize_heap_from_graph_indices(heap, graph_indices, data, metric):
return heap
-@numba.njit(parallel=True, cache=True)
+@numba.njit(parallel=True, cache=False)
def sparse_initalize_heap_from_graph_indices(
heap, graph_indices, data_indptr, data_indices, data_vals, metric
):
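
For context: deheap_sort now takes the heap's index and distance arrays separately instead of a packed (3, n_samples, n_neighbors) heap, sorts them in place with a prange-parallel heap-sort pass, and returns them; the call sites updated above (graph_utils.py, pynndescent_.py, sparse_nndescent.py) unpack the heap before calling it. A minimal usage sketch on a toy single-row max-heap:

import numpy as np
from pynndescent.utils import deheap_sort

indices = np.array([[3, 1, 2]], dtype=np.int32)
distances = np.array([[0.9, 0.1, 0.5]], dtype=np.float32)  # valid max-heap row

inds, dists = deheap_sort(indices, distances)  # sorts each row in place
# dists is now [[0.1, 0.5, 0.9]] and inds is [[1, 2, 3]].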
=====================================
setup.py
=====================================
@@ -8,7 +8,7 @@ def readme():
configuration = {
"name": "pynndescent",
- "version": "0.5.6",
+ "version": "0.5.7",
"description": "Nearest Neighbor Descent",
"long_description": readme(),
"classifiers": [
View it on GitLab: https://salsa.debian.org/python-team/packages/python-pynndescent/-/commit/d3f6540a1d75451f5592815c940ac812b3d1e1ff