Skip to content

Commit

Permalink
Merge pull request #445 from brj0/add-nndescent-algorithm
Browse files Browse the repository at this point in the history
add nndescent algorithm
  • Loading branch information
erikbern authored Jul 28, 2023
2 parents 6565385 + 5a062d0 commit f5ba3ce
Show file tree
Hide file tree
Showing 5 changed files with 267 additions and 2 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ jobs:
- luceneknn
- milvus
- mrpt
- nndescent
- n2
- nmslib
- onng_ngt
Expand All @@ -64,7 +65,7 @@ jobs:
- vespa
- weaviate
include:
- library: pynndescent
- library: pynndescent
dataset: random-xs-16-hamming
- library: datasketch
dataset: random-s-jaccard
Expand All @@ -88,7 +89,7 @@ jobs:

- name: Build Library Docker Image
run: python3 install.py

- name: Run the benchmark
run: |
python3 run.py --docker-tag ann-benchmarks-${LIBRARY} --max-n-algorithms 3 --runs 2 --dataset $DATASET --run-disabled --timeout 300
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ Evaluated
* [FAISS](https://github.com/facebookresearch/faiss) ![https://img.shields.io/github/stars/facebookresearch/faiss?style=social](https://img.shields.io/github/stars/facebookresearch/faiss?style=social)
* [DolphinnPy](https://github.com/ipsarros/DolphinnPy) ![https://img.shields.io/github/stars/ipsarros/DolphinnPy?style=social](https://img.shields.io/github/stars/ipsarros/DolphinnPy?style=social)
* [Datasketch](https://github.com/ekzhu/datasketch) ![https://img.shields.io/github/stars/ekzhu/datasketch?style=social](https://img.shields.io/github/stars/ekzhu/datasketch?style=social)
* [nndescent](https://github.com/brj0/nndescent) ![https://img.shields.io/github/stars/brj0/nndescent?style=social](https://img.shields.io/github/stars/brj0/nndescent?style=social)
* [PyNNDescent](https://github.com/lmcinnes/pynndescent) ![https://img.shields.io/github/stars/lmcinnes/pynndescent?style=social](https://img.shields.io/github/stars/lmcinnes/pynndescent?style=social)
* [MRPT](https://github.com/teemupitkanen/mrpt) ![https://img.shields.io/github/stars/teemupitkanen/mrpt?style=social](https://img.shields.io/github/stars/teemupitkanen/mrpt?style=social)
* [NGT](https://github.com/yahoojapan/NGT) ![https://img.shields.io/github/stars/yahoojapan/NGT?style=social](https://img.shields.io/github/stars/yahoojapan/NGT?style=social): ONNG, PANNG, QG
Expand Down
5 changes: 5 additions & 0 deletions ann_benchmarks/algorithms/nndescent/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
FROM ann-benchmarks

# The requirement specifier must be quoted: RUN uses the shell form, so an
# unquoted ">=1.0.4" is parsed by /bin/sh as an output redirection to a file
# named "=1.0.4", and pip would install an unpinned nndescent instead.
RUN pip3 install --verbose numpy 'nndescent>=1.0.4'

# Fail the image build early if the package cannot be imported.
RUN python3 -c 'import nndescent'
138 changes: 138 additions & 0 deletions ann_benchmarks/algorithms/nndescent/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
# Entries under the `bit` key: one algorithm definition each for the hamming
# and jaccard metrics. Values inside `arg_groups` that are lists enumerate
# alternatives; scalars are fixed settings.
bit:
  hamming:
    - base_args: ['@metric']
      constructor: NNDescent
      disabled: false
      docker_tag: ann-benchmarks-nndescent
      module: ann_benchmarks.algorithms.nndescent
      name: nndescent
      run_groups:
        NN-120:
          arg_groups:
            - pruning_prob: [0.0, 1.0]
              leaf_size: 80
              n_neighbors: [120]
              pruning_degree_multiplier: [2.0, 2.5]
          args: {}
          query_args: [[0.08, 0.16, 0.2, 0.24, 0.28, 0.32, 0.36]]
        NN-20:
          arg_groups:
            - pruning_prob: [0.75, 1.0]
              leaf_size: 32
              n_neighbors: [20]
              pruning_degree_multiplier: [1.0, 1.5]
          args: {}
          query_args: [[0.0, 0.01, 0.02, 0.04, 0.08, 0.12, 0.16]]
        NN-40:
          arg_groups:
            - pruning_prob: [0.5, 1.0]
              leaf_size: 48
              n_neighbors: [40]
              pruning_degree_multiplier: [1.5, 2.0]
          args: {}
          query_args: [[0.0, 0.04, 0.08, 0.12, 0.16, 0.2, 0.24]]
        NN-80:
          arg_groups:
            - pruning_prob: [0.25, 1.0]
              leaf_size: 64
              n_neighbors: [80]
              pruning_degree_multiplier: [1.75, 2.25]
          args: {}
          query_args: [[0.0, 0.08, 0.12, 0.16, 0.2, 0.24, 0.28, 0.32]]
  jaccard:
    - base_args: ['@metric']
      constructor: NNDescent
      disabled: false
      docker_tag: ann-benchmarks-nndescent
      module: ann_benchmarks.algorithms.nndescent
      name: nndescent
      run_groups:
        NN-120:
          arg_groups:
            - pruning_prob: [1.0, 0.125]
              leaf_size: 80
              n_neighbors: 120
              pruning_degree_multiplier: 1.0
          args: {}
          query_args: [[0.0, 0.02, 0.04, 0.06, 0.08, 0.12, 0.14, 0.16, 0.18, 0.2, 0.22]]
        NN-20:
          arg_groups:
            - pruning_prob: [0.75, 1.0]
              leaf_size: 30
              n_neighbors: 20
              pruning_degree_multiplier: 1.0
          args: {}
          query_args: [[0.0, 0.01, 0.02, 0.03, 0.04, 0.06, 0.08, 0.12, 0.16, 0.2]]
        NN-40:
          arg_groups:
            - pruning_prob: [0.5, 1.0]
              leaf_size: 30
              n_neighbors: 40
              pruning_degree_multiplier: 1.0
          args: {}
          query_args: [[0.0, 0.01, 0.02, 0.03, 0.04, 0.06, 0.08, 0.12, 0.16, 0.2]]
        NN-80:
          arg_groups:
            - pruning_prob: [1.0, 0.25]
              leaf_size: 60
              n_neighbors: 80
              pruning_degree_multiplier: 1.0
          args: {}
          query_args: [[0.0, 0.02, 0.04, 0.06, 0.08, 0.12, 0.14, 0.16, 0.18, 0.2, 0.22]]
# Entries under the `float` key: algorithm definitions for the angular and
# euclidean metrics, plus an `any` section. Values inside `arg_groups` that
# are lists enumerate alternatives; scalars are fixed settings.
float:
  angular:
    - base_args: ['@metric']
      constructor: NNDescent
      disabled: false
      docker_tag: ann-benchmarks-nndescent
      module: ann_benchmarks.algorithms.nndescent
      name: nndescent
      run_groups:
        NN-120-accurate:
          arg_groups:
            - pruning_prob: 0.125
              leaf_size: 35
              n_neighbors: 120
              pruning_degree_multiplier: 2.5
          args: {}
          query_args: [[0.16, 0.2, 0.24, 0.28, 0.32, 0.36]]
        NN-120-fast:
          arg_groups:
            - pruning_prob: 1.0
              leaf_size: 20
              n_neighbors: 120
              pruning_degree_multiplier: 2.5
          args: {}
          query_args: [[0.0, 0.04, 0.08, 0.16, 0.2, 0.24, 0.28, 0.32]]
        NN-20:
          arg_groups:
            - pruning_prob: [1.0]
              leaf_size: 20
              n_neighbors: [20]
              pruning_degree_multiplier: [0.5, 1.0]
          args: {}
          query_args: [[0.0, 0.02, 0.04, 0.06, 0.08, 0.1, 0.12]]
        NN-40:
          arg_groups:
            - pruning_prob: [0.5, 1.0]
              leaf_size: 25
              n_neighbors: [40]
              pruning_degree_multiplier: [1.5]
          args: {}
          query_args: [[0.0, 0.02, 0.04, 0.06, 0.08, 0.1, 0.12, 0.14, 0.16]]
        NN-80-accurate:
          arg_groups:
            - pruning_prob: 0.25
              leaf_size: 30
              n_neighbors: 80
              pruning_degree_multiplier: 2.0
          args: {}
          query_args: [[0.08, 0.12, 0.16, 0.2, 0.24, 0.28, 0.32, 0.36]]
        NN-80-fast:
          arg_groups:
            - pruning_prob: 1.0
              leaf_size: 20
              n_neighbors: 80
              pruning_degree_multiplier: 2.0
          args: {}
          query_args: [[0.0, 0.02, 0.04, 0.08, 0.12, 0.16, 0.2, 0.24]]
  any:
    - base_args: ['@metric']
      constructor: NNDescent
      disabled: false
      docker_tag: ann-benchmarks-nndescent
      module: ann_benchmarks.algorithms.nndescent
      name: nndescent
      run_groups:
        NN-10-20:
          arg_groups:
            - pruning_prob: [1.0]
              leaf_size: 32
              n_neighbors: [10, 20]
              pruning_degree_multiplier: [1.5, 2.0]
          args: {}
          query_args: [[0.0, 0.02, 0.04, 0.06, 0.08, 0.1, 0.12, 0.14, 0.16]]
        NN-40-80:
          arg_groups:
            - pruning_prob: [0.0, 1.0]
              leaf_size: 64
              n_neighbors: [40, 80]
              pruning_degree_multiplier: [2.0, 2.5]
          args: {}
          query_args: [[0.0, 0.04, 0.08, 0.12, 0.16, 0.2, 0.24, 0.28, 0.32]]
  euclidean:
    - base_args: ['@metric']
      constructor: NNDescent
      disabled: false
      docker_tag: ann-benchmarks-nndescent
      module: ann_benchmarks.algorithms.nndescent
      name: nndescent
      run_groups:
        NN-10:
          arg_groups:
            - pruning_prob: 1.0
              leaf_size: 24
              n_neighbors: 10
              pruning_degree_multiplier: [0.5, 1.0]
          args: {}
          query_args: [[0.0, 0.01, 0.02, 0.03, 0.04, 0.06, 0.08, 0.1, 0.12]]
        NN-20:
          arg_groups:
            - pruning_prob: 1.0
              leaf_size: 24
              n_neighbors: 20
              pruning_degree_multiplier: [0.75, 1.5]
          args: {}
          query_args: [[0.0, 0.02, 0.04, 0.06, 0.08, 0.1, 0.12, 0.14, 0.16, 0.2]]
        NN-40:
          arg_groups:
            - pruning_prob: [0.0, 1.0]
              leaf_size: 36
              n_neighbors: 40
              pruning_degree_multiplier: [1.0, 2.0]
          args: {}
          query_args: [[0.0, 0.02, 0.04, 0.08, 0.12, 0.16, 0.2, 0.24, 0.28, 0.32]]
        NN-60:
          arg_groups:
            - pruning_prob: 0.0
              leaf_size: 48
              n_neighbors: 60
              pruning_degree_multiplier: [2.0, 3.0]
          args: {}
          query_args: [[0.0, 0.04, 0.08, 0.12, 0.16, 0.2, 0.24, 0.28, 0.32, 0.36]]
120 changes: 120 additions & 0 deletions ann_benchmarks/algorithms/nndescent/module.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import numpy as np
import nndescent
import scipy.sparse

from ..base.module import BaseANN


class NNDescent(BaseANN):
    """Adapter exposing the ``nndescent`` library through ``BaseANN``.

    Index-construction parameters are read from ``index_param_dict`` in
    ``__init__``; the search-time ``epsilon`` is set separately through
    ``set_query_arguments``.
    """

    def __init__(self, metric, index_param_dict):
        """Store build parameters and translate the metric name.

        Args:
            metric: One of ``"angular"``, ``"euclidean"``, ``"hamming"`` or
                ``"jaccard"``.
            index_param_dict: Mapping with optional keys ``n_neighbors``,
                ``pruning_degree_multiplier``, ``pruning_prob`` and
                ``leaf_size``.

        Raises:
            KeyError: If ``metric`` is not one of the supported names.
        """
        self.n_neighbors = int(index_param_dict.get("n_neighbors", 30))
        self.pruning_degree_multiplier = float(
            index_param_dict.get("pruning_degree_multiplier", 1.5)
        )
        self.pruning_prob = float(index_param_dict.get("pruning_prob", 1.0))
        # Always define leaf_size: fit() and __str__() read it
        # unconditionally, so leaving it unset when the key is missing would
        # raise AttributeError. 32 is this wrapper's fallback value.
        self.leaf_size = int(index_param_dict.get("leaf_size", 32))

        # Same default as set_query_arguments(), so __str__() is usable even
        # before query arguments have been set.
        self.epsilon = 0.1

        # Jaccard data is handled as a sparse matrix in fit() and query().
        self.is_sparse = metric in ["jaccard"]

        self.nnd_metric = {
            "angular": "dot",
            "euclidean": "euclidean",
            "hamming": "hamming",
            "jaccard": "jaccard",
        }[metric]

    def fit(self, X):
        """Build the index over ``X`` and run a warm-up query.

        Args:
            X: Training data. In sparse (jaccard) mode this is either a list
                of index sequences (one per row, each holding the column
                indices of the non-zero entries) or anything accepted by
                ``scipy.sparse.csr_matrix``. Otherwise it is array-like and
                is converted to a float32 ``numpy.ndarray`` if needed.
        """
        if self.is_sparse:
            if isinstance(X, list):
                # Assemble the CSR structure directly from the per-row index
                # lists: every listed column gets the value 1.
                row_sizes = [len(row) for row in X]
                n_cols = max(max(row) for row in X) + 1
                csr = scipy.sparse.csr_matrix(
                    (len(X), n_cols), dtype=np.float32
                )
                csr.indices = np.hstack(X).astype(np.int32)
                csr.indptr = np.concatenate(
                    [[0], np.cumsum(row_sizes)]
                ).astype(np.int32)
                csr.data = np.ones(csr.indices.shape[0], dtype=np.float32)
                csr.sort_indices()
                X = csr
            else:
                X = scipy.sparse.csr_matrix(X)

            # Reusable single-row matrix that query() fills in place.
            self.query_matrix = scipy.sparse.csr_matrix(
                (1, X.shape[1]), dtype=np.float32
            )
        elif not isinstance(X, np.ndarray) or X.dtype != np.float32:
            print("Convert data to float32")
            X = np.asarray(X, dtype=np.float32)

        # nndescent uses pointers to the data. Make sure X does not change
        # outside of this scope.
        self.X = X
        self.index = nndescent.NNDescent(
            self.X,
            n_neighbors=self.n_neighbors,
            metric=self.nnd_metric,
            leaf_size=self.leaf_size,
            pruning_degree_multiplier=self.pruning_degree_multiplier,
            pruning_prob=self.pruning_prob,
            verbose=True,
        )

        # Make a dummy query to prepare the search graph. The empty query
        # has zero rows and the same number of columns (features) as the
        # data, i.e. X.shape[1] rather than the number of points.
        warmup = np.empty((0, X.shape[1]), dtype=np.float32)
        if self.is_sparse:
            warmup = scipy.sparse.csr_matrix(warmup)
        self.index.query(warmup, k=1, epsilon=0.1)

    def set_query_arguments(self, epsilon=0.1):
        """Set the ``epsilon`` value passed to subsequent queries."""
        self.epsilon = float(epsilon)

    def query(self, v, n):
        """Return the indices of the ``n`` nearest neighbours of ``v``.

        Args:
            v: Query as a numpy array. In sparse mode a boolean array is read
                as a mask whose non-zero positions are the set members; any
                other dtype is taken to already hold the member indices.
            n: Number of neighbours to request.

        Returns:
            The first row of the index array returned by the underlying
            ``query`` call.
        """
        if self.is_sparse:
            # Convert index array to sparse matrix format and query; the
            # overhead of direct conversion is high for single queries
            # (converting the entire test dataset and sending single rows is
            # better), so we just populate the required structures.
            if v.dtype == np.bool_:
                self.query_matrix.indices = np.flatnonzero(v).astype(np.int32)
            else:
                self.query_matrix.indices = v.astype(np.int32)
            size = self.query_matrix.indices.shape[0]
            self.query_matrix.indptr = np.array([0, size], dtype=np.int32)
            self.query_matrix.data = np.ones(size, dtype=np.float32)
            ind, _ = self.index.query(
                self.query_matrix, k=n, epsilon=self.epsilon
            )
        else:
            ind, _ = self.index.query(
                v.reshape(1, -1).astype("float32"), k=n, epsilon=self.epsilon
            )
        return ind[0]

    def __str__(self):
        """Describe this configuration by its build and query parameters."""
        return (
            f"NNDescent(n_neighbors={self.n_neighbors}, "
            f"pruning_mult={self.pruning_degree_multiplier:.2f}, "
            f"pruning_prob={self.pruning_prob:.3f}, "
            f"epsilon={self.epsilon:.3f}, "
            f"leaf_size={self.leaf_size:02d})"
        )

0 comments on commit f5ba3ce

Please sign in to comment.