Skip to content

Commit

Permalink
Merge pull request #538 from cmuparlay/main
Browse files Browse the repository at this point in the history
Add ParlayANN to ann-benchmarks
  • Loading branch information
erikbern authored Aug 2, 2024
2 parents 3bb0474 + c79b388 commit ebfea23
Show file tree
Hide file tree
Showing 4 changed files with 148 additions and 0 deletions.
1 change: 1 addition & 0 deletions .github/workflows/benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ jobs:
- onng_ngt
- opensearchknn
- panng_ngt
- parlayann
- pg_embedding
- pgvector
- pgvecto_rs
Expand Down
20 changes: 20 additions & 0 deletions ann_benchmarks/algorithms/parlayann/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Builds the ParlayANN Python bindings on top of the ann-benchmarks base image.
FROM ann-benchmarks

# Refresh the package index and install in the SAME layer: a standalone
# `apt update` layer gets cached, so later rebuilds would install against a
# stale index. The git-core PPA is added before the second index refresh so
# that `git` is resolved from it. apt-get is used instead of apt because its
# command-line interface is the one intended for scripts.
RUN export DEBIAN_FRONTEND=noninteractive \
 && apt-get update \
 && apt-get install -y software-properties-common \
 && add-apt-repository -y ppa:git-core/ppa \
 && apt-get update \
 && apt-get install -y \
        git make cmake g++ \
        libaio-dev libgoogle-perftools-dev libunwind-dev clang-format \
        libboost-dev libboost-program-options-dev \
        libmkl-full-dev libcpprest-dev \
        python3.10 \
 && rm -rf /var/lib/apt/lists/*

# Python-side build requirements for the bindings.
RUN pip3 install pybind11 numpy

# Passing a different value (--build-arg CACHEBUST=...) invalidates the cache
# for the clone/build layers below, forcing a fresh checkout of the branch.
ARG CACHEBUST=1
RUN git clone -b annbench https://github.com/cmuparlay/ParlayANN.git
RUN cd ParlayANN && git submodule update --init --recursive
RUN cd ParlayANN/python && bash compile.sh

# Make the compiled modules in ParlayANN/python importable by the benchmark runner.
ENV PYTHONPATH=$PYTHONPATH:/home/app/ParlayANN/python
WORKDIR /home/app
43 changes: 43 additions & 0 deletions ann_benchmarks/algorithms/parlayann/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# ParlayANN run configurations for ann-benchmarks, keyed by point type and
# then by distance metric.
#
# Each run group has:
#   args       - index build parameters (alpha, R, L, two_pass); the ParlayANN
#                constructor reads these keys from the dict it is given.
#   query_args - one dict per query-time setting; ParlayANN.set_query_arguments
#                reads the optional keys Q and limit, falling back to Q = 10
#                and limit = 1000 when a key is absent.
# NOTE(review): '@metric' in base_args is presumably substituted with the
# metric name by the benchmark framework — confirm against the framework docs.
float:
euclidean:
- base_args: ['@metric']
constructor: ParlayANN
disabled: false
docker_tag: ann-benchmarks-parlayann
module: ann_benchmarks.algorithms.parlayann
name: parlayann
run_groups:
# Largest euclidean graph (R = 80); swept over Q only.
parlay_80:
args: [{alpha: 1.15, R: 80, L: 160, two_pass: True}]
query_args: [[{Q: 20}, {Q: 22}, {Q: 25}, {Q: 30}, {Q: 40}, {Q: 50}, {Q: 60}, {Q: 80}, {Q: 100}, {Q: 125}, {Q: 150}, {Q: 200}, {Q: 300}, {Q: 400}, {Q: 600}, {Q: 800}]]
# Swept over limit (with default Q) and then over Q (with default limit).
parlay_64:
args: [{alpha: 1.1, R: 64, L: 128, two_pass: True}]
query_args: [[{limit: 10}, {limit: 11}, {limit: 12}, {limit: 13}, {limit: 14}, {limit: 15}, {limit: 16}, {limit: 18}, {limit: 20}, {limit: 22}, {Q: 10}, {Q: 11}, {Q: 12}, {Q: 14}, {Q: 16}, {Q: 18}, {Q: 20}, {Q: 22}, {Q: 25}, {Q: 30}, {Q: 40}, {Q: 50}, {Q: 60}, {Q: 80}, {Q: 100}, {Q: 125}, {Q: 150}, {Q: 200}, {Q: 300}, {Q: 400}, {Q: 600}, {Q: 800}]]
parlay_40:
args: [{alpha: 1.08, R: 40, L: 80, two_pass: True}]
query_args: [[{limit: 10}, {limit: 11}, {limit: 12}, {limit: 13}, {limit: 14}, {limit: 15}, {limit: 16}, {limit: 18}, {limit: 20}, {limit: 22}, {Q: 10}, {Q: 11}, {Q: 12}, {Q: 14}, {Q: 16}, {Q: 18}, {Q: 20}]]
parlay_32_05:
args: [{alpha: 1.05, R: 32, L: 64, two_pass: True}]
query_args: [[{limit: 10}, {limit: 11}, {limit: 12}, {limit: 13}, {limit: 14}, {limit: 15}, {limit: 16}, {limit: 18}, {limit: 20}, {limit: 22}, {Q: 10}, {Q: 11}, {Q: 12}, {Q: 14}, {Q: 16}, {Q: 18}, {Q: 20}]]
# Angular runs: every group here uses alpha below 1, unlike the euclidean
# groups above.
angular:
- base_args: ['@metric']
constructor: ParlayANN
disabled: false
docker_tag: ann-benchmarks-parlayann
module: ann_benchmarks.algorithms.parlayann
name: parlayann
run_groups:
# Largest angular graph (R = 130); swept over Q only.
parlay_130:
args: [{alpha: .85, R: 130, L: 260, two_pass: True}]
query_args: [[{Q: 20}, {Q: 22}, {Q: 25}, {Q: 30}, {Q: 40}, {Q: 50}, {Q: 60}, {Q: 80}, {Q: 100}, {Q: 125}, {Q: 150}, {Q: 200}, {Q: 400}, {Q: 800}]]
parlay_100:
args: [{alpha: .85, R: 100, L: 200, two_pass: True}]
query_args: [[{limit: 10}, {limit: 11}, {limit: 12}, {limit: 13}, {limit: 14}, {limit: 15}, {limit: 16}, {limit: 18}, {limit: 20}, {limit: 22}, {limit: 25}, {limit: 30}, {Q: 10}, {Q: 11}, {Q: 12}, {Q: 13}, {Q: 14}, {Q: 15}, {Q: 16}, {Q: 17}, {Q: 18}, {Q: 20}, {Q: 22}, {Q: 25}, {Q: 30}, {Q: 40}, {Q: 50}, {Q: 60}, {Q: 80}, {Q: 100}, {Q: 125}, {Q: 150}, {Q: 200}, {Q: 400}, {Q: 800}]]
parlay_80:
args: [{alpha: .90, R: 80, L: 160, two_pass: True}]
query_args: [[{limit: 10}, {limit: 11}, {limit: 12}, {limit: 13}, {limit: 14}, {limit: 15}, {limit: 16}, {limit: 18}, {limit: 20}, {limit: 22}, {limit: 25}, {limit: 30}, {Q: 10}, {Q: 11}, {Q: 12}, {Q: 13}, {Q: 14}, {Q: 15}, {Q: 16}, {Q: 17}, {Q: 18}, {Q: 20}]]
parlay_50:
args: [{alpha: .95, R: 50, L: 100, two_pass: True}]
query_args: [[{limit: 10}, {limit: 11}, {limit: 12}, {limit: 13}, {limit: 14}, {limit: 15}, {limit: 16}, {limit: 18}, {limit: 20}, {limit: 22}, {limit: 25}, {limit: 30}, {Q: 10}, {Q: 11}, {Q: 12}, {Q: 14}, {Q: 16}, {Q: 18}, {Q: 20}]]


84 changes: 84 additions & 0 deletions ann_benchmarks/algorithms/parlayann/module.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
from __future__ import absolute_import
import psutil
import os
import struct
import time
import numpy as np
import wrapper as pann

from ..base.module import BaseANN

class ParlayANN(BaseANN):
    """ann-benchmarks adapter around the ParlayANN Vamana index bindings.

    The index is built (or reloaded from disk) in :meth:`fit`; query-time
    knobs are supplied through :meth:`set_query_arguments`.
    """

    # Query-time defaults, used until set_query_arguments() overrides them
    # and whenever a key is missing from the supplied query arguments.
    _DEFAULT_Q = 10
    _DEFAULT_LIMIT = 1000

    def __init__(self, metric, index_params):
        """Store build parameters and translate the metric name.

        Args:
            metric: ann-benchmarks metric name ('euclidean', 'ip' or
                'angular').
            index_params: mapping with optional keys ``R``, ``L``, ``alpha``
                and ``two_pass``; missing keys fall back to 50, 100, 1.15
                and False respectively.

        Raises:
            ValueError: if ``metric`` is not one of the supported names.
        """
        self.name = "parlayann_(" + str(index_params) + ")"
        self._index_params = index_params
        self._metric = self.translate_dist_fn(metric)

        self.R = int(index_params.get("R", 50))
        self.L = int(index_params.get("L", 100))
        self.alpha = float(index_params.get("alpha", 1.15))
        self.two_pass = bool(index_params.get("two_pass", False))

        # Give the query knobs a value up front so that query()/batch_query()
        # do not fail with AttributeError when set_query_arguments() has not
        # been called yet.
        self.Q = self._DEFAULT_Q
        self.limit = self._DEFAULT_LIMIT

    def translate_dist_fn(self, metric):
        """Map an ann-benchmarks metric name to the name passed to the bindings.

        Raises:
            ValueError: for an unrecognised metric name.
        """
        # NOTE(review): 'angular' is served through 'mips' and fit() does not
        # normalise the vectors itself — confirm the data or the bindings
        # guarantee unit-norm inputs.
        metric_names = {
            'euclidean': 'Euclidian',  # spelling is the one passed to the bindings
            'ip': 'mips',
            'angular': 'mips',
        }
        try:
            return metric_names[metric]
        except KeyError:
            raise ValueError('Invalid metric') from None

    def translate_dtype(self, dtype: str):
        """Return 'float' for 'float32'; any other dtype name is passed through."""
        if dtype == 'float32':
            return 'float'
        return dtype

    def fit(self, X):
        """Write the dataset to disk, then build (if needed) and load the index.

        The data file consists of two unsigned 32-bit integers (number of
        points, dimensionality) followed by the points as row-major float32
        values. An index already present at the save path is reused instead
        of being rebuilt.

        Args:
            X: 2-D array-like of shape (n_points, n_dims).
        """
        print("Vamana: Starting Fit...")
        index_dir = "indices"
        os.makedirs(index_dir, exist_ok=True)

        data_path = os.path.join(index_dir, "base.bin")
        # NOTE(review): the cache key is the parameter string only, so an
        # index built for a different dataset with identical parameters in
        # the same directory would be reused — confirm runs are isolated.
        save_path = os.path.join(index_dir, self.name)
        print("parlayann: Index Stored At: " + save_path)

        X = np.asarray(X)
        nb, dims = X.shape
        with open(data_path, "wb") as out:
            # Write the header as integers directly. Smuggling the integer
            # bit patterns through float32 values breaks if X is not float32
            # (dtype promotion) and relies on subnormal floats surviving
            # conversion; prepending them to the data also copies the array.
            np.array([nb, dims], dtype=np.uint32).tofile(out)
            # The build and load calls below pass "float", so store float32.
            np.ascontiguousarray(X, dtype=np.float32).tofile(out)

        if not os.path.exists(save_path):
            print("parlayann: Creating Index")
            start = time.time()
            self.params = pann.build_vamana_index(
                self._metric, "float", data_path, save_path,
                self.R, self.L, self.alpha, self.two_pass,
            )
            end = time.time()
            print("Indexing time: ", end - start)
            print(f"Wrote index to {save_path}")
        self.index = pann.load_index(self._metric, "float", data_path, save_path)
        print("Index loaded")

    def query(self, X, k):
        """Search the index for the ``k`` nearest neighbours of one query ``X``."""
        return self.index.single_search(X, k, self.Q, True, self.limit)

    def batch_query(self, X, k):
        """Search for the ``k`` nearest neighbours of every row of ``X``.

        The neighbour ids and distances are kept on ``self.res`` and
        ``self.distances``; the ids are also returned.
        """
        print("running batch")
        self.res, self.distances = self.index.batch_search(
            X, k, self.Q, True, self.limit
        )
        return self.res

    def set_query_arguments(self, query_args):
        """Apply query-time settings from a mapping with optional ``Q``/``limit``.

        A missing key (or an explicit ``None``) selects the default: 10 for
        ``Q`` and 1000 for ``limit``. The instance name is updated to include
        the query arguments.
        """
        self.name = "parlayann_(" + str(self._index_params) + "," + str(query_args) + ")"
        print(query_args)
        limit = query_args.get("limit")
        q = query_args.get("Q")
        self.limit = self._DEFAULT_LIMIT if limit is None else limit
        self.Q = self._DEFAULT_Q if q is None else q

0 comments on commit ebfea23

Please sign in to comment.