move run_algorithm into runner, compute filename from definitions
erikbern committed Dec 5, 2017
1 parent 74c6e1f · commit 46eb818
Showing 5 changed files with 74 additions and 69 deletions.
12 changes: 12 additions & 0 deletions ann_benchmarks/algorithms/definitions.py
@@ -2,6 +2,9 @@
 from os import sep as pathsep
 import collections
 import importlib
+import json
+import os
+import re
 import sys
 import traceback
 import yaml
@@ -18,6 +21,15 @@ def instantiate_algorithm(definition):
     return constructor(*definition.arguments)
 
 
+def get_result_filename(dataset, count, definition):
+    d = ['results',
+         dataset,
+         str(count),
+         definition.algorithm,
+         re.sub(r'\W+', '_', json.dumps(definition.arguments, sort_keys=True)).strip('_')]
+    return os.path.join(*d)
+
+
 def _handle_args(args):
     if isinstance(args, list):
         args = [el if isinstance(el, list) else [el] for el in args]
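The new helper maps a (dataset, count, definition) triple to a deterministic path under results/, JSON-serializing the constructor arguments (sort_keys=True keeps the serialization stable) and squashing them into a filesystem-safe slug. A quick sketch of what it produces — the algorithm, module, constructor, and dataset names here are made up for illustration:

    from ann_benchmarks.algorithms.definitions import Definition, get_result_filename

    definition = Definition(
        algorithm='annoy',                         # hypothetical algorithm name
        docker_tag=None,
        module='ann_benchmarks.algorithms.annoy',  # hypothetical module path
        constructor='Annoy',
        arguments=['angular', 100])

    print(get_result_filename('glove-100-angular', 10, definition))
    # results/glove-100-angular/10/annoy/angular_100

Because the same definition always maps to the same file, main.py (below) can skip any run whose result file already exists.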
11 changes: 3 additions & 8 deletions ann_benchmarks/main.py
@@ -8,7 +8,7 @@
 from ann_benchmarks.datasets import get_dataset
 from ann_benchmarks.results import get_results
 from ann_benchmarks.constants import INDEX_DIR
-from ann_benchmarks.algorithms.definitions import get_definitions, list_algorithms
+from ann_benchmarks.algorithms.definitions import get_definitions, list_algorithms, get_result_filename
 from ann_benchmarks.runner import run, run_docker

@@ -93,19 +93,14 @@ def main():
     if os.path.exists(INDEX_DIR):
         shutil.rmtree(INDEX_DIR)
 
-    # TODO(erikbern): deal with this later
-    #algos_already_run = set()
-    #if not args.force:
-    #    for res in get_results(args.dataset, args.count):
-    #        print(res)
-    #        algos_already_run.add((res.attrs["library"], res.attrs["name"]))
-
     dataset = get_dataset(args.dataset)
     dimension = len(dataset['train'][0]) # TODO(erikbern): ugly
     point_type = 'float' # TODO(erikbern): should look at the type of X_train
     distance = dataset.attrs['distance']
     definitions = get_definitions(args.definitions, dimension, point_type, distance, args.count)
 
+    definitions = [definition for definition in definitions if not os.path.exists(get_result_filename(args.dataset, args.count, definition))]
+
     random.shuffle(definitions)
 
     if args.algorithm:
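With this filter in place, re-running main.py executes only the definitions whose result file is missing, so an interrupted benchmark run picks up where it left off; since the check is a plain os.path.exists, deleting a file under results/ makes that definition run again.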
21 changes: 6 additions & 15 deletions ann_benchmarks/results.py
@@ -2,22 +2,11 @@
 
 import h5py
 import os
+from ann_benchmarks.algorithms.definitions import get_result_filename
 
-def store_results(attrs, results, dataset, count, distance):
-    fragments = {
-        "ds": dataset,
-        "k": count,
-        "dst": distance,
-        "inst": attrs["name"],
-    }
-    for k, v in fragments.items():
-        if v and isinstance(v, str):
-            assert not os.sep in v, """\
-error: path fragment "%s" contains a path separator and so would break the \
-directory hierarchy""" % k
-    def _make_path(*args):
-        return os.path.join(*map(lambda s: s % fragments, args))
-    fn = _make_path("results", "k=%(k)d", "dataset=%(ds)s", "%(inst)s.hdf5")
-
+def store_results(dataset, count, definition, attrs, results):
+    fn = get_result_filename(dataset, count, definition)
     head, tail = os.path.split(fn)
     if not os.path.isdir(head):
         os.makedirs(head)
@@ -33,6 +22,7 @@ def _make_path(*args):
         distances[i] = [d for n, d in ds] + [float('inf')] * (count - len(ds))
     f.close()
 
+
 def _get_leaf_paths(path):
     if os.path.isdir(path):
         for fragment in os.listdir(path):
@@ -41,6 +31,7 @@
     elif os.path.isfile(path) and path.endswith(".hdf5"):
         yield path
 
+
 def _leaf_path_to_descriptor(path):
     directory, _ = os.path.split(path)
     parts = directory.split(os.sep)[1:]
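store_results keeps its HDF5 output but now derives the path from get_result_filename instead of rebuilding it from path fragments. A sketch of inspecting a stored result directly — the path is the hypothetical one from the definitions.py example above, and the 'distances' dataset name is assumed from the padding code in this file:

    import h5py

    f = h5py.File('results/glove-100-angular/10/annoy/angular_100', 'r')
    print(dict(f.attrs))      # build_time, best_search_time, candidates, ...
    print(f['distances'][0])  # distances for the first query, inf-padded to count
    f.close()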
56 changes: 51 additions & 5 deletions ann_benchmarks/runner.py
@@ -1,3 +1,4 @@
+import argparse
 import datetime
 import docker
 import json
@@ -7,8 +8,8 @@
 import sys
 import time
 
-from ann_benchmarks.datasets import get_dataset
-from ann_benchmarks.algorithms.definitions import instantiate_algorithm
+from ann_benchmarks.datasets import get_dataset, DATASETS
+from ann_benchmarks.algorithms.definitions import Definition, instantiate_algorithm
 from ann_benchmarks.distance import metrics
 from ann_benchmarks.results import get_results, store_results
@@ -78,7 +79,6 @@ def batch_query(X):
 
         verbose = hasattr(algo, "query_verbose")
         attrs = {
-            "name": algo.name,
             "build_time": build_time,
             "best_search_time": best_search_time,
             "candidates": avg_candidates,
@@ -87,13 +87,59 @@
             "expect_extra": verbose,
             "batch_mode": use_batch_query
         }
-        store_results(attrs, results, dataset, count, distance)
+        store_results(dataset, count, definition, attrs, results)
     finally:
         algo.done()
 
 
+def run_from_cmdline():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '--dataset',
+        choices=DATASETS.keys(),
+        required=True)
+    parser.add_argument(
+        '--algorithm',
+        required=True)
+    parser.add_argument(
+        '--module',
+        required=True)
+    parser.add_argument(
+        '--constructor',
+        required=True)
+    parser.add_argument(
+        '--count',
+        required=True,
+        type=int)
+    parser.add_argument(
+        '--json-args',
+        action='store_true')
+    parser.add_argument(
+        '-a', '--arg',
+        dest='args', action='append')
+    args = parser.parse_args()
+    if args.json_args:
+        algo_args = [json.loads(arg) for arg in args.args]
+    else:
+        algo_args = args.args
+
+    definition = Definition(
+        algorithm=args.algorithm,
+        docker_tag=None, # not needed
+        module=args.module,
+        constructor=args.constructor,
+        arguments=algo_args
+    )
+    run(definition, args.dataset, args.count)
+
+
 def run_docker(definition, dataset, count, runs, timeout=7200, mem_limit='8g'):
-    cmd = ['--dataset', dataset, '--module', definition.module, '--constructor', definition.constructor, '--count', str(count), '--json-args']
+    cmd = ['--dataset', dataset,
+           '--algorithm', definition.algorithm,
+           '--module', definition.module,
+           '--constructor', definition.constructor,
+           '--count', str(count),
+           '--json-args']
     for arg in definition.arguments:
         cmd += ['--arg', json.dumps(arg)]
     print('Running command', cmd)
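run_from_cmdline consumes exactly the argument vector that run_docker assembles, so the same code path serves Docker and local runs; run_algorithm.py (below) is now a thin wrapper around it. A hypothetical local invocation — the dataset, algorithm, module, and constructor values are illustrative. Note that with --json-args each --arg must be valid JSON, so strings carry embedded quotes, mirroring the json.dumps call in run_docker:

    python run_algorithm.py \
        --dataset glove-100-angular \
        --algorithm annoy \
        --module ann_benchmarks.algorithms.annoy \
        --constructor Annoy \
        --count 10 \
        --json-args \
        --arg '"angular"' \
        --arg 100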
43 changes: 2 additions & 41 deletions run_algorithm.py
@@ -1,42 +1,3 @@
-import argparse
-import json
-from ann_benchmarks.datasets import DATASETS
-from ann_benchmarks.algorithms.definitions import Definition, instantiate_algorithm
-from ann_benchmarks.runner import run
+from ann_benchmarks.runner import run_from_cmdline
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        '--dataset',
-        choices=DATASETS.keys(),
-        required=True)
-    parser.add_argument(
-        '--module',
-        required=True)
-    parser.add_argument(
-        '--constructor',
-        required=True)
-    parser.add_argument(
-        '--count',
-        required=True,
-        type=int)
-    parser.add_argument(
-        '--json-args',
-        action='store_true')
-    parser.add_argument(
-        '-a', '--arg',
-        dest='args', action='append')
-    args = parser.parse_args()
-    if args.json_args:
-        algo_args = [json.loads(arg) for arg in args.args]
-    else:
-        algo_args = args.args
-
-    definition = Definition(
-        algorithm=None, # not needed
-        docker_tag=None, # also not needed
-        module=args.module,
-        constructor=args.constructor,
-        arguments=algo_args
-    )
-    run(definition, args.dataset, args.count)
+    run_from_cmdline()
