Added batch mode to plotting scripts.

erikbern · Sep 29, 2018 · 5bc4b9e · 5bc4b9e
1 parent 5dd8b5b
commit 5bc4b9e
Show file tree

Hide file tree

Showing 4 changed files with 116 additions and 70 deletions.
diff --git a/create_website.py b/create_website.py
@@ -126,7 +126,7 @@ def create_plot(all_data, xn, yn, linestyle, j2_env, additional_label = "", plot
                     label = additional_label, linestyle = linestyle,
                     render_all_points = render_all_points)
 
-def build_detail_site(data, label_func, linestyles, j2_env):
+def build_detail_site(data, label_func, j2_env, linestyles, batch=False):
     for (name, runs) in data.items():
         print("Building '%s'" % name)
         all_runs = runs.keys()
@@ -145,40 +145,40 @@ def build_detail_site(data, label_func, linestyles, j2_env):
         for k in runs.keys():
             data_for_plot[k] = prepare_data(runs[k], 'k-nn', 'qps')
         plot.create_plot(data_for_plot, False,
-                False, True, 'k-nn', 'qps',  args.outputdir + name + ".png",
-                linestyles)
-        with open(args.outputdir + name + ".html", "w") as text_file:
+                False, True, 'k-nn', 'qps',  args.outputdir + name + "_batch=" + str(batch) + ".png",
+                linestyles, batch)
+        with open(args.outputdir + name + "_batch=" + str(batch) + ".html", "w") as text_file:
             text_file.write(j2_env.get_template("detail_page.html").
-                render(title = label, plot_data = data, args = args))
+                render(title = label, plot_data = data, args = args, batch=batch))
 
 
 def build_index_site(datasets, algorithms, j2_env, file_name):
-    distance_measures = sorted(set([get_distance_from_desc(e) for e in datasets.keys()]))
-    sorted_datasets = sorted(set([get_dataset_from_desc(e) for e in datasets.keys()]))
-
-    dataset_data = []
-
-    for dm in distance_measures:
-        d = {"name" : dm.capitalize(), "entries": []}
-        for ds in sorted_datasets:
-            matching_datasets = [e for e in datasets.keys() \
-                    if get_dataset_from_desc(e) == ds and \
-                       get_distance_from_desc(e) == dm]
-            sorted_matches = sorted(matching_datasets, \
-                    key = lambda e: int(get_count_from_desc(e)))
-            for idd in sorted_matches:
-                d["entries"].append({"name" : idd, "desc" : get_dataset_label(idd)})
-        dataset_data.append(d)
+    """Render summary.html to args.outputdir + file_name, grouping each mode's dataset keys by distance measure"""
+    dataset_data = {'batch' : [], 'non-batch' : []}
+    for mode in ['batch', 'non-batch']:
+        distance_measures = sorted(set([get_distance_from_desc(e) for e in datasets[mode].keys()]))
+        sorted_datasets = sorted(set([get_dataset_from_desc(e) for e in datasets[mode].keys()]))
+        for dm in distance_measures:
+            d = {"name" : dm.capitalize(), "entries": []}
+            for ds in sorted_datasets:
+                matching_datasets = [e for e in datasets[mode].keys() \
+                        if get_dataset_from_desc(e) == ds and \
+                           get_distance_from_desc(e) == dm]
+                sorted_matches = sorted(matching_datasets, \
+                        key = lambda e: int(get_count_from_desc(e)))
+                for idd in sorted_matches:
+                    d["entries"].append({"name" : idd, "desc" : get_dataset_label(idd)})
+            dataset_data[mode].append(d)
 
-    with open(args.outputdir + "index.html", "w") as text_file:
+    with open(args.outputdir + file_name, "w") as text_file:
         text_file.write(j2_env.get_template("summary.html").
                 render(title = "ANN-Benchmarks", dataset_with_distances = dataset_data,
-                    algorithms = algorithms.keys()))
+                    algorithms = algorithms))
 
 def load_all_results():
     """Read all result files and compute all metrics"""
-    all_runs_by_dataset = {}
-    all_runs_by_algorithm = {}
+    all_runs_by_dataset = {'batch' : {}, 'non-batch': {}}
+    all_runs_by_algorithm = {'batch' : {}, 'non-batch' : {}}
     cached_true_dist = []
     old_sdn = None
     for properties, f in results.load_all_results():
@@ -190,16 +190,21 @@ def load_all_results():
         algo = properties["algo"]
         ms = compute_all_metrics(cached_true_dist, f, properties)
         algo_ds = get_dataset_label(sdn)
+        idx = "non-batch"
+        if properties["batch_mode"]:
+            idx = "batch"
+        all_runs_by_algorithm[idx].setdefault(algo, {}).setdefault(algo_ds, []).append(ms)
+        all_runs_by_dataset[idx].setdefault(sdn, {}).setdefault(algo, []).append(ms)
 
-        all_runs_by_algorithm.setdefault(algo, {}).setdefault(algo_ds, []).append(ms)
-        all_runs_by_dataset.setdefault(sdn, {}).setdefault(algo, []).append(ms)
     return (all_runs_by_dataset, all_runs_by_algorithm)
 
 j2_env = Environment(loader=FileSystemLoader("./templates/"), trim_blocks = True)
-j2_env.globals.update(zip=zip)
-
+j2_env.globals.update(zip=zip, len=len)
 runs_by_ds, runs_by_algo = load_all_results()
-linestyles = {**create_linestyles([get_dataset_label(x) for x in runs_by_ds.keys()]),  **create_linestyles(runs_by_algo.keys())}
-build_detail_site(runs_by_ds, lambda label: get_dataset_label(label), linestyles, j2_env)
-build_detail_site(runs_by_algo, lambda x: x, linestyles, j2_env)
+# Both result dicts are keyed by mode ('batch' / 'non-batch') first, so gather the names found in every mode.
+linestyles = {**create_linestyles(sorted({get_dataset_label(x) for runs in runs_by_ds.values() for x in runs})),  **create_linestyles(sorted({a for runs in runs_by_algo.values() for a in runs}))}
+build_detail_site(runs_by_ds['non-batch'], lambda label: get_dataset_label(label), j2_env, linestyles, False)
+build_detail_site(runs_by_ds['batch'], lambda label: get_dataset_label(label), j2_env, linestyles, True)
+build_detail_site(runs_by_algo['non-batch'], lambda x: x, j2_env, linestyles, False)
+build_detail_site(runs_by_algo['batch'], lambda x: x, j2_env, linestyles, True)
 build_index_site(runs_by_ds, runs_by_algo, j2_env, "index.html")
diff --git a/plot.py b/plot.py
@@ -11,7 +11,7 @@
 from ann_benchmarks.results import store_results, load_all_results, get_unique_algorithms
 
 
-def create_plot(all_data, raw, x_log, y_log, xn, yn, fn_out, linestyles):
+def create_plot(all_data, raw, x_log, y_log, xn, yn, fn_out, linestyles, batch):
     xm, ym = (metrics[xn], metrics[yn])
     # Now generate each plot
     handles = []
@@ -24,7 +24,10 @@ def create_plot(all_data, raw, x_log, y_log, xn, yn, fn_out, linestyles):
         handles.append(handle)
         if raw:
             handle2, = plt.plot(axs, ays, '-', label=algo, color=faded, ms=5, mew=2, lw=2, linestyle=linestyle, marker=marker)
-        labels.append(algo)
+        label = algo
+        if batch:
+            label += "-batch"
+        labels.append(label)
 
     if x_log:
         plt.gca().set_xscale('log')
@@ -86,6 +89,10 @@ def create_plot(all_data, raw, x_log, y_log, xn, yn, fn_out, linestyles):
         '--raw',
         help='Show raw results (not just Pareto frontier) in faded colours',
         action='store_true')
+    parser.add_argument(
+        '--batch',
+        help='Plot runs in batch mode',
+        action='store_true')
     args = parser.parse_args()
 
     if not args.output:
@@ -95,11 +102,11 @@ def create_plot(all_data, raw, x_log, y_log, xn, yn, fn_out, linestyles):
     dataset = get_dataset(args.dataset)
     count = int(args.count)
     unique_algorithms = get_unique_algorithms()
-    results = load_all_results(args.dataset, count)
+    results = load_all_results(args.dataset, count, args.batch)
     linestyles = create_linestyles(sorted(unique_algorithms))
     runs = compute_metrics(list(dataset["distances"]), results, args.x_axis, args.y_axis)
     if not runs:
         raise Exception('Nothing to plot')
 
     create_plot(runs, args.raw, args.x_log,
-            args.y_log, args.x_axis, args.y_axis, args.output, linestyles)
+            args.y_log, args.x_axis, args.y_axis, args.output, linestyles, args.batch)
diff --git a/templates/detail_page.html b/templates/detail_page.html
@@ -3,9 +3,17 @@
         <div class="container">
         {% for item in plot_data.keys() %}
             {% if item=="normal" %}
-            <h2>Plots for {{title}}</h2>
+                {% if batch %}
+                    <h2>Plots for {{title}} in batch mode</h2>
+                {% else %}
+                    <h2>Plots for {{title}}</h2>
+                {% endif %}
             {% elif item=="scatter" and args.scatter %}
-            <h2>Scatterplots for {{title}}</h2>
+                {% if batch %}
+                    <h2>Scatterplots for {{title}} in batch mode</h2>
+                {% else %}
+                    <h2>Scatterplots for {{title}}</h2>
+                {% endif %}
             {% endif %}
             {% for plot in plot_data[item] %}
             {{ plot }}

diff --git a/templates/summary.html b/templates/summary.html
@@ -11,40 +11,66 @@ <h1>Benchmarking Results</h1>
             by <em>(k = ...)</em>, the number of nearest neighbors an algorithm was supposed to return. The plot shown depicts <em>Recall</em> (the fraction
             of true nearest neighbors found, on average over all queries) against <em>Queries per second</em>.  Clicking on a plot reveals detailed interactive plots, including
             approximate recall, index size, and build time.</p>
-            <h2 id ="datasets">Results by Dataset</h2>
-            {% for distance_data in dataset_with_distances %}
-                <h3>Distance: {{ distance_data.name }} </h3>
-                {% for entry in distance_data.entries %}
-                <a href="./{{entry.name}}.html">
-                    <div class="row" id="{{entry.name}}">
-                        <div class = "col-md-4 bg-success">
-                            <h4>{{entry.desc}}</h4>
+            {% for type in ['non-batch', 'batch'] %}
+                {% if len(dataset_with_distances[type]) > 0 %}
+                    {% if type == 'batch' %}
+                        <h2>Benchmarks for Batched Queries</h2>
+                    {% else %}
+                        <h2>Benchmarks for Single Queries</h2>
+                    {% endif %}
+
+                    <h2 id ="datasets">Results by Dataset</h2>
+                    {% for distance_data in dataset_with_distances[type] %}
+                        <h3>Distance: {{ distance_data.name }} </h3>
+                        {% for entry in distance_data.entries %}
+                            {% if type == 'batch' %}
+                            <a href="./{{entry.name}}_batch=True.html">
+                            {% else %}
+                            <a href="./{{entry.name}}_batch=False.html">
+                            {% endif %}
+                            <div class="row" id="{{entry.name}}">
+                                <div class = "col-md-4 bg-success">
+                                    <h4>{{entry.desc}}</h4>
+                            </div>
+                            <div class = "col-md-8">
+                                {% if type == 'batch' %}
+                                <img class = "img-responsive" src="{{entry.name}}_batch=True.png" />
+                                {% else %}
+                                <img class = "img-responsive" src="{{entry.name}}_batch=False.png" />
+                                {% endif %}
+                            </div>
+                        </div>
+                        </a>
+                        <hr />
+                        {% endfor %}
+                    {% endfor %}
+                    <h2 id="algorithms">Results by Algorithm</h2>
+                    <ul class="list-inline"><b>Algorithms:</b>
+                        {% for algo in algorithms[type].keys() %}
+                            <li><a href="#{{algo}}">{{algo}}</a></li>
+                        {% endfor %}
+                    </ul>
+                    {% for algo in algorithms[type].keys()%}
+                        {% if type == 'batch' %}
+                            <a href="./{{algo}}_batch=True.html">
+                        {% else %}
+                            <a href="./{{algo}}_batch=False.html">
+                        {% endif %}
+                        <div class="row" id="{{algo}}">
+                            <div class = "col-md-4 bg-success">
+                                <h4>{{algo}}</h4>
+                        </div>
+                        <div class = "col-md-8">
+                            {% if type == 'batch' %}
+                                <img class = "img-responsive" src="{{algo}}_batch=True.png" />
+                            {% else %}
+                                <img class = "img-responsive" src="{{algo}}_batch=False.png" />
+                            {% endif %}
+                        </div>
                     </div>
-                    <div class = "col-md-8">
-                        <img class = "img-responsive" src="{{entry.name}}.png" />
-                    </div>
-                </div>
-                </a>
-                <hr />
-                {% endfor %}
-            {% endfor %}
-            <h2 id="algorithms">Results by Algorithm</h2>
-            <ul class="list-inline"><b>Algorithms:</b>
-                {% for algo in algorithms %}
-                <li><a href="#{{algo}}">{{algo}}</a></li>
-                {% endfor %}
-            </ul>
-            {% for algo in algorithms%}
-            <a href="./{{algo}}.html">
-                <div class="row" id="{{algo}}">
-                    <div class = "col-md-4 bg-success">
-                        <h4>{{algo}}</h4>
-                </div>
-                <div class = "col-md-8">
-                    <img class = "img-responsive" src="{{algo}}.png" />
-                </div>
-            </div>
-            </a>
-            <hr />
+                    </a>
+                    <hr />
+                    {% endfor %}
+                {% endif %}
             {% endfor %}
 {% endblock %}