From c6418175813007be62406bfb35b42441202b017a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Aum=C3=BCller?= Date: Sat, 3 Mar 2018 18:12:29 +0100 Subject: [PATCH] Use templating engine to seperate plotting logic and html output. --- create_website.py | 333 ++++++------------------------------- requirements.txt | 1 + templates/detail_page.html | 183 ++++++++++++++++++++ templates/general.html | 58 +++++++ templates/latex.template | 30 ++++ templates/summary.html | 50 ++++++ 6 files changed, 376 insertions(+), 279 deletions(-) create mode 100644 templates/detail_page.html create mode 100644 templates/general.html create mode 100644 templates/latex.template create mode 100644 templates/summary.html diff --git a/create_website.py b/create_website.py index 42c0e9019..22b77e387 100644 --- a/create_website.py +++ b/create_website.py @@ -4,6 +4,7 @@ import os, json, pickle, yaml import numpy import hashlib +from jinja2 import Environment, FileSystemLoader from ann_benchmarks import results from ann_benchmarks.datasets import get_dataset @@ -65,6 +66,13 @@ def directory_path(s): raise argparse.ArgumentTypeError("'%s' is not a directory" % s) return s + "/" +def prepare_data(data, xn, yn): + """Change format from (algo, instance, dict) to (algo, instance, x, y).""" + res = [] + for algo, algo_name, result in data: + res.append((algo, algo_name, result[xn], result[yn])) + return res + parser = argparse.ArgumentParser() parser.add_argument( '--plottype', @@ -88,238 +96,23 @@ def directory_path(s): action = 'store_true') args = parser.parse_args() -def get_html_header(title): - return """ - - - - - - - - %(title)s - - - - - - - - - - - - - - """ % {"title" : title} - -def get_index_description(): - return """ -
-

Info

-

ANN-Benchmarks is a benchmarking environment for approximate nearest neighbor algorithms search. This website contains the current benchmarking results. Please visit http://github.com/erikbern/ann-benchmarks/ to get an overview over evaluated data sets and algorithms. Make a pull request on Github to add your own code or improvements to the - benchmarking system. -

-
-

Benchmarking Results

-

Results are split by distance measure and dataset. In the bottom, you can find an overview of an algorithm's performance on all datasets. Each dataset is annoted - by (k = ...), the number of nearest neighbors an algorithm was supposed to return. The plot shown depicts Recall (the fraction - of true nearest neighbors found, on average over all queries) against Queries per second. Clicking on a plot reveils detailled interactive plots, including - approximate recall, index size, and build time.

-

Results by Dataset

- """ - -def get_index_footer(): - return """ -
-

Contact

-

ANN-Benchmarks has been developed by Martin Aumueller (maau@itu.dk), Erik Bernhardsson (mail@erikbern.com), and Alec Faitfull (alef@itu.dk). Please use - Github to submit your implementation or improvements.

-
-
- -""" - -def get_row_desc(idd, desc): - return """ - -
-
-

%(desc)s

-
-
- -
-
-
-
""" % { "idd" : idd, "desc" : desc} - -def prepare_data(data, xn, yn): - """Change format from (algo, instance, dict) to (algo, instance, x, y).""" - res = [] - for algo, algo_name, result in data: - res.append((algo, algo_name, result[xn], result[yn])) - return res - -def get_latex_plot(all_data, xn, yn, xm, ym, plottype): - latex_str = """ -\\begin{figure} - \\centering - \\begin{tikzpicture} - \\begin{axis}[ - xlabel={%(xlabel)s}, - ylabel={%(ylabel)s}, - ymode = log, - yticklabel style={/pgf/number format/fixed, - /pgf/number format/precision=3}, - legend style = { anchor=west}, - cycle list name = black white - ] - """ % {"xlabel" : xm["description"], "ylabel" : ym["description"] } - color_index = 0 - only_marks = "" - if plottype == "bubble": - only_marks = "[only marks]" +def get_latex_plot(all_data, xn, yn, xm, ym, plottype, j2_env): + plot_data = [] for algo in sorted(all_data.keys(), key=lambda x: x.lower()): - xs, ys, ls, axs, ays, als = create_pointset(prepare_data(all_data[algo], xn, yn), xn, yn) - latex_str += """ - \\addplot %s coordinates {""" % only_marks - for i in range(len(xs)): - latex_str += "(%s, %s)" % (str(xs[i]), str(ys[i])) - latex_str += " };" - latex_str += """ - \\addlegendentry{%s}; - """ % (algo) - latex_str += """ - \\end{axis} - \\end{tikzpicture} - \\caption{%(caption)s} - \\label{} -\\end{figure} - """ % {"caption" : get_plot_label(xm, ym)} - return latex_str - -def get_latex_html(all_data, xn, yn, xm, ym, plottype, additional_label): - return """ -
-
- -
-
- - - """ % { "latexcode": get_latex_plot(all_data, xn, yn, xm, ym, plottype), "buttonlabel" : hashlib.sha224((get_plot_label(xm, ym) + additional_label).encode("utf-8")).hexdigest() } - -def get_plot_html(data): - return """ -

%(xlabel)s/%(ylabel)s

-
- - -
- """ % data + xs, ys, ls, axs, ays, als = \ + create_pointset(prepare_data(all_data[algo], xn, yn), xn, yn) + plot_data.append({ "name": algo, "coords" : zip(xs, ys), + "scatter" : plottype == "bubble" }) + return j2_env.get_template("latex.template").\ + render(plot_data = plot_data, caption = get_plot_label(xm, ym), + xlabel = xm["description"], ylabel = ym["description"]) def create_data_points(all_data, xn, yn, linestyle, render_all_points): color_index = 0 output_str = "" for algo in sorted(all_data.keys(), key=lambda x: x.lower()): - xs, ys, ls, axs, ays, als = create_pointset(prepare_data(all_data[algo], xn, yn), xn, yn) + xs, ys, ls, axs, ays, als = \ + create_pointset(prepare_data(all_data[algo], xn, yn), xn, yn) if render_all_points: xs, ys, ls = axs, ays, als output_str += """ @@ -338,38 +131,34 @@ def create_data_points(all_data, xn, yn, linestyle, render_all_points): color_index += 1 return output_str -def create_plot(ds, all_data, xn, yn, linestyle, additional_label = "", plottype = "line"): +def create_plot(all_data, xn, yn, linestyle, j2_env, additional_label = "", plottype = "line"): xm, ym = (metrics[xn], metrics[yn]) - plot_data = { "id" : ds, "xlabel" : xm["description"], "ylabel" : ym["description"], "plottype" : plottype, - "plotlabel" : get_plot_label(xm, ym), "label": additional_label, - "datapoints" : create_data_points(all_data, - xn, yn, linestyle, plottype == "bubble") } - output_str = get_plot_html(plot_data) - if args.latex: - output_str += get_latex_html(all_data, xn, yn, xm, ym, plottype, additional_label) - return output_str - -def build_detail_site(data, label_func): + return {"xlabel" : xm["description"], + "ylabel" : ym["description"], + "plottype" : plottype, + "plotlabel" : get_plot_label(xm, ym), + "label": additional_label, + "datapoints" : create_data_points(all_data, xn, yn, + linestyle, plottype == "bubble"), + "buttonlabel" : hashlib.sha224((get_plot_label(xm, ym) + + additional_label).encode("utf-8")).hexdigest(), + 
"latexcode" : get_latex_plot(all_data, xn, yn, xm, ym, plottype, j2_env)} + +def build_detail_site(data, label_func, j2_env): for (name, runs) in data.items(): + print("Building '%s'" % name) all_runs = runs.keys() linestyles = convert_linestyle(create_linestyles(all_runs)) - output_str = get_html_header(name) label = label_func(name) - output_str += """ -
-

Plots for %s

""" % (label) + data = {"normal" : [], "scatter" : []} + for plottype in args.plottype: xn, yn = plot_variants[plottype] - print("Processing '%s' with %s" % (name, plottype)) - output_str += create_plot(label, runs, xn, yn, linestyles) - if args.scatter: - output_str += """ -
-

Scatterplots for %s""" % (label) - for plottype in args.plottype: - xn, yn = plot_variants[plottype] - print("Processing scatterplot '%s' with %s" % (name, plottype)) - output_str += create_plot(name, runs, xn, yn, linestyles, "Scatterplot ", "bubble") + data["normal"].append(create_plot(runs, xn, yn, linestyles, j2_env)) + if args.scatter: + data["scatter"].append(create_plot(runs, xn, yn, + linestyles, j2_env, "Scatterplot ", "bubble")) + # create png plot for summary page data_for_plot = {} for k in runs.keys(): @@ -377,27 +166,19 @@ def build_detail_site(data, label_func): plot.create_plot(data_for_plot, False, False, True, 'k-nn', 'qps', args.outputdir + name + ".png", create_linestyles(all_runs)) - output_str += """ -

-
- - - """ with open(args.outputdir + name + ".html", "w") as text_file: - text_file.write(output_str) + text_file.write(j2_env.get_template("detail_page.html"). + render(title = label, plot_data = data, args = args)) -def build_index(datasets, algorithms): +def build_index_site(datasets, algorithms, j2_env, file_name): distance_measures = sorted(set([get_distance_from_desc(e) for e in datasets.keys()])) sorted_datasets = sorted(set([get_dataset_from_desc(e) for e in datasets.keys()])) - output_str = get_html_header("ANN-Benchmarks") - output_str += get_index_description() + dataset_data = [] for dm in distance_measures: - output_str += """ -

Distance: %s

- """ % dm.capitalize() + d = {"name" : dm.capitalize(), "entries": []} for ds in sorted_datasets: matching_datasets = [e for e in datasets.keys() \ if get_dataset_from_desc(e) == ds and \ @@ -405,20 +186,13 @@ def build_index(datasets, algorithms): sorted_matches = sorted(matching_datasets, \ key = lambda e: int(get_count_from_desc(e))) for idd in sorted_matches: - output_str += get_row_desc(idd, get_dataset_label(idd)) - output_str += """ -

Results by Algorithm

+

Plots for {{title}}

+ {% for plot in plot_data["normal"] %} +

{{plot.xlabel}}/{{plot.ylabel}}

+
+ + +
+ {% if args.latex %} +
+
+ +
+
+ + + {% endif %} + {% endfor %} + {% if args.scatter %} +
+

Scatterplots for {{title}}

+ {% for plot in plot_data["scatter"] %} +

{{plot.xlabel}}/{{plot.ylabel}}

+
+ + +
+ {% if args.latex %} +
+
+ +
+
+ + + {% endif %} + {% endfor %} + {% endif %} +{% endblock %} diff --git a/templates/general.html b/templates/general.html new file mode 100644 index 000000000..74ba2a6e1 --- /dev/null +++ b/templates/general.html @@ -0,0 +1,58 @@ + + + + + + + + {{ title }} + + + + + + + + + + + + + + + + {% block content %} {% endblock %} + +
+

Contact

+

ANN-Benchmarks has been developed by Martin Aumueller (maau@itu.dk), Erik Bernhardsson (mail@erikbern.com), and Alec Faitfull (alef@itu.dk). Please use + Github to submit your implementation or improvements.

+
+
+ + diff --git a/templates/latex.template b/templates/latex.template new file mode 100644 index 000000000..cca51256e --- /dev/null +++ b/templates/latex.template @@ -0,0 +1,30 @@ + +\begin{figure} + \centering + \begin{tikzpicture} + \begin{axis}[ + xlabel={ {{xlabel}} }, + ylabel={ {{ylabel}} }, + ymode = log, + yticklabel style={/pgf/number format/fixed, + /pgf/number format/precision=3}, + legend style = { anchor=west}, + cycle list name = black white + ] + {% for algo in plot_data %} + {% if algo.scatter %} + \addplot [only marks] coordinates { + {% else %} + \addplot coordinates { + {% endif %} + {% for coord in algo.coords %} + ({{ coord[0]}}, {{ coord[1] }}) + {% endfor %} + } + \addlegendentry{ {{algo.name}} }; + {% endfor %} + \end{axis} + \end{tikzpicture} + \caption{ {{caption}} } + \label{} +\end{figure} diff --git a/templates/summary.html b/templates/summary.html new file mode 100644 index 000000000..99a8e6f9f --- /dev/null +++ b/templates/summary.html @@ -0,0 +1,50 @@ +{% extends "general.html" %} +{% block content %} +
+

Info

+

ANN-Benchmarks is a benchmarking environment for approximate nearest neighbor search algorithms. This website contains the current benchmarking results. Please visit http://github.com/erikbern/ann-benchmarks/ to get an overview of evaluated data sets and algorithms. Make a pull request on Github to add your own code or improvements to the + benchmarking system. +

+
+

Benchmarking Results

+

Results are split by distance measure and dataset. At the bottom, you can find an overview of an algorithm's performance on all datasets. Each dataset is annotated + by (k = ...), the number of nearest neighbors an algorithm was supposed to return. The plot shown depicts Recall (the fraction + of true nearest neighbors found, on average over all queries) against Queries per second. Clicking on a plot reveals detailed interactive plots, including + approximate recall, index size, and build time.

+

Results by Dataset

+ {% for distance_data in dataset_with_distances %} +

Distance: {{ distance_data.name }}

+ {% for entry in distance_data.entries %} + +
+
+

{{entry.desc}}

+
+
+ +
+
+
+
+ {% endfor %} + {% endfor %} +

Results by Algorithm

+
    Algorithms: + {% for algo in algorithms %} +
  • {{algo}}
  • + {% endfor %} +
+ {% for algo in algorithms%} + +
+
+

{{algo}}

+
+
+ +
+
+
+
+ {% endfor %} +{% endblock %}