diff --git a/data/sample_data.csv b/data/sample_data.csv index 2705ffb..46f4e87 100644 --- a/data/sample_data.csv +++ b/data/sample_data.csv @@ -1,4 +1,4 @@ -query_number, metric_type (NDCG; MAP; etc), score (float) +query_number,metric_type,score 1, NDCG, 0.078 2, MAP, 1 3, MAP, 2 diff --git a/data/sample_two.csv b/data/sample_two.csv new file mode 100644 index 0000000..46f4e87 --- /dev/null +++ b/data/sample_two.csv @@ -0,0 +1,12 @@ +query_number,metric_type,score +1, NDCG, 0.078 +2, MAP, 1 +3, MAP, 2 +4, MAP, 3 +5, MAP, 4 +6, MAP, 5 +7, MAP, 6 +8, MAP, 7 +9, MAP, 8 +10, MAP, 9 +11, MAP, 10.0 \ No newline at end of file diff --git a/src/server.py b/src/server.py index 1989239..a0dc316 100644 --- a/src/server.py +++ b/src/server.py @@ -4,21 +4,22 @@ import pandas as pd import redis import io +import ujson +import collections +from itertools import combinations +from scipy.stats import ttest_ind +from scipy.stats import f_oneway +import numpy as np app = Flask(__name__) ALLOWED_EXTENSIONS = {'txt', 'csv'} -POSTGRES_USER = "818_user" -POSTGRES_PW = "818" -POSTGRES_URL = "127.0.0.1:5432" -POSTGRES_DB = "project" - +r = redis.Redis(host='localhost', port=6379, db=0) def allowed_file(filename): return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS - @app.route('/upload', methods=['GET', 'POST']) def upload(): if request.method == 'POST': @@ -32,15 +33,21 @@ def upload(): if file.filename == '': flash('No selected file') return redirect(request.url) + if file and allowed_file(file.filename): filename = secure_filename(file.filename) file_contents = file.read() - data = file_contents.decode("utf-8") + data = file_contents.decode("ascii") df = pd.read_csv(io.StringIO(data), delimiter=',', header='infer') - print(df) - + r.sadd("experiments", filename) + r.set(filename, data) + + # Average, median, STD, MAD + r.set(filename+"_statistics", df['score'].describe().to_json()) + return redirect(url_for('upload', filename=filename)) + return ''' Upload new File @@ -51,21 +58,81 @@ def upload(): ''' +def one_way_anova(): + t, p = f_oneway(*data.values()) + return p + +def t_test(): + for list1, list2 in combinations(data.keys(), 2): + t, p = ttest_ind(data[list1], data[list2]) + print(list1, list2, p) + +def histogram_intersection(a, b): + v = np.minimum(a, b).sum().round(decimals=1) + return v + +@app.route('/compare//') +def compare(experiment_one, experiment_two): + exp_one_stats = r.get(experiment_one+'_statistics').decode('utf-8') + exp_two_stats = r.get(experiment_two+'_statistics').decode('utf-8') + + exp_one_data = r.get(experiment_one).decode('ascii') + exp_two_data = r.get(experiment_two).decode('ascii') + + df_data_1 = pd.read_csv(io.StringIO(exp_one_data), delimiter=',', header='infer') + df_data_2 = pd.read_csv(io.StringIO(exp_two_data), delimiter=',', header='infer') + + df_data_2 = df_data_2.rename(columns={"query_number": "query_number_q2", "metric_type": "metric_type_q2", "score": "score_q2"}) + + df_stats_1 = pd.read_json(exp_one_stats, typ='series') + df_stats_2 = pd.read_json(exp_two_stats, typ='series') + + # Calculate P-Value between query results + + merged_stats = pd.concat([df_stats_1, df_stats_2], axis=1, sort=False) + merged_stats['absolute_difference'] = merged_stats[0] - merged_stats[1] + print(merged_stats) + + merged_data = pd.concat([df_data_1, df_data_2], axis=1, sort=False) + merged_data['absolute_difference'] = merged_data['score'] - merged_data['score_q2'] + print(merged_data) + + # Correlation, covariance + stats_corr = merged_stats.corr(method=histogram_intersection) + stats_cov = merged_stats.cov() -@app.route('/compare//') -def compare(): - return 'This is the compare function for two experiments.' + data_corr = merged_data.corr(method=histogram_intersection) + data_cov = merged_data.cov() + aggregator = {} + aggregator["merged_stats"] = merged_stats.to_dict() + aggregator["merged_data"] = merged_data.to_dict() + aggregator["stats_corr"] = stats_corr.to_dict() + aggregator["stats_cov"] = stats_cov.to_dict() + aggregator["data_corr"] = data_corr.to_dict() + aggregator["data_cov"] = data_cov.to_dict() + return ujson.dumps(aggregator) -@app.route('/experiment') -def experiment(): - return 'This is the view of an experiment' +@app.route('/experiment/') +def experiment(experiment_id): + return r.get(experiment_id+'_statistics').decode('utf-8') -@app.route('/hello') +@app.route('/') def hello(): return 'Hello!' +def experiment_list(): + experiments = [element.decode("utf-8") for element in r.smembers("experiments")] + return experiments + +@app.route('/experiments') +def experiments(): + experiment_holder = collections.defaultdict(list) + for element in r.smembers("experiments"): + experiment_holder["experiments"].append(str(element.decode("utf-8"))) + return_json = ujson.dumps(experiment_holder) + return return_json if __name__ == '__main__': diff --git a/src/static/style.css b/src/static/style.css new file mode 100644 index 0000000..93623a4 --- /dev/null +++ b/src/static/style.css @@ -0,0 +1,26 @@ +html { font-family: sans-serif; background: #eee; padding: 1rem; } +body { max-width: 960px; margin: 0 auto; background: white; } +h1 { font-family: serif; color: #377ba8; margin: 1rem 0; } +a { color: #377ba8; } +hr { border: none; border-top: 1px solid lightgray; } +nav { background: lightgray; display: flex; align-items: center; padding: 0 0.5rem; } +nav h1 { flex: auto; margin: 0; } +nav h1 a { text-decoration: none; padding: 0.25rem 0.5rem; } +nav ul { display: flex; list-style: none; margin: 0; padding: 0; } +nav ul li a, nav ul li span, header .action { display: block; padding: 0.5rem; } +.content { padding: 0 1rem 1rem; } +.content > header { border-bottom: 1px solid lightgray; display: flex; align-items: flex-end; } +.content > header h1 { flex: auto; margin: 1rem 0 0.25rem 0; } +.flash { margin: 1em 0; padding: 1em; background: #cae6f6; border: 1px solid #377ba8; } +.post > header { display: flex; align-items: flex-end; font-size: 0.85em; } +.post > header > div:first-of-type { flex: auto; } +.post > header h1 { font-size: 1.5em; margin-bottom: 0; } +.post .about { color: slategray; font-style: italic; } +.post .body { white-space: pre-line; } +.content:last-child { margin-bottom: 0; } +.content form { margin: 1em 0; display: flex; flex-direction: column; } +.content label { font-weight: bold; margin-bottom: 0.5em; } +.content input, .content textarea { margin-bottom: 1em; } +.content textarea { min-height: 12em; resize: vertical; } +input.danger { color: #cc2f2e; } +input[type=submit] { align-self: start; min-width: 10em; } \ No newline at end of file diff --git a/src/templates/base.html b/src/templates/base.html new file mode 100644 index 0000000..40518c1 --- /dev/null +++ b/src/templates/base.html @@ -0,0 +1,24 @@ + +{% block title %}{% endblock %} - Flaskr + + +
+
+ {% block header %}{% endblock %} +
+ {% for message in get_flashed_messages() %} +
{{ message }}
+ {% endfor %} + {% block content %}{% endblock %} +
\ No newline at end of file diff --git a/src/templates/index.html b/src/templates/index.html new file mode 100644 index 0000000..e991f4d --- /dev/null +++ b/src/templates/index.html @@ -0,0 +1,28 @@ +{% extends 'base.html' %} + +{% block header %} +

{% block title %}Posts{% endblock %}

+{% if g.user %} +New +{% endif %} +{% endblock %} + +{% block content %} +{% for post in posts %} +
+
+
+

{{ post['title'] }}

+
by {{ post['username'] }} on {{ post['created'].strftime('%Y-%m-%d') }}
+
+ {% if g.user['id'] == post['author_id'] %} + Edit + {% endif %} +
+

{{ post['body'] }}

+
+{% if not loop.last %} +
+{% endif %} +{% endfor %} +{% endblock %} \ No newline at end of file