Skip to content

Commit

Permalink
Add GT URLs for wikipedia runbooks (#316)
Browse files Browse the repository at this point in the history
* Added ground-truth (GT) URLs for the wikipedia runbooks

* Undid a local change

* Fixed the GT URL for the wikipedia-1M runbook (removed the trailing slash)
  • Loading branch information
magdalendobson authored Nov 1, 2024
1 parent 3c35f54 commit 541e3d1
Show file tree
Hide file tree
Showing 8 changed files with 54 additions and 18 deletions.
28 changes: 16 additions & 12 deletions data_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,18 +96,22 @@ def cleaned_run_metric(run_metrics):
dataset = DATASETS[dataset_name]()
runbook_paths = [None]
if track == 'streaming':
runbook_paths = ['neurips23/runbooks/streaming/simple_runbook.yaml',
'neurips23/runbooks/streaming/simple_replace_runbook.yaml',
'neurips23/runbooks/streaming/random_replace_runbook.yaml',
'neurips23/runbooks/streaming/clustered_replace_runbook.yaml',
'neurips23/runbooks/streaming/clustered_runbook.yaml',
'neurips23/runbooks/streaming/clustered_runbook.yaml',
'neurips23/runbooks/streaming/delete_runbook.yaml',
'neurips23/runbooks/streaming/final_runbook.yaml',
'neurips23/runbooks/streaming/msturing-10M_slidingwindow_runbook.yaml',
'neurips23/runbooks/streaming/wikipedia-35M_expirationtime_runbook.yaml',
'neurips23/runbooks/streaming/wikipedia-35M_expiration_time_replace_runbook.yaml',
'neurips23/runbooks/streaming/msmarco-100M_expirationtime_runbook.yaml']
runbook_paths = ['neurips23/runbooks/simple_runbook.yaml',
'neurips23/runbooks/simple_replace_runbook.yaml',
'neurips23/runbooks/random_replace_runbook.yaml',
'neurips23/runbooks/clustered_replace_runbook.yaml',
'neurips23/runbooks/clustered_runbook.yaml',
'neurips23/runbooks/clustered_runbook.yaml',
'neurips23/runbooks/delete_runbook.yaml',
'neurips23/runbooks/final_runbook.yaml',
'neurips23/runbooks/msturing-10M_slidingwindow_runbook.yaml',
'neurips23/runbooks/wikipedia-35M_expirationtime_runbook.yaml',
'neurips23/runbooks/wikipedia-1M_expiration_time_runbook.yaml',
'neurips23/runbooks/wikipedia-35M_expiration_time_replace_only_runbook.yaml',
'neurips23/runbooks/wikipedia-1M_expiration_time_replace_only_runbook.yaml',
'neurips23/runbooks/wikipedia-35M_expiration_time_replace_delete_runbook.yaml',
'neurips23/runbooks/wikipedia-1M_expiration_time_replace_delete_runbook.yaml',
'neurips23/runbooks/msmarco-100M_expirationtime_runbook.yaml']
for runbook_path in runbook_paths:
print("Looking for runbook ", runbook_path)
results = load_all_results(dataset_name, neurips23track=track, runbook_path=runbook_path)
Expand Down
12 changes: 7 additions & 5 deletions neurips23/runbooks/gen_expiration_time_runbook.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def gen_exp_time_runbook(dataset_name, dataset_size, max_t, runbook_filename, ra
dataset_name = 'wikipedia-1M'
dataset_size = 1000000
max_t = 100
gt_url = "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-1M_expiration_time_runbook.yaml/"
gt_url = "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-1M_expiration_time_runbook.yaml"
gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, False, gt_url)

ratios = (0, 4, 18)
Expand All @@ -134,7 +134,7 @@ def gen_exp_time_runbook(dataset_name, dataset_size, max_t, runbook_filename, ra
dataset_name = 'wikipedia-35M'
dataset_size = 8000000 #only use a prefix of the dataset
max_t = 80
gt_url = None
gt_url = "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-35M_expiration_time_replace_only_runbook.yaml"
gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True, gt_url, False)

ratios = (0, 4, 18)
Expand All @@ -144,7 +144,7 @@ def gen_exp_time_runbook(dataset_name, dataset_size, max_t, runbook_filename, ra
dataset_name = 'wikipedia-1M'
dataset_size = 1000000
max_t = 100
gt_url = None
gt_url = "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-1M_expiration_time_replace_only_runbook.yaml"
gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True, gt_url, False)

ratios = (3, 8, 18)
Expand All @@ -154,7 +154,8 @@ def gen_exp_time_runbook(dataset_name, dataset_size, max_t, runbook_filename, ra
dataset_name = 'wikipedia-35M'
dataset_size = 35000000
max_t = 350
gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True, None)
gt_url = "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-35M_expiration_time_replace_delete_runbook.yaml"
gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True, gt_url)

ratios = (1, 8, 18)
timesteps = (0, 100, 20)
Expand All @@ -163,7 +164,8 @@ def gen_exp_time_runbook(dataset_name, dataset_size, max_t, runbook_filename, ra
dataset_name = 'wikipedia-1M'
dataset_size = 1000000
max_t = 100
gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True, None)
gt_url = "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-1M_expiration_time_replace_delete_runbook.yaml"
gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True, gt_url)

ratios = (0, 6, 25)
timesteps = (0, 200, 50)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1208,3 +1208,4 @@ wikipedia-1M:
316:
operation: search
max_pts: 293233
gt_url: "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-1M_expiration_time_replace_delete_runbook.yaml"
Original file line number Diff line number Diff line change
Expand Up @@ -1068,3 +1068,4 @@ wikipedia-1M:
278:
operation: search
max_pts: 698369
gt_url: "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-1M_expiration_time_replace_only_runbook.yaml"
Original file line number Diff line number Diff line change
Expand Up @@ -840,4 +840,4 @@ wikipedia-1M:
260:
operation: search
max_pts: 410000
gt_url: https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-1M_expiration_time_runbook.yaml/
gt_url: https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-1M_expiration_time_runbook.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4436,3 +4436,4 @@ wikipedia-35M:
1150:
operation: search
max_pts: 6682767
gt_url: "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-35M_expiration_time_replace_delete_runbook.yaml"
Original file line number Diff line number Diff line change
Expand Up @@ -852,3 +852,4 @@ wikipedia-35M:
222:
operation: search
max_pts: 5548955
gt_url: "https://comp21storage.z5.web.core.windows.net/wiki-cohere-35M/wikipedia-35M_expiration_time_replace_only_runbook.yaml"
26 changes: 26 additions & 0 deletions neurips23/streaming/diskann/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,32 @@ msturing-1M:
query-args: |
[{"Ls":300, "T":16},
{"Ls":100, "T":16}]
wikipedia-1M:
diskann:
docker-tag: neurips23-streaming-diskann
module: neurips23.streaming.diskann.diskann-str
constructor: diskann
base-args: ["@metric"]
run-groups:
base:
args: |
[{"R":32, "L":100, "insert_threads":32, "consolidate_threads":32}]
query-args: |
[
{"Ls":100, "T":32}]
wikipedia-35M:
diskann:
docker-tag: neurips23-streaming-diskann
module: neurips23.streaming.diskann.diskann-str
constructor: diskann
base-args: ["@metric"]
run-groups:
base:
args: |
[{"R":32, "L":100, "insert_threads":32, "consolidate_threads":32}]
query-args: |
[
{"Ls":100, "T":32}]
msspacev-10M:
diskann:
docker-tag: neurips23-streaming-diskann
Expand Down

0 comments on commit 541e3d1

Please sign in to comment.