forked from TabbyML/tabby
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add python eval script (TabbyML#266)
* feat: add python eval script * update * add local script * move eval script * update * update * update * update * update * update * update * update * update * update * update * update * add README
- Loading branch information
Showing
11 changed files
with
553 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
__pycache__ | ||
.ipynb_checkpoints | ||
reports.* | ||
tabby |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Eval | ||
|
||
## Local | ||
`./eval.sh` | ||
|
||
## Skypilot | ||
`./eval_sky.sh` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
[[repositories]] | ||
git_url = "https://github.com/huggingface/text-generation-inference" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
version: '3.5' | ||
services: | ||
tabby: | ||
command: serve --model TabbyML/SantaCoder-1B --device cuda | ||
deploy: | ||
resources: | ||
reservations: | ||
devices: | ||
- driver: nvidia | ||
count: 1 | ||
capabilities: [gpu] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
#!/bin/bash
# Local evaluation run: bring up the server with docker-compose, execute the
# evaluation notebook with papermill, render it to HTML, then tear down.
# MAX_RECORDS (default 3) is forwarded to the notebook as `max_records`.
set -ex

# Stage the repository config inside the ./tabby directory.
mkdir -p tabby
cp config.toml tabby/

# Stop anything left over from a previous run before starting again.
docker-compose down

# Layer the CUDA compose override on top only when nvidia-smi succeeds.
if nvidia-smi; then
  docker-compose -f docker-compose.yaml -f docker-compose.cuda.yaml up -d
else
  docker-compose up -d
fi

# Poll the local health endpoint until curl can reach the server.
while ! curl -X POST http://localhost:8080/v1/health; do
  echo "server not ready, waiting..."
  sleep 5
done

papermill main.ipynb ./reports.ipynb -r filepattern "./tabby/dataset/*.jsonl" -r max_records "${MAX_RECORDS:-3}"

# Cells tagged `remove` are dropped from the rendered HTML report.
jupyter nbconvert reports.ipynb --TagRemovePreprocessor.enabled=True --TagRemovePreprocessor.remove_cell_tags remove --to html

docker-compose down

echo done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#!/bin/bash
# Run the evaluation on the `tabby-eval` SkyPilot cluster and copy the
# resulting reports back into the current directory.
set -ex

# Cluster name, task file and env override shared by `sky exec` and `sky launch`.
SKY_ARGS=(tabby-eval skypilot.yaml --env MAX_RECORDS=300)

# Try the existing cluster first; if that fails, launch it.
if ! sky exec "${SKY_ARGS[@]}"; then
  sky launch -c "${SKY_ARGS[@]}"
fi

# Fetch the executed notebook and its HTML rendering.
for report in reports.ipynb reports.html; do
  scp "tabby-eval:~/sky_workdir/${report}" ./
done
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
from typing import Iterator | ||
|
||
import glob | ||
import json | ||
from dataclasses import dataclass | ||
from transformers import HfArgumentParser | ||
|
||
|
||
@dataclass
class Item:
    """One evaluation sample: a tagged span of a document plus its context.

    Instances are produced by ``iter_items``; every field is plain text.
    """

    # Provenance, copied verbatim from the source document record.
    git_url: str
    filepath: str
    language: str

    # Text slices cut out of the document content for one tag:
    # the tag's name, its full body, and the text before / after the body.
    name: str
    body: str
    prefix: str
    suffix: str
|
||
|
||
def iter_items(doc) -> Iterator[Item]:
    """Yield one ``Item`` per tag of *doc*, or nothing if *doc* is filtered out.

    A document is skipped entirely when any of these hold:
    its ``max_line_length`` exceeds 500, its ``avg_line_length`` is below 10
    or above 200, or its ``alphanum_fraction`` is below 0.25.

    For each entry of ``doc["tags"]`` the name and body are sliced out of
    ``doc["content"]`` and the surrounding text is attached as prefix/suffix.
    """
    # Short-circuit `or` keeps the checks in their original order.
    filtered_out = (
        doc["max_line_length"] > 500
        or doc["avg_line_length"] < 10
        or doc["avg_line_length"] > 200
        or doc["alphanum_fraction"] < 0.25
    )
    if filtered_out:
        return

    for tag in doc["tags"]:
        text = doc["content"]
        yield Item(
            name=get_content(text, tag["name_range"]),
            body=get_content(text, tag["range"]),
            prefix=get_prefix(text, tag["range"]["start"]),
            suffix=get_suffix(text, tag["range"]["end"]),
            git_url=doc["git_url"],
            filepath=doc["filepath"],
            language=doc["language"],
        )
|
||
|
||
def iter_docs(filepattern: str):
    """Yield one parsed JSON value per non-blank line of every matching file.

    Args:
        filepattern: glob pattern selecting the JSON-lines files to read.

    Yields:
        The result of ``json.loads`` for each line, file by file.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    # glob order is arbitrary; sort so repeated runs see the same sequence.
    for filepath in sorted(glob.glob(filepattern)):
        # Read as UTF-8 explicitly rather than the platform default encoding.
        with open(filepath, encoding="utf-8") as f:
            for line in f:
                # Blank or trailing empty lines carry no record; skip them
                # instead of letting json.loads fail on them.
                if not line.strip():
                    continue
                yield json.loads(line)
|
||
|
||
def get_content(content: str, range: dict):
    """Return the slice of *content* from ``range["start"]`` up to ``range["end"]``."""
    begin = range["start"]
    stop = range["end"]
    return content[begin:stop]
|
||
|
||
def get_prefix(content: str, start: int, max=20):
    """Return the text just before *start*, limited to the last *max* lines.

    The scan walks backwards from ``start - 1`` counting newline characters.
    When the *max*-th newline is found, the prefix begins right after it, so
    the result holds at most ``max - 1`` newlines. If fewer than *max*
    newlines precede *start*, everything from the beginning of *content* up
    to *start* is returned.

    Args:
        content: full document text.
        start: index where the span of interest begins (exclusive end of
            the prefix).
        max: maximum number of newline-delimited segments to keep.

    Returns:
        The selected slice of *content* ending at *start*.
    """
    newlines_seen = 0
    # Stop value -1 so index 0 is examined too; stopping at 0 skipped it
    # and silently dropped the first characters of short prefixes.
    for idx in range(start - 1, -1, -1):
        if content[idx] == "\n":
            newlines_seen += 1

        # Checked on every step (not only on newlines) so max=0 yields "".
        if newlines_seen == max:
            return content[idx + 1 : start]

    # Ran out of text before reaching the limit: keep the whole head.
    return content[:start]
|
||
|
||
def get_suffix(content: str, end: int, max=20):
    """Return the text just after *end*, limited to the next *max* lines.

    The scan walks forward from *end* counting newline characters. When the
    *max*-th newline is found, the suffix stops right before it, so the
    result holds at most ``max - 1`` newlines. If fewer than *max* newlines
    follow *end*, everything from *end* to the end of *content* is returned.

    Args:
        content: full document text.
        end: index where the span of interest ends (inclusive start of the
            suffix).
        max: maximum number of newline-delimited segments to keep.

    Returns:
        The selected slice of *content* starting at *end*.
    """
    newlines_seen = 0
    for idx in range(end, len(content)):
        if content[idx] == "\n":
            newlines_seen += 1

        # Checked on every step (not only on newlines) so max=0 yields "".
        if newlines_seen == max:
            # idx is the bounding newline; slicing up to idx already excludes
            # it. Subtracting one more would drop a real character and could
            # turn the stop index negative.
            return content[end:idx]

    # Ran out of text before reaching the limit: keep the whole tail.
    return content[end:]
|
||
|
||
def items_from_filepattern(filepattern: str):
    """Yield every item of every document read from files matching *filepattern*.

    Documents come from ``iter_docs`` and are expanded with ``iter_items``.
    """
    for document in iter_docs(filepattern):
        for item in iter_items(document):
            yield item
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
papermill | ||
git+https://github.com/TabbyML/tabby.git#egg=tabby-python-client&subdirectory=clients/tabby-python-client | ||
transformers | ||
editdistance | ||
matplotlib | ||
notebook |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters