#!/bin/bash
set -e
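
# update-llama-model.sh: refresh the GGUF weights for Tabby's models.
# For each model, convert the Hugging Face checkout to f16 GGUF, quantize it
# to q8_0 with llama.cpp, and upload the result back to the model repo.
# Requires git, cmake, python, and huggingface-cli; the Hugging Face access
# token is passed as the first argument.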
ACCESS_TOKEN=$1
usage() {
  echo "Usage: $0 <access_token>"
  exit 1
}
if [ -z "${ACCESS_TOKEN}" ]; then
  usage
fi
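
# Clone llama.cpp at a pinned commit and build its quantize tool.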
prepare_llama_cpp() {
  git clone https://github.com/ggerganov/llama.cpp.git
  pushd llama.cpp
  git checkout 6961c4bd0b5176e10ab03b35394f1e9eab761792
  mkdir build
  pushd build
  cmake ..
  make quantize
  popd
  popd
}
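
# update_model <converter_fn> <hf_model_id>: convert, quantize, and upload a
# single model, then clean up the local checkout.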
update_model() {
  local CONVERTER=$1
  local MODEL_ID=$2

  git clone --depth 1 "https://huggingface.co/${MODEL_ID}" hf_model
  pushd hf_model
  huggingface-cli lfs-enable-largefiles .
  "$CONVERTER"
  ../llama.cpp/build/bin/quantize ./ggml/f16.v2.gguf ./ggml/q8_0.v2.gguf q8_0
  huggingface-cli upload "${MODEL_ID}" ggml/q8_0.v2.gguf ggml/q8_0.v2.gguf
  popd
  echo "Success!"
  rm -rf hf_model
}
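
# Converter helpers; each runs from inside hf_model, so llama.cpp paths are
# relative to the model checkout. The trailing "1" passed to the StarCoder
# converter is its ftype argument (1 = f16).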
starcoder() {
  python ../llama.cpp/convert-starcoder-hf-to-gguf.py . --outfile ./ggml/f16.v2.gguf 1
}
llama() {
  python ../llama.cpp/convert.py . --outfile ./ggml/f16.v2.gguf --outtype f16
}
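
# Log in once, build llama.cpp (|| true tolerates an existing checkout),
# then refresh every model.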
set -x
huggingface-cli login --token "${ACCESS_TOKEN}"
prepare_llama_cpp || true
update_model starcoder TabbyML/StarCoder-1B
update_model starcoder TabbyML/StarCoder-3B
update_model starcoder TabbyML/StarCoder-7B
update_model llama TabbyML/CodeLlama-7B
update_model llama TabbyML/CodeLlama-13B
update_model llama TabbyML/Mistral-7B
update_model starcoder TabbyML/WizardCoder-3B