Skip to content

Commit f1e50d4

Browse files
authored
[Update] Update LiveMathBench (#1809)
* Update LiveMathBench
* Update New O1 Evaluation
* Update O1 evaluation
1 parent 8fdb72f commit f1e50d4

File tree

1 file changed

+51
-0
lines changed

1 file changed

+51
-0
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,51 @@
1+
from opencompass.openicl.icl_prompt_template import PromptTemplate
2+
from opencompass.openicl.icl_retriever import ZeroRetriever
3+
from opencompass.openicl.icl_inferencer import GenInferencer
4+
5+
from opencompass.datasets.livemathbench import LiveMathBenchDataset, LiveMathBenchEvaluator
6+
7+
8+
# LiveMathBench (version 202412) configuration with k=1 and replication=1;
# the 'greedy' suffix in `abbr` labels this particular setting.
livemathbench_dataset = dict(
    # If you change the K and replication, you need to change the dataset name.
    abbr='LiveMathBench-v202412-greedy',
    type=LiveMathBenchDataset,
    path='opencompass/LiveMathBench',
    k=1,
    replication=1,
    dataset_splits=['CNMO', 'CCEE', 'AMC', 'WLPMC'],
    dataset_languages=['cn', 'en'],
    cot=False,
    version='202412',
    # The 'prompt' column is the model input; 'answer' is the reference column.
    reader_cfg=dict(input_columns=['prompt'], output_column='answer'),
    infer_cfg=dict(
        prompt_template=dict(
            type=PromptTemplate,
            # One HUMAN turn whose text is just the `{prompt}` placeholder.
            template=dict(round=[dict(role='HUMAN', prompt='{prompt}')]),
        ),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=GenInferencer, max_out_len=16384),
    ),
    eval_cfg=dict(
        evaluator=dict(
            type=LiveMathBenchEvaluator,
            # NOTE(review): model name / URL fields are left empty here;
            # presumably they are filled in before running evaluation — confirm.
            model_name='',
            url=[],
            use_extract_model=False,
            extract_url=[],
            extract_model_name='',
            # Same K / replication values as the dataset-level settings above;
            # presumably these must be kept in sync — confirm.
            k=[1],
            replication=1,
            thresholds=[0.0, 0.25, 0.5, 0.75, 1.0],
        ),
    ),
)

livemathbench_datasets = [livemathbench_dataset]

0 commit comments

Comments
 (0)