lotte-score.py 1.33 KB
Newer Older
kihoon.lee's avatar
update  
kihoon.lee committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import argparse
import glob
import pandas as pd

def load_records(pattern: str):
    """Yield every record from the JSON Lines files matching *pattern*.

    Files are visited in sorted path order so that the order in which
    categories are first seen (and therefore the row order of the printed
    table) does not depend on the file system's directory listing order.

    Args:
        pattern: Glob pattern selecting the judge output files.

    Yields:
        One dict per line of each matched file.
    """
    for file_path in sorted(glob.glob(pattern)):
        frame = pd.read_json(
            file_path, orient="records", encoding="utf-8-sig", lines=True
        )
        yield from frame.to_dict(orient="records")


def summarize_scores(records):
    """Average the single-turn judge scores per category and overall.

    Args:
        records: Iterable of dicts; each one is read as
            ``record["category"]`` and
            ``record["query_single"]["judge_score"]``.

    Returns:
        A ``(category_averages, overall_average)`` tuple.
        ``category_averages`` maps each category to its mean score, keyed in
        first-seen order; ``overall_average`` is the mean over all records.

    Raises:
        ValueError: If *records* is empty, since no average can be computed.
    """
    category_scores = {}
    total_single_scores = []
    for item in records:
        single_score = item["query_single"]["judge_score"]
        category_scores.setdefault(item["category"], []).append(single_score)
        total_single_scores.append(single_score)

    # Fail with an actionable message instead of a ZeroDivisionError below.
    if not total_single_scores:
        raise ValueError(
            "No judge scores found; check that the file location matches "
            "at least one non-empty judge output file"
        )

    category_averages = {
        category: sum(scores) / len(scores)
        for category, scores in category_scores.items()
    }
    overall_average = sum(total_single_scores) / len(total_single_scores)
    return category_averages, overall_average


def format_report(category_averages, overall_average: float) -> str:
    """Render the per-category table and the summary table as Markdown.

    Args:
        category_averages: Mapping of category name to its mean score.
        overall_average: Mean score over all records.

    Returns:
        The two Markdown tables separated by a blank line, without a
        trailing newline.
    """
    lines = ["| Category | Single turn |", "|---|---|"]
    lines.extend(
        f"| {category} | {average:.2f} |"
        for category, average in category_averages.items()
    )
    lines.append("")  # blank line between the two tables
    lines.extend(["| Category | Score |", "|---|---|"])
    # Only single-turn scores are collected, so "Overall" equals "Single turn".
    lines.append(f"| Single turn | {overall_average:.2f} |")
    lines.append(f"| Overall | {overall_average:.2f} |")
    return "\n".join(lines)


def main(argv=None):
    """Parse the command line, aggregate the judge scores and print the report.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Raises:
        ValueError: If ``-p/--print`` is missing, or if the given location
            yields no records.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-p", "--print", help="Judge Output File Location", default=None
    )
    args = parser.parse_args(argv)

    if args.print is None:
        raise ValueError("Judge Output File Location is required")

    category_averages, overall_average = summarize_scores(
        load_records(args.print)
    )
    print(format_report(category_averages, overall_average))


if __name__ == "__main__":
    main()