import argparse import glob import pandas as pd from openpyxl import Workbook from openpyxl.styles import Font, PatternFill, Alignment, Border, Side from openpyxl.utils.dataframe import dataframe_to_rows def process_file(file_path, is_single_turn=False): df = pd.read_json(file_path, orient="records", lines=True, encoding="utf-8-sig") df['single_score'] = df['query_single'].apply(lambda x: x['judge_score']) if not is_single_turn: df['multi_score'] = df['query_multi'].apply(lambda x: x['judge_score']) agg_dict = {'single_score': 'mean'} if not is_single_turn: agg_dict['multi_score'] = 'mean' category_scores = df.groupby('category').agg(agg_dict).round(2) overall_scores = pd.DataFrame({ 'single_score': [df['single_score'].mean().round(2)], 'multi_score': [df['multi_score'].mean().round(2)] if not is_single_turn else [pd.NA] }, index=['Overall']) return pd.concat([category_scores, overall_scores]) def style_excel(ws): header_fill = PatternFill(start_color="FFD700", end_color="FFD700", fill_type="solid") overall_fill = PatternFill(start_color="E6E6E6", end_color="E6E6E6", fill_type="solid") border = Border(left=Side(style='thin'), right=Side(style='thin'), top=Side(style='thin'), bottom=Side(style='thin')) for row in ws.iter_rows(min_row=1, max_row=ws.max_row, min_col=1, max_col=ws.max_column): for cell in row: cell.border = border cell.alignment = Alignment(horizontal='center', vertical='center') if cell.row == 1: cell.font = Font(bold=True) cell.fill = header_fill elif cell.row == ws.max_row: # Overall 행 cell.font = Font(bold=True) cell.fill = overall_fill for column in ws.columns: max_length = 0 column_letter = column[0].column_letter for cell in column: try: if len(str(cell.value)) > max_length: max_length = len(cell.value) except: pass adjusted_width = (max_length + 2) ws.column_dimensions[column_letter].width = adjusted_width def main(): parser = argparse.ArgumentParser() parser.add_argument("-i", "--input_dir", help="Input directory containing JSONL files", required=True) parser.add_argument("-o", "--output", help="Output Excel File Name", default="combined_scores.xlsx") args = parser.parse_args() file_patterns = [ ("cot_1_shot", "cot-1-shot.jsonl", False), ("1_shot", "1-shot.jsonl", False), ("default", "default.jsonl", False), ("lotte_single_turn", "lotte_single_turn.jsonl", True) ] all_results = {} for name, pattern, is_single_turn in file_patterns: file_path = glob.glob(f"{args.input_dir}/{pattern}")[0] results = process_file(file_path, is_single_turn) all_results[name] = results combined_results = pd.concat(all_results, axis=1) combined_results.columns = [f"{col[0]}_{col[1]}" for col in combined_results.columns] combined_results = combined_results.reset_index().rename(columns={'index': 'Category'}) # Overall을 마지막으로 이동 overall_row = combined_results[combined_results['Category'] == 'Overall'] combined_results = pd.concat([combined_results[combined_results['Category'] != 'Overall'], overall_row]) # lotte_single_turn의 multi_score 열 제거 combined_results = combined_results.drop('lotte_single_turn_multi_score', axis=1) # lotte_single_turn_single_score을 lotte_single_turn으로 변경 combined_results = combined_results.rename(columns={'lotte_single_turn_single_score': 'lotte_single_turn'}) # Excel 파일로 저장 wb = Workbook() ws = wb.active ws.title = "Combined Scores" for r in dataframe_to_rows(combined_results, index=False, header=True): ws.append(r) style_excel(ws) wb.save(args.output) print(f"Combined scores have been saved to {args.output}") if __name__ == "__main__": main()