import os
import pandas as pd

def collect_combined_scores(evaluated_dir):
    """
    Recursively find every file named combined_scores.xlsx under evaluated_dir.

    Returns a list of paths, each formed by joining the directory reported by
    os.walk with the file name. The list follows os.walk's traversal order and
    is empty when nothing matches.
    """
    target_name = 'combined_scores.xlsx'
    # File names are unique within one directory, so a membership test finds
    # the same (at most one) match per directory as scanning every name.
    return [
        os.path.join(folder, target_name)
        for folder, _subdirs, names in os.walk(evaluated_dir)
        if target_name in names
    ]

def extract_model_name(file_path):
    """
    Return the name of the directory that directly contains file_path.

    Example: /path/to/model1/combined_scores.xlsx -> model1
    """
    parent_dir, _file_name = os.path.split(file_path)
    _grandparent, model_name = os.path.split(parent_dir)
    return model_name

def drop_empty_string_columns(sheet_df):
    """
    Drop, in place, every column that holds no text at all.

    A column counts as empty when, after discarding NaN cells, every remaining
    cell converts to a zero-length string. A column with no non-NaN cells is
    therefore empty as well.

    Args:
        sheet_df: DataFrame to prune; it is modified in place.

    Returns:
        The same DataFrame object, with the empty columns removed.
    """
    def _has_text(label):
        # Decide emptiness in plain Python so the result does not depend on
        # how pandas sums an empty Series of a particular dtype.
        cells = sheet_df[label].dropna()
        return any(len(str(value)) > 0 for value in cells)

    # Collect first and drop once, rather than mutating the frame while
    # iterating over its own columns.
    empty_labels = [label for label in sheet_df.columns if not _has_text(label)]
    if empty_labels:
        sheet_df.drop(columns=empty_labels, inplace=True)

    return sheet_df

def create_score_sheets(combined_scores_files, categories, score_types):
    """
    Build one DataFrame per score type, with one row per model.

    Each workbook is read with its first column as the index (the category
    labels). For every requested score type found among the workbook's columns,
    that column is aligned to ``categories`` and stored as a row labelled with
    the model name returned by extract_model_name(). After all files are
    processed, each sheet has its empty columns removed and gains a
    'Score Average' column holding the row-wise mean (NaN skipped).

    Args:
        combined_scores_files: Paths of the workbooks to merge.
        categories: Category labels used as the columns of every sheet.
        score_types: Score column names; each becomes one sheet.

    Returns:
        Dict mapping each score type to its DataFrame. A score type that no
        file provides maps to a frame without model rows.
    """
    # Start every score type with an empty frame that already has the category columns.
    score_sheets = {score_type: pd.DataFrame(columns=categories) for score_type in score_types}

    for file in combined_scores_files:
        model_name = extract_model_name(file)
        try:
            df = pd.read_excel(file, index_col=0)  # first column holds the category labels
        except Exception as e:
            # Best effort: one unreadable workbook must not abort the whole merge.
            print(f"Error reading {file}: {e}")
            continue

        for score_type in score_types:
            if score_type not in df.columns:
                print(f"Warning: '{score_type}' not found in {file}")
                continue
            # reindex() yields NaN for categories this file lacks; a label-list
            # .loc lookup would raise KeyError on any missing category instead.
            score_sheets[score_type].loc[model_name] = df[score_type].reindex(categories)

    # The frames are modified in place, so the dict entries need no re-assignment.
    for sheet_df in score_sheets.values():
        # 1. Remove columns that contain no text at all.
        drop_empty_string_columns(sheet_df)

        # 2. Discard any pre-existing 'Score Average' so it is not averaged into itself.
        if 'Score Average' in sheet_df.columns:
            sheet_df.drop(columns=['Score Average'], inplace=True)

        # 3. Per-model mean across the remaining category columns.
        sheet_df['Score Average'] = sheet_df.mean(axis=1, skipna=True)

    return score_sheets

def save_to_excel(score_sheets, output_file):
    """
    Write every DataFrame in score_sheets to its own worksheet of one Excel file.

    Each dictionary key is used as the sheet name. A confirmation message is
    printed after the writer has been closed.
    """
    excel_writer = pd.ExcelWriter(output_file, engine='openpyxl')
    with excel_writer:
        for title, frame in score_sheets.items():
            frame.to_excel(excel_writer, sheet_name=title)
    print(f"모든 스코어가 '{output_file}' 파일에 저장되었습니다.")

def main():
    """
    Merge every combined_scores.xlsx under 'evaluated' into 'merged_scores.xlsx'.

    Prints a message and returns without writing anything when no input
    workbook is found.
    """
    evaluated_dir = 'evaluated'  # directory searched for input workbooks
    output_file = 'merged_scores.xlsx'  # name of the merged workbook

    # Score columns to extract; each one becomes a sheet in the output.
    score_types = [
        'cot_1_shot_single_score',
        'cot_1_shot_multi_score',
        '1_shot_single_score',
        '1_shot_multi_score',
        'default_single_score',
        'default_multi_score',
        'lotte_single_turn',
    ]

    # Category labels; each one becomes a column in every sheet.
    categories = [
        '글쓰기(Writing)',
        '문법(Grammar)',
        '수학(Math)',
        '이해(Understanding)',
        '추론(Reasoning)',
        '코딩(Coding)',
        'lotte_qa',
        'meeting_summary',
        'mrc',
        'review_summary',
        'search_keyword',
        'search_summary',
        'task_assistant_hire',
        'task_assistant_mail_introduce',
        'task_assistant_mail_meeting',
        'task_assistant_mail_pr',
        'task_assistant_mail_share',
        'text2sql',
    ]

    # Locate the input workbooks; stop early when there is nothing to merge.
    found_files = collect_combined_scores(evaluated_dir)
    if not found_files:
        print("No 'combined_scores.xlsx' files found in the 'evaluated' directory.")
        return

    file_count = len(found_files)
    print(f"Found {file_count} 'combined_scores.xlsx' files.")

    # Build the per-score-type sheets and write them out.
    merged_sheets = create_score_sheets(found_files, categories, score_types)
    save_to_excel(merged_sheets, output_file)

# Run the merge only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()