# run_labeling.py

import openai
from openai import AsyncOpenAI, OpenAI
import os
import time
import pandas as pd
import json
from tqdm import tqdm


def chat(question, answer):
    """Request follow-up question suggestions for one conversation turn.

    Sends a single chat-completion request to the ``gpt-4o`` model. The
    system message is a fixed Korean prompt asking for three recommended
    queries in a numbered three-line format (with an example); the user
    message embeds ``question`` and ``answer`` and asks which questions to
    pose next.

    Args:
        question: Text interpolated after ``User:`` in the user message.
        answer: Text interpolated after ``AI:`` in the user message.

    Returns:
        The content of the first returned choice, with leading and
        trailing whitespace stripped.
    """
    # A new client is constructed on every call; the API key is read from
    # the OPENAI_API_KEY environment variable.
    api_key = os.getenv("OPENAI_API_KEY")
    client = OpenAI(api_key=api_key)

    # Adjacent string literals are joined with no separator between them.
    system_prompt = (
        "현재 대화 주제를 파악한 뒤, 사용자가 AI 어시스턴트에게 질문하면 좋을 것 같은 질의 3가지를 추천합니다."
        "출력 형식은 '1.str\n2.str\n3.str' 이며, 답변 예시는 다음과 같습니다."
        "1. 오늘 날씨는 어떤가요?\n2. 기분 좋게 하루를 시작하려면 어떤걸 해야할까요? \n3. 효과적인 답변을 얻기 위해 제가 어떤 질문을 해야할까요?"
    )
    user_prompt = (
        f"User: '{question}'\n AI:'{answer}'\n\n"
        "도움을 받거나 정보를 얻고 싶은데, 추가적으로 제가 AI에게 어떤 질문을 하면 좋을까요? 친절한 어투로 질문하고 싶어요."
    )
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=conversation,
    )
    first_choice = response.choices[0]
    return first_choice.message.content.strip()


def add_recommendation(input_file, output_file):
    """Attach a ``chat`` recommendation to every entry of a JSON file.

    Loads ``input_file`` as JSON and iterates over its entries. For each
    entry the "질문" and "답변" values (empty string when missing) are
    passed to ``chat`` and the result is stored under "추천질의". Progress
    is reported through ``tqdm.write``.

    The full data set is written to ``output_file`` after every 100th
    entry and once more after the loop finishes. Any exception raised
    while handling an entry is reported, followed by a 10-second pause;
    the loop then moves on to the next entry without retrying.

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path of the JSON file to write.
    """
    with open(input_file, "r", encoding="utf-8") as src:
        data = json.load(src)

    def _save():
        # Rewrite the whole output file with the current state of `data`.
        with open(output_file, "w", encoding="utf-8") as dst:
            json.dump(data, dst, ensure_ascii=False, indent=4)

    total_entries = len(data)

    for done, entry in enumerate(tqdm(data), start=1):
        try:
            entry["추천질의"] = chat(entry.get("질문", ""), entry.get("답변", ""))
            tqdm.write(
                f"{done}/{total_entries} 완료. ({done / total_entries * 100:.2f}%)"
            )
            # Periodic checkpoint; a failure here is handled like any other
            # per-entry failure below.
            if done % 100 == 0:
                _save()
        except Exception as e:
            tqdm.write(f"오류 발생: {e}")
            time.sleep(10)

    _save()


def concat_jsons(before_file_path, additional_file_path, after_file_path):
    """Append normalized entries from one JSON file onto another.

    Loads both input files as JSON. For every item in the additional file
    a new dict is built containing a fixed set of keys, in a fixed order;
    a key whose value is missing or falsy is stored as ``None``. Each new
    dict is appended to the data loaded from ``before_file_path`` (which
    must therefore support ``append``), and the combined result is written
    to ``after_file_path``.

    Args:
        before_file_path: Path of the JSON file whose content is extended.
        additional_file_path: Path of the JSON file supplying new items.
        after_file_path: Path the combined JSON is written to.
    """
    # Keys copied from each additional item, in output order.
    field_names = (
        "질문",
        "답변",
        "추천질의",
        "그룹사",
        "생성일",
        "세션ID",
        "모델타입",
        "사용자 평가",
        "모인ID",
    )

    with open(before_file_path, "r", encoding="utf-8") as base_fp:
        merged = json.load(base_fp)

    with open(additional_file_path, "r", encoding="utf-8") as extra_fp:
        extras = json.load(extra_fp)

    for record in tqdm(extras):
        # `or None` collapses missing and falsy values to None.
        merged.append({name: record.get(name) or None for name in field_names})

    with open(after_file_path, "w", encoding="utf-8") as out_fp:
        json.dump(merged, out_fp, ensure_ascii=False, indent=4)


def testing():
    """Run ``chat`` once on a fixed greeting exchange and print the reply.

    The reply is wrapped in the ANSI escape sequences ``\\033[94m`` and
    ``\\033[0m`` when printed. Always returns ``None``.
    """
    sample_question, sample_answer = "안녕", "안녕하세요. 무엇을 도와드릴까요?"
    reply = chat(sample_question, sample_answer)

    print(f"\033[94m{reply}\033[0m")
    return None

def main():
    """Label the ``QR_v1.4a.json`` batch and merge it into the v1.4 data.

    Step 1 runs ``add_recommendation`` on ``QR_v1.4a.json`` and writes the
    labeled result to ``QR_v1.4b.json``. Step 2 runs ``concat_jsons`` to
    append that labeled file onto ``QR_v1.4.json``, producing
    ``QR_v1.5.json``. A completion marker is printed at the end.
    """
    # One shared path for the intermediate file: the labeling step used to
    # write "QR_v1_4b.json" while the merge step read "QR_v1.4b.json", so
    # the merge never saw the freshly labeled data.
    labeled_batch_path = "QR_v1.4b.json"

    add_recommendation(input_file="QR_v1.4a.json", output_file=labeled_batch_path)

    concat_jsons(
        before_file_path="QR_v1.4.json",
        additional_file_path=labeled_batch_path,
        after_file_path="QR_v1.5.json",
    )

    print("\033[94m ##DONE## \033[0m")


if __name__ == "__main__":
    # Script entry point: one sample chat call first, then label the whole
    # v1.4 file into v1.5.
    # NOTE(review): main() is defined in this file but not invoked here;
    # confirm that is intended.
    testing()

    source_path, target_path = "QR_v1.4.json", "QR_v1.5.json"
    add_recommendation(input_file=source_path, output_file=target_path)

    print("\033[94m ##DONE## \033[0m")