# run_labeling.py

import openai
from openai import AsyncOpenAI, OpenAI
import os
import time
import pandas as pd
import json
from tqdm import tqdm


# Lazily created client shared by every chat() call that does not pass its own,
# so a long labeling run does not build a fresh client per request.
_DEFAULT_CLIENT = None


def _get_default_client():
    """Return the shared OpenAI client, creating it on first use."""
    global _DEFAULT_CLIENT
    if _DEFAULT_CLIENT is None:
        _DEFAULT_CLIENT = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    return _DEFAULT_CLIENT


def chat(question, answer, client=None, model="gpt-4o"):
    """Ask the model for three follow-up questions about one Q&A exchange.

    Args:
        question: The user's message from the conversation being labeled.
        answer: The assistant's reply to that message.
        client: Optional object exposing ``chat.completions.create``. When
            omitted, a shared ``OpenAI`` client configured from the
            ``OPENAI_API_KEY`` environment variable is used.
        model: Name of the chat model to query.

    Returns:
        The model's reply with surrounding whitespace stripped, or an empty
        string when the reply carries no text content.
    """
    if client is None:
        client = _get_default_client()

    completion = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                # Korean system prompt: the bot recommends three follow-up
                # questions and must answer as a numbered three-line list.
                "content": (
                    "당신은 대화를 오래 이어가기위해 사용자에게 질문을 추천해주는 봇입니다."
                    "사용자는 인공지능 어시스턴트에게 추가 질문을 진행합니다."
                    "현재 대화에서 사용자가 어시스턴트에게 질문하면 좋을 것 같은 3가지 질의를 추천해주세요."
                    "출력 형식은 '1.str\n2.str\n3.str' 입니다."
                    "출력 예시는 다음과 같습니다."
                    "1. 메뉴에 있는 음료 중 가장 비싼건 무엇인가요?\n2. 메뉴에서 따뜻한 음료와 차가운 음료 옵션을 모두 제공하는 항목은 무엇인가요? \n3. 스페셜티 커피는 어떤 종류가 있나요?"
                ),
            },
            {
                "role": "user",
                "content": (
                    # The trailing newline keeps the assistant's answer from
                    # running straight into the instruction sentence below.
                    f"사용자:{question}\n 어시스턴트:{answer}\n"
                    "위 내용에 대해 어시스턴트에게 추가로 물어볼만한 질의를 3개 추천해주세요."
                ),
            },
        ],
    )

    content = completion.choices[0].message.content
    # The content field may be None; avoid calling .strip() on it.
    if content is None:
        return ""
    return content.strip()


def add_recommendation(input_file, output_file):
    """Label every record of a JSON file with recommended follow-up questions.

    Loads ``input_file`` as JSON, calls ``chat`` once per record with its
    "질문" (question) and "답변" (answer) values (empty string when a key is
    missing), stores the reply under "추천질의", and writes the whole
    collection to ``output_file`` as indented UTF-8 JSON.

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path of the JSON file to write.
    """
    with open(input_file, "r", encoding="utf-8") as src:
        records = json.load(src)

    record_count = len(records)

    for done, record in enumerate(tqdm(records), start=1):
        record["추천질의"] = chat(record.get("질문", ""), record.get("답변", ""))
        percent = done / record_count * 100
        # tqdm.write prints the status line without breaking the progress bar.
        tqdm.write(f"{done}/{record_count} 완료. ({percent:.2f}%)")

    with open(output_file, "w", encoding="utf-8") as dst:
        json.dump(records, dst, ensure_ascii=False, indent=4)


def concat_jsons(before_file_path, additional_file_path, after_file_path):
    """Append labeled records from one JSON file onto another and save the result.

    Each item of ``additional_file_path`` is reduced to its "질문", "답변" and
    "추천질의" values (empty string when absent), padded with the remaining
    metadata fields set to ``None``, and appended to the collection loaded
    from ``before_file_path``. The combined collection is written to
    ``after_file_path`` as indented UTF-8 JSON.

    Args:
        before_file_path: Path of the JSON file holding the existing records.
        additional_file_path: Path of the JSON file holding records to append.
        after_file_path: Path of the JSON file to write.
    """
    # Metadata fields that appended records carry only as null placeholders.
    blank_fields = ("그룹사", "생성일", "세션ID", "모델타입", "사용자 평가", "모인ID")
    copied_fields = ("질문", "답변", "추천질의")

    with open(before_file_path, "r", encoding="utf-8") as base_fp:
        merged = json.load(base_fp)

    with open(additional_file_path, "r", encoding="utf-8") as extra_fp:
        extra_records = json.load(extra_fp)

    merged.extend(
        {
            **{key: record.get(key, "") for key in copied_fields},
            **dict.fromkeys(blank_fields),
        }
        for record in tqdm(extra_records)
    )

    with open(after_file_path, "w", encoding="utf-8") as out_fp:
        json.dump(merged, out_fp, ensure_ascii=False, indent=4)


def testing():
    """Send one hard-coded greeting exchange through ``chat`` and print the reply.

    The reply is wrapped in the ANSI escape codes ``\\033[94m`` / ``\\033[0m``
    when printed. Returns ``None``.
    """
    sample_question, sample_answer = "안녕", "안녕하세요. 무엇을 도와드릴까요?"
    print(f"\033[94m{chat(sample_question, sample_answer)}\033[0m")


if __name__ == "__main__":
    # Run a single sample request first, before starting the full labeling run.
    testing()

    # The labeled intermediate file is written by add_recommendation() and read
    # back by concat_jsons(). Previously the two steps used different names
    # ("QR_v1_4b.json" vs "QR_v1.4b.json"), so the merge step looked for a file
    # the labeling step never wrote. One shared variable keeps them in sync.
    labeled_file = "QR_v1.4b.json"

    add_recommendation(input_file="QR_v1.4a.json", output_file=labeled_file)

    concat_jsons(
        before_file_path="QR_v1.4.json",
        additional_file_path=labeled_file,
        after_file_path="QR_v1.5.json",
    )

    print("\033[94m ##DONE## \033[0m")