# run_labeling.py

import openai
from openai import AsyncOpenAI, OpenAI
import os
import time
import pandas as pd
import json
from tqdm import tqdm

def chat(question, answer):
    """Ask gpt-4o for three follow-up questions the user could ask next.

    Args:
        question: The user's message from the conversation turn.
        answer: The assistant's reply to that message.

    Returns:
        The model's reply with surrounding whitespace stripped. The system
        prompt requests a numbered three-item list, but the text is returned
        as-is without parsing. An empty string is returned when the API
        response carries no text content.
    """
    # A new client is built on every call; the key is read from the
    # OPENAI_API_KEY environment variable.
    client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": (
                    "당신은 대화를 오래 이어가기위해 사용자에게 질문을 추천해주는 봇입니다."
                    "즉, 사용자가 인공지능 어시스턴트에게 추가 질문을 진행합니다."
                    "현재 대화에서 사용자가 어시스턴트에게 질문하면 좋을 것 같은 3가지 질의를 추천해주세요."
                    "출력 형식은 '1.str\n2.str\n3.str' 입니다."
                    "출력 예시는 다음과 같습니다."
                    "1. 메뉴에 있는 음료 중 가장 비싼건 무엇인가요?\n2. 메뉴에서 따뜻한 음료와 차가운 음료 옵션을 모두 제공하는 항목은 무엇인가요? \n3. 스페셜티 커피는 어떤 종류가 있나요?"
                )
            },
            {
                "role": "user",
                "content": (
                    # The trailing "\n" keeps the assistant's answer from
                    # running straight into the instruction sentence below
                    # (adjacent literals are concatenated with no separator).
                    f"사용자:{question}\n 어시스턴트:{answer}\n"
                    "위 내용에 대해 어시스턴트에게 추가로 물어볼만한 질의를 3개 추천해주세요."
                ),
            }
        ],
    )

    # message.content is nullable in the API response; calling .strip() on
    # None would raise AttributeError and abort a whole labeling run.
    content = completion.choices[0].message.content
    return content.strip() if content else ""

def add_recommendation(input_file, output_file):
    """Label every entry of a JSON file with recommended follow-up questions.

    Loads the JSON list at ``input_file``, calls ``chat`` with each entry's
    "질문" and "답변" values (empty string when a key is missing), stores the
    reply under "추천질의", and writes the whole list to ``output_file``.

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path of the JSON file to write.

    Note:
        The output is written only after every entry has been processed, so
        an exception raised part-way through leaves no output file.
    """
    with open(input_file, 'r', encoding='utf-8') as src:
        records = json.load(src)

    record_count = len(records)

    # Count from 1 so the progress message needs no "+ 1" arithmetic.
    for done, record in enumerate(tqdm(records), start=1):
        record["추천질의"] = chat(record.get("질문", ""), record.get("답변", ""))
        percent = done / record_count * 100
        # tqdm.write prints without corrupting the active progress bar.
        tqdm.write(f"{done}/{record_count} 완료. ({percent:.2f}%)")

    with open(output_file, 'w', encoding='utf-8') as dst:
        json.dump(records, dst, ensure_ascii=False, indent=4)

def concat_jsons(before_file_path, additional_file_path, after_file_path):
    """Append the entries of one JSON list file to another and save the result.

    Each item of the additional file is reduced to its "질문", "답변" and
    "추천질의" values (empty string when missing) and padded with the
    remaining record fields set to ``None`` before being appended to the
    list loaded from ``before_file_path``.

    Args:
        before_file_path: Path of the JSON list that receives the new entries.
        additional_file_path: Path of the JSON list whose items are appended.
        after_file_path: Path the combined list is written to.
    """
    def _load(path):
        with open(path, 'r', encoding='utf-8') as handle:
            return json.load(handle)

    # Fields the additional items do not carry; they are filled with None.
    blank_fields = ("그룹사", "생성일", "세션ID", "모델타입", "사용자 평가", "모인ID")

    merged = _load(before_file_path)
    extra_items = _load(additional_file_path)

    merged.extend(
        {
            "질문": extra.get("질문", ""),
            "답변": extra.get("답변", ""),
            "추천질의": extra.get("추천질의", ""),
            **dict.fromkeys(blank_fields),
        }
        for extra in tqdm(extra_items)
    )

    with open(after_file_path, 'w', encoding='utf-8') as out:
        json.dump(merged, out, ensure_ascii=False, indent=4)


def testing():
    """Send one fixed greeting exchange through ``chat`` and print the reply.

    The reply is wrapped in ANSI escape codes (94 = bright blue, 0 = reset).
    Returns ``None``.
    """
    sample_question, sample_answer = "안녕", "안녕하세요. 무엇을 도와드릴까요?"
    reply = chat(sample_question, sample_answer)
    print(f"\033[94m{reply}\033[0m")


if __name__ == "__main__":
    # One live API call first, so a bad key or network problem shows up
    # before the full labeling run starts.
    testing()

    # The labeled output must be the exact file the merge step reads next.
    # It was previously written as "QR_v1_4b.json" while concat_jsons read
    # "QR_v1.4b.json", so the merge never saw the fresh labels.
    add_recommendation(input_file="QR_v1.4a.json",
                       output_file="QR_v1.4b.json")

    concat_jsons(before_file_path="QR_v1.4.json",
                 additional_file_path="QR_v1.4b.json",
                 after_file_path="QR_v1.5.json")

    print("\033[94m ##DONE## \033[0m")