# run_labeling.py
import openai
from openai import AsyncOpenAI, OpenAI
import os
import time
import pandas as pd
import json
from tqdm import tqdm

kihoon.lee's avatar
update    
kihoon.lee committed
9

kihoon.lee's avatar
upload  
kihoon.lee committed
10
def chat(question, answer, client=None):
    """Ask gpt-4o for three follow-up questions to a question/answer pair.

    Args:
        question: The user's utterance from the conversation.
        answer: The assistant's reply to ``question``.
        client: Optional object exposing ``chat.completions.create`` (for
            example an ``openai.OpenAI`` instance). When omitted, a new
            ``OpenAI`` client is built from the ``OPENAI_API_KEY`` environment
            variable. Passing one lets a caller reuse a single client across
            many calls.

    Returns:
        The model's reply with surrounding whitespace stripped, or an empty
        string when the reply carries no text content.
    """
    if client is None:
        client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    # Adjacent literals are concatenated as-is (no separator is inserted).
    system_prompt = (
        "당신은 대화를 오래 이어가기위해 사용자에게 질문을 추천해주는 봇입니다."
        "사용자는 인공지능 어시스턴트에게 추가 질문을 진행합니다."
        "현재 대화에서 사용자가 어시스턴트에게 질문하면 좋을 것 같은 3가지 질의를 추천해주세요."
        "출력 형식은 '1.str\n2.str\n3.str' 입니다."
        "출력 예시는 다음과 같습니다."
        "1. 메뉴에 있는 음료 중 가장 비싼건 무엇인가요?\n2. 메뉴에서 따뜻한 음료와 차가운 음료 옵션을 모두 제공하는 항목은 무엇인가요? \n3. 스페셜티 커피는 어떤 종류가 있나요?"
    )

    # The trailing "\n" after {answer} keeps the instruction sentence from
    # being glued onto the end of the assistant's answer text.
    user_prompt = (
        f"사용자:{question}\n 어시스턴트:{answer}\n"
        "위 내용에 대해 어시스턴트에게 추가로 물어볼만한 질의를 3개 추천해주세요."
    )

    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )

    # ``content`` may be None (e.g. a reply without text); avoid calling
    # .strip() on None.
    reply = completion.choices[0].message.content
    return (reply or "").strip()

kihoon.lee's avatar
update    
kihoon.lee committed
38

kihoon.lee's avatar
upload  
kihoon.lee committed
39
def add_recommendation(input_file, output_file):
    """Annotate every entry of a JSON file with recommended follow-up questions.

    Loads a JSON document from ``input_file`` and iterates over its entries.
    For each entry the "질문" and "답변" values (empty string when the key is
    absent) are sent to ``chat`` and the reply is stored on the entry under
    "추천질의". The whole collection is then written to ``output_file``.

    Args:
        input_file: Path of the UTF-8 JSON file to read.
        output_file: Path of the UTF-8 JSON file to write (non-ASCII text is
            kept as-is, indented by 4 spaces).

    Note:
        The output file is written only after every entry has been processed,
        so an exception raised part-way through leaves no output.
    """
    with open(input_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    total_entries = len(data)

    for done, entry in enumerate(tqdm(data), start=1):
        entry["추천질의"] = chat(entry.get("질문", ""), entry.get("답변", ""))
        # tqdm.write prints without breaking the progress bar rendering.
        tqdm.write(
            f"{done}/{total_entries} 완료. ({done / total_entries * 100:.2f}%)"
        )

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

kihoon.lee's avatar
update    
kihoon.lee committed
57

kihoon.lee's avatar
update    
kihoon.lee committed
58
def concat_jsons(before_file_path, additional_file_path, after_file_path):
    """Append the entries of one JSON file to another and save the result.

    Each item of ``additional_file_path`` is converted to a new entry that
    copies its "질문", "답변" and "추천질의" values (empty string when absent)
    and sets the remaining metadata fields to ``None``. The new entries are
    appended, in order, to the collection loaded from ``before_file_path``.

    Args:
        before_file_path: Path of the UTF-8 JSON file holding the existing
            entries; its top-level value must support ``append``.
        additional_file_path: Path of the UTF-8 JSON file whose items are
            appended.
        after_file_path: Path the merged collection is written to (non-ASCII
            text is kept as-is, indented by 4 spaces).
    """
    with open(before_file_path, "r", encoding="utf-8") as file_01:
        before_json = json.load(file_01)

    with open(additional_file_path, "r", encoding="utf-8") as file_02:
        additional_json = json.load(file_02)

    for item in tqdm(additional_json):
        new_entry = {
            "질문": item.get("질문", ""),
            "답변": item.get("답변", ""),
            "추천질의": item.get("추천질의", ""),
            # Metadata fields are filled with None for appended entries.
            "그룹사": None,
            "생성일": None,
            "세션ID": None,
            "모델타입": None,
            "사용자 평가": None,
            "모인ID": None,
        }
        before_json.append(new_entry)

    with open(after_file_path, "w", encoding="utf-8") as file:
        json.dump(before_json, file, ensure_ascii=False, indent=4)


kihoon.lee's avatar
upload  
kihoon.lee committed
83
84
85
def testing():
    """Send one fixed greeting exchange to ``chat`` and print the reply.

    The reply is printed wrapped in the ANSI escape codes ``\\033[94m`` and
    ``\\033[0m``. Returns ``None``.
    """
    question = "안녕"
    answer = "안녕하세요. 무엇을 도와드릴까요?"

    recommendation = chat(question, answer)
    print(f"\033[94m{recommendation}\033[0m")

kihoon.lee's avatar
update    
kihoon.lee committed
92

kihoon.lee's avatar
upload  
kihoon.lee committed
93
94
if __name__ == "__main__":
    # Run the single-exchange check first, before the batch steps.
    testing()

    # The labeling step's output is the merge step's input, so both must use
    # the same path. Previously one used "QR_v1_4b.json" and the other
    # "QR_v1.4b.json", so the merge could not read what was just written.
    labeled_file = "QR_v1.4b.json"

    add_recommendation(input_file="QR_v1.4a.json", output_file=labeled_file)

    concat_jsons(
        before_file_path="QR_v1.4.json",
        additional_file_path=labeled_file,
        after_file_path="QR_v1.5.json",
    )

    print("\033[94m ##DONE## \033[0m")