run_labeling.py 3.87 KB
Newer Older
kihoon.lee's avatar
upload  
kihoon.lee committed
1
2
3
4
5
6
7
8
import openai
from openai import AsyncOpenAI, OpenAI
import os
import time
import pandas as pd
import json
from tqdm import tqdm

kihoon.lee's avatar
update    
kihoon.lee committed
9

kihoon.lee's avatar
upload  
kihoon.lee committed
10
def chat(question, answer, *, client=None, model="gpt-4o"):
    """Ask the chat model for three follow-up questions to one exchange.

    Sends a system prompt that requests three recommended follow-up queries
    (as a numbered, newline-separated list) plus a user message that embeds
    the given question/answer pair, and returns the model's reply text.

    Args:
        question: The user's original question, embedded into the prompt.
        answer: The AI answer to that question, embedded into the prompt.
        client: Optional object exposing ``chat.completions.create(...)``.
            When omitted, a new ``OpenAI`` client is built from the
            ``OPENAI_API_KEY`` environment variable. Passing one lets a
            caller reuse a single client across many calls.
        model: Model name passed to the completions endpoint.

    Returns:
        The first choice's message content with surrounding whitespace
        stripped, or an empty string when the reply carries no text content.
    """
    if client is None:
        client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    system_prompt = (
        "현재 대화 주제를 파악한 뒤, 사용자가 AI 어시스턴트에게 질문하면 좋을 것 같은 질의 3가지를 추천합니다."
        "출력 형식은 '1.str\n2.str\n3.str' 이며, 답변 예시는 다음과 같습니다."
        "1. 오늘 날씨는 어떤가요?\n2. 기분 좋게 하루를 시작하려면 어떤걸 해야할까요? \n3. 효과적인 답변을 얻기 위해 제가 어떤 질문을 해야하나요?"
    )
    user_prompt = (
        f"User: '{question}'\n AI:'{answer}'\n\n"
        "도움을 받거나 정보를 얻고 싶은데, 추가적으로 제가 AI에게 어떤 질문을 하면 좋을까요? 친절한 어투로 질문하고 싶어요."
    )

    completion = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )

    # The message content can be None (e.g. a reply without text), in which
    # case calling .strip() directly would raise AttributeError.
    content = completion.choices[0].message.content
    return content.strip() if content is not None else ""

kihoon.lee's avatar
update    
kihoon.lee committed
35

kihoon.lee's avatar
upload  
kihoon.lee committed
36
def add_recommendation(input_file, output_file, max_entries=11):
    """Label entries of a JSON file with recommended follow-up queries.

    Loads a JSON array of entries from ``input_file``, calls ``chat`` with
    each entry's "질문" (question) and "답변" (answer) values, stores the
    reply under the "추천질의" key, and writes the whole array to
    ``output_file``.

    Args:
        input_file: Path of the UTF-8 JSON file to read.
        output_file: Path of the UTF-8 JSON file to write.
        max_entries: Maximum number of entries to label, counted from the
            start of the array. Defaults to 11, which matches the previously
            hard-coded ``i == 10`` cut-off. Pass ``None`` to label every
            entry; a value of 0 or less labels nothing.
            NOTE(review): the cap looks like a debugging leftover - confirm
            whether production runs should pass ``None``.

    Entries beyond the cap are written to ``output_file`` unchanged.
    """
    with open(input_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    total_entries = len(data)

    if max_entries is None or max_entries > 0:
        for i, entry in enumerate(tqdm(data)):
            question = entry.get("질문", "")
            answer = entry.get("답변", "")
            entry["추천질의"] = chat(question, answer)
            # tqdm.write keeps the message from clobbering the progress bar.
            tqdm.write(
                f"{i + 1}/{total_entries} 완료. ({(i + 1) / total_entries * 100:.2f}%)"
            )
            # Stop once the requested number of entries has been labeled.
            if max_entries is not None and i + 1 >= max_entries:
                break

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

kihoon.lee's avatar
update    
kihoon.lee committed
55

kihoon.lee's avatar
update    
kihoon.lee committed
56
def concat_jsons(before_file_path, additional_file_path, after_file_path):
    """Append normalized entries from one JSON array file onto another.

    Reads the array in ``before_file_path`` and the array in
    ``additional_file_path``, appends one new dict per additional item, and
    writes the combined array to ``after_file_path``.

    Each appended dict holds exactly the fixed set of keys listed below, in
    that order. A key that is missing from the item, or whose value is
    falsy, is stored as ``None``.
    """
    field_names = (
        "질문",
        "답변",
        "추천질의",
        "그룹사",
        "생성일",
        "세션ID",
        "모델타입",
        "사용자 평가",
        "모인ID",
    )

    with open(before_file_path, "r", encoding="utf-8") as base_fp:
        merged = json.load(base_fp)

    with open(additional_file_path, "r", encoding="utf-8") as extra_fp:
        extras = json.load(extra_fp)

    for record in tqdm(extras):
        # `or None` collapses missing and falsy values to None.
        merged.append({name: record.get(name) or None for name in field_names})

    with open(after_file_path, "w", encoding="utf-8") as out_fp:
        json.dump(merged, out_fp, ensure_ascii=False, indent=4)


kihoon.lee's avatar
upload  
kihoon.lee committed
81
82
83
def testing():
    """Run one chat() call on a fixed greeting exchange and print the reply.

    The reply is wrapped in the ANSI escape codes ``\\033[94m`` and
    ``\\033[0m`` when printed. Returns ``None``.
    """
    sample_question, sample_answer = "안녕", "안녕하세요. 무엇을 도와드릴까요?"
    suggestions = chat(sample_question, sample_answer)
    print(f"\033[94m{suggestions}\033[0m")

kihoon.lee's avatar
kihoon.lee committed
90
def main():
    """Label the v1.4a entries and merge them into the v1.4 data set.

    Runs ``add_recommendation`` on "QR_v1.4a.json", then appends its output
    to the entries of "QR_v1.4.json" and writes the result to "QR_v1.5.json".
    """
    # One shared path for the intermediate file: previously the labeling step
    # wrote "QR_v1_4b.json" while the merge step read "QR_v1.4b.json", so the
    # merge never saw the file that had just been produced.
    labeled_file = "QR_v1.4b.json"

    add_recommendation(input_file="QR_v1.4a.json", output_file=labeled_file)

    concat_jsons(
        before_file_path="QR_v1.4.json",
        additional_file_path=labeled_file,
        after_file_path="QR_v1.5.json",
    )

    print("\033[94m ##DONE## \033[0m")
kihoon.lee's avatar
kihoon.lee committed
100
101
102
103
104
105
106
107
108


if __name__ == "__main__":
    # Exercise a single chat() call first, before the labeling run starts.
    testing()

    # Label the entries of the source file and write them to the output file.
    source_path, labeled_path = "QR_v1.4.json", "QR_v1_5.json"
    add_recommendation(input_file=source_path, output_file=labeled_path)

    print("\033[94m ##DONE## \033[0m")