run_labeling.py 4.14 KB
Newer Older
kihoon.lee's avatar
upload  
kihoon.lee committed
1
2
3
4
5
6
7
8
import openai
from openai import AsyncOpenAI, OpenAI
import os
import time
import pandas as pd
import json
from tqdm import tqdm

kihoon.lee's avatar
update    
kihoon.lee committed
9

kihoon.lee's avatar
upload  
kihoon.lee committed
10
_client = None  # OpenAI client shared by every chat() call; created lazily


def _get_client():
    """Return the shared OpenAI client, creating it on first use."""
    global _client
    if _client is None:
        # The API key is read from the environment once, at first use.
        _client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    return _client


def chat(question, answer):
    """Ask gpt-4o to suggest three follow-up questions for one Q/A exchange.

    The system prompt (in Korean) tells the model to work out the current
    topic and recommend three queries, formatted as a numbered list. The user
    message embeds ``question`` and ``answer`` and asks which additional
    questions would be good to ask.

    Args:
        question: The user's utterance from the conversation.
        answer: The assistant's reply to ``question``.

    Returns:
        The text of the first completion choice, with surrounding whitespace
        stripped.

    Raises:
        RuntimeError: If the first choice carries no text content.
        Exception: Any error raised by the OpenAI client is propagated.
    """
    # Adjacent string literals are concatenated as-is (no separator is
    # inserted between the sentences).
    system_prompt = (
        "현재 대화 주제를 파악한 뒤, 사용자가 AI 어시스턴트에게 질문하면 좋을 것 같은 질의 3가지를 추천합니다."
        "출력 형식은 '1.str\n2.str\n3.str' 이며, 답변 예시는 다음과 같습니다."
        "1. 오늘 날씨는 어떤가요?\n2. 기분 좋게 하루를 시작하려면 어떤걸 해야할까요? \n3. 효과적인 답변을 얻기 위해 제가 어떤 질문을 해야할까요?"
    )
    user_prompt = (
        f"User: '{question}'\n AI:'{answer}'\n\n"
        "도움을 받거나 정보를 얻고 싶은데, 추가적으로 제가 AI에게 어떤 질문을 하면 좋을까요? 친절한 어투로 질문하고 싶어요."
    )

    # Reuse one client so repeated calls share its connections instead of
    # constructing (and never closing) a fresh client per request.
    completion = _get_client().chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )

    content = completion.choices[0].message.content
    if content is None:
        # Without this guard the failure would surface as an opaque
        # "'NoneType' object has no attribute 'strip'".
        raise RuntimeError("chat completion returned no text content")
    return content.strip()

kihoon.lee's avatar
update    
kihoon.lee committed
35

kihoon.lee's avatar
upload  
kihoon.lee committed
36
def add_recommendation(input_file, output_file):
    """Add a "추천질의" field to every entry of a JSON file via ``chat``.

    Loads ``input_file`` (UTF-8 JSON), and for each entry calls ``chat`` with
    the entry's "질문" and "답변" values (empty string when a key is absent),
    storing the result under "추천질의". Progress is checkpointed to
    ``output_file`` after every 100th entry and written once more at the end.

    An entry whose processing raises is reported, followed by a 10 second
    pause; it is not retried and keeps whatever fields it already had.

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path of the JSON file to write (checkpoints and final).

    Raises:
        OSError: If ``input_file`` cannot be read or the final write fails.
        json.JSONDecodeError: If ``input_file`` is not valid JSON.
    """
    with open(input_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    total_entries = len(data)

    def save_progress():
        # Rewrites the whole output file with the current state of `data`.
        with open(output_file, "w", encoding="utf-8") as out:
            json.dump(data, out, ensure_ascii=False, indent=4)

    for i, entry in enumerate(tqdm(data)):
        try:
            question = entry.get("질문", "")
            answer = entry.get("답변", "")
            entry["추천질의"] = chat(question, answer)
        except Exception as e:
            # Best effort: report, pause before the next request, move on.
            tqdm.write(f"오류 발생: {e}")
            time.sleep(10)
        else:
            tqdm.write(
                f"{i + 1}/{total_entries} 완료. ({(i + 1) / total_entries * 100:.2f}%)"
            )

        # Checkpoint on every 100th entry whether or not that particular
        # entry succeeded, so one failure cannot postpone the save by a
        # further 100 entries.
        if (i + 1) % 100 == 0:
            try:
                save_progress()
            except OSError as e:
                # A failed checkpoint is reported but does not abort the run
                # and does not trigger the API back-off pause.
                tqdm.write(f"오류 발생: {e}")

    save_progress()

kihoon.lee's avatar
update    
kihoon.lee committed
60

kihoon.lee's avatar
update    
kihoon.lee committed
61
def concat_jsons(before_file_path, additional_file_path, after_file_path):
    """Append the records of one JSON file to another and save the result.

    Each record from ``additional_file_path`` is reduced to a fixed set of
    keys before being appended to the data loaded from ``before_file_path``.
    A key that is missing or holds a falsy value is stored as ``None``.
    The combined data is written to ``after_file_path`` as UTF-8 JSON.

    Args:
        before_file_path: JSON file whose content is extended.
        additional_file_path: JSON file providing the records to append.
        after_file_path: Destination path for the combined JSON.
    """
    # Keys copied from each appended record, in output order.
    field_names = (
        "질문",
        "답변",
        "추천질의",
        "그룹사",
        "생성일",
        "세션ID",
        "모델타입",
        "사용자 평가",
        "모인ID",
    )

    with open(before_file_path, "r", encoding="utf-8") as base_fp:
        merged = json.load(base_fp)

    with open(additional_file_path, "r", encoding="utf-8") as extra_fp:
        extra_records = json.load(extra_fp)

    for record in tqdm(extra_records):
        # `or None` maps both absent keys and falsy values to None.
        merged.append({name: record.get(name) or None for name in field_names})

    with open(after_file_path, "w", encoding="utf-8") as out_fp:
        json.dump(merged, out_fp, ensure_ascii=False, indent=4)


kihoon.lee's avatar
upload  
kihoon.lee committed
86
87
88
def testing():
    """Send one fixed greeting exchange through ``chat`` and print the reply.

    The reply is wrapped in the ANSI escape codes ``\\033[94m`` / ``\\033[0m``
    when printed. Returns ``None``.
    """
    sample_question = "안녕"
    sample_answer = "안녕하세요. 무엇을 도와드릴까요?"

    print(f"\033[94m{chat(sample_question, sample_answer)}\033[0m")

kihoon.lee's avatar
kihoon.lee committed
95
def main():
    """Annotate the new batch, merge it into the existing data, and report.

    Runs ``add_recommendation`` on "QR_v1.4a.json", then appends its output
    to "QR_v1.4.json" with ``concat_jsons``, writing "QR_v1.5.json".
    """
    # One variable for the intermediate file so the writer and the reader
    # cannot drift apart (previously "QR_v1_4b.json" was written while
    # "QR_v1.4b.json" was read).
    labeled_path = "QR_v1.4b.json"

    add_recommendation(input_file="QR_v1.4a.json", output_file=labeled_path)

    concat_jsons(
        before_file_path="QR_v1.4.json",
        additional_file_path=labeled_path,
        after_file_path="QR_v1.5.json",
    )

    print("\033[94m ##DONE## \033[0m")
kihoon.lee's avatar
kihoon.lee committed
105
106
107
108
109


if __name__ == "__main__":
    # Run the single-exchange check first, then annotate QR_v1.4.json
    # into QR_v1.5.json.
    testing()
    add_recommendation(output_file="QR_v1.5.json", input_file="QR_v1.4.json")
    print("\033[94m ##DONE## \033[0m")