run_labeling.py 3.52 KB
Newer Older
kihoon.lee's avatar
upload  
kihoon.lee committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import openai
from openai import AsyncOpenAI, OpenAI
import os
import time
import pandas as pd
import json
from tqdm import tqdm

# Lazily created client shared by every chat() call that does not pass its own,
# so a batch run does not construct a new client per dataset entry.
_default_client = None


def _get_default_client():
    """Return the shared OpenAI client, creating it on first use."""
    global _default_client
    if _default_client is None:
        _default_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
    return _default_client


def chat(question, answer, model="gpt-4o", client=None):
    """Ask the chat model for three follow-up questions to one Q/A exchange.

    Args:
        question: The user's utterance in the conversation being labeled.
        answer: The assistant's reply to ``question``.
        model: Name of the chat completion model to call.
        client: Object exposing ``chat.completions.create(...)``. When omitted,
            a shared ``OpenAI`` client configured from the ``OPENAI_API_KEY``
            environment variable is used.

    Returns:
        The model's reply with surrounding whitespace stripped, or an empty
        string when the reply carries no text content.
    """
    if client is None:
        client = _get_default_client()

    completion = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                # Korean system prompt: the bot recommends three follow-up
                # questions the user could ask the assistant, formatted as
                # '1.str\n2.str\n3.str', followed by one worked example.
                "content": (
                    "당신은 대화를 오래 이어가기위해 사용자에게 질문을 추천해주는 봇입니다."
                    "즉, 사용자가 인공지능 어시스턴트에게 추가 질문을 진행합니다."
                    "현재 대화에서 사용자가 어시스턴트에게 질문하면 좋을 것 같은 3가지 질의를 추천해주세요."
                    "출력 형식은 '1.str\n2.str\n3.str' 입니다."
                    "출력 예시는 다음과 같습니다."
                    "1. 메뉴에 있는 음료 중 가장 비싼건 무엇인가요?\n2. 메뉴에서 따뜻한 음료와 차가운 음료 옵션을 모두 제공하는 항목은 무엇인가요? \n3. 스페셜티 커피는 어떤 종류가 있나요?"
                )
            },
            {
                "role": "user",
                # NOTE(review): the answer text runs straight into the
                # instruction sentence with no separator. Left unchanged so
                # the prompt sent to the model stays identical.
                "content": (f"사용자:{question}\n 어시스턴트:{answer}"
                "위 내용에 대해 어시스턴트에게 추가로 물어볼만한 질의를 3개 추천해주세요."),
            }
        ],
    )
    # The SDK types message.content as optional; calling .strip() on None
    # would raise AttributeError and abort an entire labeling batch.
    content = completion.choices[0].message.content
    return content.strip() if content else ""

def _write_json(data, path):
    """Serialize ``data`` to ``path`` as indented UTF-8 JSON without ASCII escaping."""
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)


def add_recommendation(input_file, output_file):
    """Label every entry of a JSON dataset with recommended follow-up questions.

    Loads ``input_file``, calls ``chat`` with each entry's "질문" (question)
    and "답변" (answer) values, stores the reply under "추천질의", and writes
    the updated data to ``output_file``.

    Args:
        input_file: Path of the JSON file to read; iterated as a sequence of
            dict entries.
        output_file: Path the fully labeled JSON is written to.

    Raises:
        Whatever ``chat`` or the file operations raise. If labeling is
        interrupted, the entries processed so far are first saved to
        ``<output_file>.partial`` and the original exception is re-raised;
        ``output_file`` itself is only written after a complete run.
    """
    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    total_entries = len(data)

    try:
        for done, entry in enumerate(tqdm(data), start=1):
            question = entry.get("질문", "")
            answer = entry.get("답변", "")
            entry["추천질의"] = chat(question, answer)
            tqdm.write(f"{done}/{total_entries} 완료. ({done / total_entries * 100:.2f}%)")
    except BaseException:
        # Each entry costs one remote API call. Keep what was already labeled
        # (also on Ctrl-C, hence BaseException) instead of discarding it, but
        # use a sidecar file so an earlier complete output is never clobbered.
        partial_file = f"{output_file}.partial"
        _write_json(data, partial_file)
        tqdm.write(f"Labeling interrupted; partial results saved to {partial_file}")
        raise

    _write_json(data, output_file)

kihoon.lee's avatar
update    
kihoon.lee committed
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
def concat_jsons(before_file_path, additional_file_path, after_file_path):
    """Append the entries of one JSON file to another and save the result.

    Each entry of ``additional_file_path`` is reduced to its "질문", "답변"
    and "추천질의" values (empty string when a key is absent) and padded with
    ``None`` for the remaining metadata columns before being appended to the
    data loaded from ``before_file_path``. The combined list is written to
    ``after_file_path``.

    Args:
        before_file_path: JSON file whose loaded content is appended to.
        additional_file_path: JSON file holding the entries to append.
        after_file_path: Destination path for the merged JSON.
    """
    # Keys copied from the additional entries, in output order.
    carried_keys = ("질문", "답변", "추천질의")
    # Keys the additional entries have no value for; emitted as null.
    blank_keys = ("그룹사", "생성일", "세션ID", "모델타입", "사용자 평가", "모인ID")

    with open(before_file_path, 'r', encoding='utf-8') as base_fp:
        merged = json.load(base_fp)

    with open(additional_file_path, 'r', encoding='utf-8') as extra_fp:
        extras = json.load(extra_fp)

    for record in tqdm(extras):
        row = {key: record.get(key, "") for key in carried_keys}
        row.update(dict.fromkeys(blank_keys))
        merged.append(row)

    with open(after_file_path, 'w', encoding='utf-8') as out_fp:
        json.dump(merged, out_fp, ensure_ascii=False, indent=4)



kihoon.lee's avatar
upload  
kihoon.lee committed
77
78
79
80
81
82
83
84
85
86

def testing():
    """Smoke-test ``chat`` with a fixed greeting exchange.

    Sends one hard-coded question/answer pair to ``chat`` and prints the reply
    wrapped in the ANSI escape codes ``\\033[94m`` ... ``\\033[0m``.
    """
    sample_question, sample_answer = "안녕", "안녕하세요. 무엇을 도와드릴까요?"
    print(f"\033[94m{chat(sample_question, sample_answer)}\033[0m")

kihoon.lee's avatar
update    
kihoon.lee committed
87

kihoon.lee's avatar
upload  
kihoon.lee committed
88
89
90
if __name__ == "__main__":
    # Quick check that the API call works before starting the batch run.
    testing()

    # The labeling step's output is the merge step's input, so both must use
    # the same path. Previously the first wrote "QR_v1_4b.json" while the
    # second read "QR_v1.4b.json", a file the run never produced.
    labeled_file = "QR_v1.4b.json"

    add_recommendation(input_file="QR_v1.4a.json",
                       output_file=labeled_file)

    concat_jsons(before_file_path="QR_v1.4.json",
                 additional_file_path=labeled_file,
                 after_file_path="QR_v1.5.json")

    print("\033[94m ##DONE## \033[0m")