Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
kihoon.lee
dataset
Commits
6716d020
Commit
6716d020
authored
Aug 06, 2024
by
kihoon.lee
Browse files
update
parent
8c65ecc4
Changes
1
Hide whitespace changes
Inline
Side-by-side
question_recommendation/run_labeling.py
View file @
6716d020
...
@@ -6,9 +6,10 @@ import pandas as pd
...
@@ -6,9 +6,10 @@ import pandas as pd
import
json
import
json
from
tqdm
import
tqdm
from
tqdm
import
tqdm
def
chat
(
question
,
answer
):
def
chat
(
question
,
answer
):
client
=
OpenAI
(
api_key
=
os
.
getenv
(
'
OPENAI_API_KEY
'
))
client
=
OpenAI
(
api_key
=
os
.
getenv
(
"
OPENAI_API_KEY
"
))
completion
=
client
.
chat
.
completions
.
create
(
completion
=
client
.
chat
.
completions
.
create
(
model
=
"gpt-4o"
,
model
=
"gpt-4o"
,
messages
=
[
messages
=
[
...
@@ -16,45 +17,51 @@ def chat(question, answer):
...
@@ -16,45 +17,51 @@ def chat(question, answer):
"role"
:
"system"
,
"role"
:
"system"
,
"content"
:
(
"content"
:
(
"당신은 대화를 오래 이어가기위해 사용자에게 질문을 추천해주는 봇입니다."
"당신은 대화를 오래 이어가기위해 사용자에게 질문을 추천해주는 봇입니다."
"
즉,
사용자
가
인공지능 어시스턴트에게 추가 질문을 진행합니다."
"사용자
는
인공지능 어시스턴트에게 추가 질문을 진행합니다."
"현재 대화에서 사용자가 어시스턴트에게 질문하면 좋을 것 같은 3가지 질의를 추천해주세요."
"현재 대화에서 사용자가 어시스턴트에게 질문하면 좋을 것 같은 3가지 질의를 추천해주세요."
"출력 형식은 '1.str
\n
2.str
\n
3.str' 입니다."
"출력 형식은 '1.str
\n
2.str
\n
3.str' 입니다."
"출력 예시는 다음과 같습니다."
"출력 예시는 다음과 같습니다."
"1. 메뉴에 있는 음료 중 가장 비싼건 무엇인가요?
\n
2. 메뉴에서 따뜻한 음료와 차가운 음료 옵션을 모두 제공하는 항목은 무엇인가요?
\n
3. 스페셜티 커피는 어떤 종류가 있나요?"
"1. 메뉴에 있는 음료 중 가장 비싼건 무엇인가요?
\n
2. 메뉴에서 따뜻한 음료와 차가운 음료 옵션을 모두 제공하는 항목은 무엇인가요?
\n
3. 스페셜티 커피는 어떤 종류가 있나요?"
)
)
,
},
},
{
{
"role"
:
"user"
,
"role"
:
"user"
,
"content"
:
(
f
"사용자:
{
question
}
\n
어시스턴트:
{
answer
}
"
"content"
:
(
"위 내용에 대해 어시스턴트에게 추가로 물어볼만한 질의를 3개 추천해주세요."
),
f
"사용자:
{
question
}
\n
어시스턴트:
{
answer
}
"
}
"위 내용에 대해 어시스턴트에게 추가로 물어볼만한 질의를 3개 추천해주세요."
),
},
],
],
)
)
return
completion
.
choices
[
0
].
message
.
content
.
strip
()
return
completion
.
choices
[
0
].
message
.
content
.
strip
()
def
add_recommendation
(
input_file
,
output_file
):
def
add_recommendation
(
input_file
,
output_file
):
with
open
(
input_file
,
'r'
,
encoding
=
'
utf-8
'
)
as
f
:
with
open
(
input_file
,
"r"
,
encoding
=
"
utf-8
"
)
as
f
:
data
=
json
.
load
(
f
)
data
=
json
.
load
(
f
)
total_entries
=
len
(
data
)
total_entries
=
len
(
data
)
for
i
,
entry
in
enumerate
(
tqdm
(
data
)):
for
i
,
entry
in
enumerate
(
tqdm
(
data
)):
question
=
entry
.
get
(
"질문"
,
""
)
question
=
entry
.
get
(
"질문"
,
""
)
answer
=
entry
.
get
(
"답변"
,
""
)
answer
=
entry
.
get
(
"답변"
,
""
)
recommendation
=
chat
(
question
,
answer
)
recommendation
=
chat
(
question
,
answer
)
entry
[
"추천질의"
]
=
recommendation
entry
[
"추천질의"
]
=
recommendation
tqdm
.
write
(
f
"
{
i
+
1
}
/
{
total_entries
}
완료. (
{
(
i
+
1
)
/
total_entries
*
100
:
.
2
f
}
%)"
)
tqdm
.
write
(
f
"
{
i
+
1
}
/
{
total_entries
}
완료. (
{
(
i
+
1
)
/
total_entries
*
100
:
.
2
f
}
%)"
with
open
(
output_file
,
'w'
,
encoding
=
'utf-8'
)
as
f
:
)
with
open
(
output_file
,
"w"
,
encoding
=
"utf-8"
)
as
f
:
json
.
dump
(
data
,
f
,
ensure_ascii
=
False
,
indent
=
4
)
json
.
dump
(
data
,
f
,
ensure_ascii
=
False
,
indent
=
4
)
def
concat_jsons
(
before_file_path
,
additional_file_path
,
after_file_path
):
def
concat_jsons
(
before_file_path
,
additional_file_path
,
after_file_path
):
with
open
(
before_file_path
,
'r'
,
encoding
=
'
utf-8
'
)
as
file_01
:
with
open
(
before_file_path
,
"r"
,
encoding
=
"
utf-8
"
)
as
file_01
:
before_json
=
json
.
load
(
file_01
)
before_json
=
json
.
load
(
file_01
)
with
open
(
additional_file_path
,
'r'
,
encoding
=
'
utf-8
'
)
as
file_02
:
with
open
(
additional_file_path
,
"r"
,
encoding
=
"
utf-8
"
)
as
file_02
:
additional_json
=
json
.
load
(
file_02
)
additional_json
=
json
.
load
(
file_02
)
for
item
in
tqdm
(
additional_json
):
for
item
in
tqdm
(
additional_json
):
new_entry
=
{
new_entry
=
{
"질문"
:
item
.
get
(
"질문"
,
""
),
"질문"
:
item
.
get
(
"질문"
,
""
),
...
@@ -65,34 +72,33 @@ def concat_jsons(before_file_path, additional_file_path, after_file_path):
...
@@ -65,34 +72,33 @@ def concat_jsons(before_file_path, additional_file_path, after_file_path):
"세션ID"
:
None
,
"세션ID"
:
None
,
"모델타입"
:
None
,
"모델타입"
:
None
,
"사용자 평가"
:
None
,
"사용자 평가"
:
None
,
"모인ID"
:
None
"모인ID"
:
None
,
}
}
before_json
.
append
(
new_entry
)
before_json
.
append
(
new_entry
)
with
open
(
after_file_path
,
'w'
,
encoding
=
'
utf-8
'
)
as
file
:
with
open
(
after_file_path
,
"w"
,
encoding
=
"
utf-8
"
)
as
file
:
json
.
dump
(
before_json
,
file
,
ensure_ascii
=
False
,
indent
=
4
)
json
.
dump
(
before_json
,
file
,
ensure_ascii
=
False
,
indent
=
4
)
def
testing
():
def
testing
():
question
=
"안녕"
question
=
"안녕"
answer
=
"안녕하세요. 무엇을 도와드릴까요?"
answer
=
"안녕하세요. 무엇을 도와드릴까요?"
recommendation
=
chat
(
question
,
answer
)
recommendation
=
chat
(
question
,
answer
)
print
(
f
"
\033
[94m
{
recommendation
}
\033
[0m"
)
print
(
f
"
\033
[94m
{
recommendation
}
\033
[0m"
)
return
None
return
None
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
testing
()
testing
()
add_recommendation
(
input_file
=
"QR_v1.4a.json"
,
output_file
=
"QR_v1_4b.json"
)
concat_jsons
(
before_file_path
=
"QR_v1.4.json"
,
add_recommendation
(
input_file
=
"QR_v1.4a.json"
,
output_file
=
"QR_v1_4b.json"
)
additional_file_path
=
"QR_v1.4b.json"
,
after_file_path
=
"QR_v1.5.json"
)
concat_jsons
(
before_file_path
=
"QR_v1.4.json"
,
additional_file_path
=
"QR_v1.4b.json"
,
after_file_path
=
"QR_v1.5.json"
,
)
print
(
f
"
\033
[94m ##DONE##
\033
[0m"
)
print
(
f
"
\033
[94m ##DONE##
\033
[0m"
)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment