Write app.py and upload data
- .gitattributes +2 -0
- app.py +192 -0
- dialogues_set/dialogues_film.json +3 -0
- dialogues_set/dialogues_jindong.json +3 -0
- dialogues_set/dialogues_music.json +3 -0
- dialogues_set/dialogues_natural.json +3 -0
- dialogues_set/dialogues_taobao.json +3 -0
- dialogues_set/dialogues_travel_kd.json +3 -0
- requirements.txt +2 -0
.gitattributes
CHANGED
@@ -32,3 +32,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.json filter=lfs diff=lfs merge=lfs -text
+dialogues_set/* filter=lfs diff=lfs merge=lfs -text
app.py
ADDED
@@ -0,0 +1,192 @@
import collections
import datetime
import json
import os
import threading
import time
from zoneinfo import ZoneInfo

import gradio as gr
import openai
import tiktoken

openai.api_key = os.getenv('API_KEY')

timezone = ZoneInfo('Asia/Shanghai')
timestamp2string = lambda timestamp: datetime.datetime.fromtimestamp(timestamp, tz=timezone).strftime('%Y-%m-%d %H:%M:%S')


def num_tokens_from_messages(messages, model="gpt-3.5-turbo"):
    """Returns the number of tokens used by a list of messages."""
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        encoding = tiktoken.get_encoding("cl100k_base")
    if model == "gpt-3.5-turbo":  # note: future models may deviate from this
        num_tokens = 0
        for message in messages:
            num_tokens += 4  # every message follows <im_start>{role/name}\n{content}<im_end>\n
            for key, value in message.items():
                num_tokens += len(encoding.encode(value))
                if key == "name":  # if there's a name, the role is omitted
                    num_tokens -= 1  # role is always required and always 1 token
        num_tokens += 2  # every reply is primed with <im_start>assistant
        return num_tokens
    else:
        raise NotImplementedError(f"""num_tokens_from_messages() is not presently implemented for model {model}.
See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens.""")


qas = []          # processed query-response pairs: [{"q": ..., "a": ...}, ...]
qs = []           # queries still to be processed
start_time = None


def read_qs():
    global qs, qas
    directory = "./dialogues_set"
    filenames = [
        'dialogues_film.json',
        'dialogues_jindong.json',
        'dialogues_music.json',
        'dialogues_natural.json',
        'dialogues_taobao.json',
        'dialogues_travel_kd.json',
    ]
    for filename in filenames:
        with open(f"{directory}/{filename}", "r", encoding="utf-8") as f:
            # each line is a JSON object mapping the (stringified) line index to a query
            for idx, line in enumerate(f):
                idx2query = json.loads(line)
                query = idx2query[str(idx)]
                qs.append(query)
    print(f"read {len(qs)} queries from files")

    # Resume from a previous run: skip queries already answered in qas.json.
    if os.path.exists("qas.json"):
        with open("qas.json", "r", encoding="utf-8") as f:
            qas = json.loads(f.read())
        print(f"read {len(qas)} query-responses from qas.json")

        existed_qs = collections.Counter([qa["q"] for qa in qas])
        remained_qs = []
        for q in qs:
            if existed_qs[q] > 0:
                existed_qs[q] -= 1
            else:
                remained_qs.append(q)
        print(f"filtered out {len(qs) - len(remained_qs)} queries with reference to qas.json")
        qs = remained_qs

    return qs


def ask(query, max_attempt_times=3):
    answer = None
    attempt_times = 0
    while answer is None and attempt_times < max_attempt_times:
        attempt_times += 1
        try:
            answer = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "user", "content": query}
                ]
            )["choices"][0]["message"]["content"]
        except Exception as e:
            print(e)
            print(f"retry in {attempt_times * 10} seconds...")
            time.sleep(attempt_times * 10)
    return answer


def askingChatGPT(min_interval_seconds=10):
    global start_time
    qs = read_qs()

    start_time = time.time()
    for i, q in enumerate(qs):
        ask_start_time = time.time()

        a = ask(q)
        qas.append({"q": q, "a": a})

        ask_end_time = time.time()
        elapsed_time = ask_end_time - ask_start_time
        delay_time = min_interval_seconds - elapsed_time
        if delay_time > 0:
            time.sleep(delay_time)

        print(f"{timestamp2string(time.time())}: iterations: {i} / {len(qs)} | elapsed time of this query (s): {elapsed_time:.2f}")

    return


# Run the querying loop in the background so the Gradio UI stays responsive.
thread = threading.Thread(target=askingChatGPT)
thread.daemon = True
thread.start()


def showcase(api_key):
    if api_key != openai.api_key:
        chatbot_ret = [(f"Your entered api_key:<br>{api_key}<br>is incorrect.",
                        "So I cannot provide you with any information in this private space.")]
    else:
        recent_qas = qas[-10:]  # the most recently processed pairs
        chatbot_ret = [("Your entered api_key is correct.",
                        f"The latest {len(recent_qas)} query-responses are displayed below.")]
        for qa in recent_qas:
            chatbot_ret += [(qa["q"], qa["a"])]
    return chatbot_ret


def download(api_key):
    if api_key != openai.api_key:
        chatbot_ret = [(f"Your entered api_key:<br>{api_key}<br>is incorrect.",
                        "So I cannot provide you with any information in this private space.")]
        file_ret = gr.File.update(value=None, visible=False)
    else:
        chatbot_ret = [("Your entered api_key is correct.",
                        f"The file containing all processed query-responses ({len(qas)} in total) can be downloaded below.")]
        filename = f"qas{len(qas)}.json"
        with open(filename, "w", encoding="utf-8") as f:
            f.write(json.dumps(qas, ensure_ascii=False, indent=2))
        file_ret = gr.File.update(value=filename, visible=True)
    return chatbot_ret, file_ret


def display(api_key):
    if api_key != openai.api_key:
        chatbot_ret = [(f"Your entered api_key:<br>{api_key}<br>is incorrect.",
                        "So I cannot provide you with any information in this private space.")]
    elif len(qas) < 1:
        chatbot_ret = [("Your entered api_key is correct.",
                        "But processing has only just started, so there is no useful progress information yet.")]
    else:
        time_takes = time.time() - start_time
        time_remains = time_takes * (len(qs) - len(qas)) / len(qas)
        end_time = start_time + time_remains

        messages = []
        for qa in qas:
            messages.append({"role": "user", "content": qa["q"]})
            messages.append({"role": "assistant", "content": qa["a"] or ""})
        num_tokens_processed = num_tokens_from_messages(messages)
        num_tokens_total = num_tokens_processed * len(qs) / len(qas)
        # gpt-3.5-turbo pricing at the time: $0.002 per 1K tokens
        dollars_tokens_processed = 0.002 * num_tokens_processed / 1000
        dollars_tokens_total = 0.002 * num_tokens_total / 1000

        chatbot_ret = [("Your entered api_key is correct.",
                        "The progress information is displayed below.")]
        chatbot_ret += [("Processed / total queries:", f"{len(qas)} / {len(qs)}")]
        chatbot_ret += [("Hours elapsed / est. remaining:", f"{time_takes / 3600:.2f} / {time_remains / 3600:.2f}")]
        chatbot_ret += [("Start time / est. end time:", f"{timestamp2string(start_time)} / {timestamp2string(end_time)}")]
        chatbot_ret += [("Processed / est. total tokens:", f"{num_tokens_processed} / {num_tokens_total:.0f}")]
        chatbot_ret += [("Cost of processed / est. total tokens ($):", f"{dollars_tokens_processed:.2f} / {dollars_tokens_total:.2f}")]

    return chatbot_ret


with gr.Blocks() as demo:

    with gr.Column(variant="panel"):
        chatbot = gr.Chatbot()
        txt = gr.Textbox(show_label=False, placeholder="Enter my API_KEY to access this private space").style(container=False)
        with gr.Row():
            button_showcase = gr.Button("Show Recent Query-Responses")
            button_download = gr.Button("Download All Query-Responses")
            button_display = gr.Button("Display Progress Information")

    downloadfile = gr.File(None, interactive=False, show_label=False, visible=False)

    button_showcase.click(fn=showcase, inputs=[txt], outputs=[chatbot])
    button_download.click(fn=download, inputs=[txt], outputs=[chatbot, downloadfile])
    button_display.click(fn=display, inputs=[txt], outputs=[chatbot])

demo.launch()
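Note on the input format: the dialogues_set/*.json files are stored via Git LFS, so their contents are not shown in this commit; the parsing loop in read_qs() implies that each file holds one JSON object per line, keyed by that line's index. A minimal sketch under that assumption (the sample queries below are hypothetical, not the real data):

import json

sample_lines = ['{"0": "Recommend a good sci-fi film"}', '{"1": "Who directed that film?"}']  # hypothetical queries
for idx, line in enumerate(sample_lines):
    idx2query = json.loads(line)
    print(idx2query[str(idx)])  # prints the query stored on that line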
dialogues_set/dialogues_film.json
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1065ddc9706d8644b5cb686e43c197e4ae1a6273a2ee85892ab014a3d0589048
size 1990042
dialogues_set/dialogues_jindong.json
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7c3cb0b5351f451d7079ba1f647c61d50c65955af1a231b4ebba5ad4336dfcec
size 875228
dialogues_set/dialogues_music.json
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e5edd415cdb29221cd727ea3536770440a56e0732f23e9dadc2037bc9c3242f2
size 1316759
dialogues_set/dialogues_natural.json
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d94a0738b9d7778afa4294392a60bd4b4fd1da395c70bf7943c774da1cc9126a
size 27800692
dialogues_set/dialogues_taobao.json
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f4ce15361975a74e91acdd032862c7e3ac421d347057b1fb375647d5ac6bb826
size 2472553
dialogues_set/dialogues_travel_kd.json
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c779ff0148f28403e3856b374fb7b4fac2125c749da067a55a7b499b7be2e87f
size 1397624
requirements.txt
ADDED
@@ -0,0 +1,2 @@
openai==0.27.0
tiktoken==0.3.0
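Note on resuming: read_qs() also looks for an optional qas.json checkpoint that this commit does not include, so a restarted Space can skip queries that were already answered. A minimal sketch of the format it expects, which matches the list of {"q", "a"} objects that download() writes (the entries below are hypothetical):

import json

qas = [{"q": "Recommend a good sci-fi film", "a": "You might enjoy Interstellar."}]  # hypothetical entries
with open("qas.json", "w", encoding="utf-8") as f:
    f.write(json.dumps(qas, ensure_ascii=False, indent=2))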