Upload 2 files
Browse files- app.py +351 -0
- requirements.txt +21 -0
app.py
ADDED
@@ -0,0 +1,351 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import os
|
3 |
+
import httpx
|
4 |
+
import openai
|
5 |
+
from openai import OpenAI
|
6 |
+
from openai import AsyncOpenAI
|
7 |
+
|
8 |
+
from datasets import load_dataset
|
9 |
+
|
10 |
+
# Load the 50-Chinese-novel character corpus and index it three ways:
#   novel_list   - ordered, de-duplicated novel titles
#   novel2roles  - novel title -> ordered list of its roles
#   role2datas   - (novel, role) -> list of raw dialogue records
dataset = load_dataset("silk-road/50-Chinese-Novel-Characters")

from tqdm import tqdm

novel_list = []
novel2roles = {}
role2datas = {}

for record in tqdm(dataset['train']):
    book = record['book']
    speaker = record['role']

    if book not in novel_list:
        novel_list.append(book)

    roles = novel2roles.setdefault(book, [])
    if speaker not in roles:
        roles.append(speaker)

    role2datas.setdefault((book, speaker), []).append(record)
|
38 |
+
|
39 |
+
|
40 |
+
# Decode the precomputed BGE-zh embeddings (base64 strings under the
# "bge_zh_s15" key) into float vectors once, up front, so retrieval
# never has to decode at chat time.
from ChatHaruhi.utils import base64_to_float_array

from tqdm import tqdm

for book in tqdm(novel_list):
    for speaker in novel2roles[book]:
        for record in role2datas[(book, speaker)]:
            record["vec"] = base64_to_float_array(record["bge_zh_s15"])
|
48 |
+
|
49 |
+
def conv2story( role, conversations ):
    """Flatten one conversation into a single story string.

    Human turns are kept verbatim; character turns are prefixed with
    "<role>: ".  All turns are joined with newlines.
    """
    rendered = []
    for turn in conversations:
        if turn["from"] == "human":
            rendered.append(turn["value"])
        else:
            rendered.append(role + ": " + turn["value"])
    return "\n".join(rendered)
|
52 |
+
|
53 |
+
# Attach a flattened "story" string to every record; these become the
# retrieval passages handed to ChatHaruhi below.
for book in tqdm(novel_list):
    for speaker in novel2roles[book]:
        for record in role2datas[(book, speaker)]:
            record["story"] = conv2story(speaker, record["conversations"])
|
57 |
+
|
58 |
+
|
59 |
+
# LLM backends: ChatHaruhi exposes one get_response per provider; the
# chatbot's .llm attribute is switched between these in submit_chat.
from ChatHaruhi import ChatHaruhi
from ChatHaruhi.response_openai import get_response as get_response_openai
from ChatHaruhi.response_zhipu import get_response as get_response_zhipu
from ChatHaruhi.response_erniebot import get_response as get_response_erniebot
from ChatHaruhi.response_spark import get_response as get_response_spark


# Default backend used when chatbots are first constructed.
# NOTE(review): a later `from ChatHaruhi.response_openai import get_response`
# re-binds this name at module level — confirm which default is intended.
get_response = get_response_zhipu
|
67 |
+
|
68 |
+
# Role names treated as narrators rather than in-world characters.
narrators = ["叙述者", "旁白","文章作者","作者","Narrator","narrator"]


def package_persona( role_name, world_name ):
    """Build the base system persona for `role_name` of novel `world_name`.

    Narrator-style roles (listed in `narrators`) get the dedicated
    narrator persona instead of the character persona.
    """
    if role_name in narrators:
        return package_persona_for_narrator( role_name, world_name )

    # Fixed typo: the original used a stray U+2018 curly quote ("others‘")
    # instead of an ASCII apostrophe in the prompt text.
    return f"""I want you to act like {role_name} from {world_name}.
If others' questions are related with the novel, please try to reuse the original lines from the novel.
I want you to respond and answer like {role_name} using the tone, manner and vocabulary {role_name} would use."""
|
78 |
+
|
79 |
+
def package_persona_for_narrator( role_name, world_name ):
    """Persona for narrator-type roles: keep narrating and push the plot."""
    header = f"I want you to act like narrator {role_name} from {world_name}."
    return header + "\n当角色行动之后,继续交代和推进新的剧情."
|
82 |
+
|
83 |
+
# Cache: (novel, role) -> constructed ChatHaruhi chatbot instance.
role_tuple2chatbot = {}


def initialize_chatbot( novel, role ):
    # Build (once) and cache the ChatHaruhi bot for this (novel, role).
    # The persona template ends with three {{RAG对话}} placeholder slots,
    # which ChatHaruhi fills with retrieved story passages at chat time.
    global role_tuple2chatbot
    if (novel, role) not in role_tuple2chatbot:
        persona = package_persona( role, novel )
        persona += "\n{{RAG对话}}\n{{RAG对话}}\n{{RAG对话}}\n"
        # Parallel lists: stories[i] is the text whose embedding is vecs[i].
        stories = [data["story"] for data in role2datas[(novel, role)] ]
        vecs = [data["vec"] for data in role2datas[(novel, role)] ]
        chatbot = ChatHaruhi( role_name = role, persona = persona , stories = stories, story_vecs= vecs,\
            llm = get_response)
        chatbot.verbose = False

        role_tuple2chatbot[(novel, role)] = chatbot
|
98 |
+
|
99 |
+
# Eagerly build every chatbot at startup so first requests are fast.
from tqdm import tqdm

for book in tqdm(novel_list):
    for speaker in novel2roles[book]:
        initialize_chatbot(book, speaker)
|
103 |
+
|
104 |
+
readme_text = """# 使用说明
|
105 |
+
|
106 |
+
选择小说角色
|
107 |
+
|
108 |
+
如果你有什么附加信息,添加到附加信息里面就可以
|
109 |
+
|
110 |
+
比如"韩立会炫耀自己刚刚学会了Python"
|
111 |
+
|
112 |
+
然后就可以开始聊天了
|
113 |
+
|
114 |
+
因为这些角色还没有增加Greeting信息,所以之后再开发个随机乱聊功能
|
115 |
+
|
116 |
+
# 开发细节
|
117 |
+
|
118 |
+
- 采用ChatHaruhi3.0的接口进行prompting
|
119 |
+
- 这里的数据是用一个7B的tuned qwen模型进行抽取的
|
120 |
+
- 想看数据可以去看第三个tab
|
121 |
+
- 抽取模型用了40k左右的GLM蒸馏数据
|
122 |
+
- 抽取模型是腾讯大哥BPSK训练的
|
123 |
+
|
124 |
+
# 总结人物性格
|
125 |
+
|
126 |
+
第三个Tab里面,可以显示一个prompt总结人物的性格
|
127 |
+
|
128 |
+
复制到openai或者GLM或者Claude进行人物总结
|
129 |
+
|
130 |
+
|
131 |
+
# 这些小说数据从HaruhiZero 0.4模型开始,被加入训练
|
132 |
+
|
133 |
+
openai太慢了 今天试试GLM的
|
134 |
+
|
135 |
+
不过当前demo是openai的
|
136 |
+
|
137 |
+
"""
|
138 |
+
|
139 |
+
# from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM
|
140 |
+
# tokenizer = AutoTokenizer.from_pretrained("silk-road/Haruhi-Zero-1_8B", trust_remote_code=True)
|
141 |
+
# model = AutoModelForCausalLM.from_pretrained("silk-road/Haruhi-Zero-1_8B", device_map="auto", trust_remote_code=True)
|
142 |
+
# model = model.eval()
|
143 |
+
|
144 |
+
# def get_response_qwen18(message):
|
145 |
+
# from ChatHaruhi.utils import normalize2uaua
|
146 |
+
# message_ua = normalize2uaua(message, if_replace_system = True)
|
147 |
+
# import json
|
148 |
+
# message_tuples = []
|
149 |
+
# for i in range(0, len(message_ua)-1, 2):
|
150 |
+
# message_tuple = (message_ua[i]["content"], message_ua[i+1]["content"])
|
151 |
+
# message_tuples.append(message_tuple)
|
152 |
+
# response, _ = model.chat(tokenizer, message_ua[-1]["content"], history=message_tuples)
|
153 |
+
# return response
|
154 |
+
|
155 |
+
from ChatHaruhi.response_openai import get_response, async_get_response
|
156 |
+
import gradio as gr
|
157 |
+
|
158 |
+
def get_role_list( novel ):
    """Dropdown update: the roles of `novel`, defaulting to the first."""
    roles = novel2roles[novel]
    return gr.update(choices = roles, value = roles[0])
|
162 |
+
|
163 |
+
# Append-only chat log written by submit_chat.
# NOTE(review): /content/ is a Colab-specific path — confirm it exists in
# the deployment environment, otherwise the open() in submit_chat fails.
save_log = "/content/output.txt"
|
164 |
+
|
165 |
+
def get_chatbot( novel, role ):
    """Return the cached chatbot for (novel, role), building it on demand."""
    key = (novel, role)
    if key not in role_tuple2chatbot:
        initialize_chatbot( novel, role )
    return role_tuple2chatbot[key]
|
170 |
+
|
171 |
+
import json
|
172 |
+
|
173 |
+
def random_chat_callback( novel, role, chat_history):
    """Append one unseen (query, response) pair sampled from the corpus.

    Collects every bot reply already present in chat_history, then tries
    up to 5 random records for this role; the first conversation turn
    whose reply is new is appended and the history returned.  If nothing
    new is found the history is returned unchanged.
    """
    datas = role2datas[(novel, role)]

    # Replies already shown, so the same line is never repeated.
    seen_responses = {pair[1] for pair in chat_history if pair[1] is not None}

    for _ in range(5):
        convs = random.choice(datas)["conversations"]
        # Turns alternate human / role, so pairs start at even indices.
        for i in range(0, len(convs), 2):
            query = convs[i]['value']
            response = convs[i + 1]['value']
            if response not in seen_responses:
                chat_history.append( (query, response) )
                return chat_history

    return chat_history
|
196 |
+
|
197 |
+
|
198 |
+
|
199 |
+
async def submit_chat( novel, role, user_name, user_text, chat_history, persona_addition_info,model_sel):
    """Gradio callback: run one chat turn and return the updated history.

    Rebuilds the persona (base + extra info), selects the LLM backend from
    model_sel, replays chat_history into the chatbot, sends the user turn,
    and logs the exchange to save_log as one JSON line.
    """

    # Hard cap on user input length to keep prompts bounded.
    if len(user_text) > 400:
        user_text = user_text[:400]

    # When True, the user's name is folded into the text itself and the
    # chatbot's `user` field is left empty.
    if_user_in_text = True

    chatbot = get_chatbot( novel, role )
    chatbot.persona = initialize_persona( novel, role, persona_addition_info)
    # chatbot.llm_async = async_get_response

    # Dispatch to the backend picked in the UI; erniebot is the fallback.
    if model_sel == "openai":
        chatbot.llm = get_response_openai
    elif model_sel == "Zhipu":
        chatbot.llm = get_response_zhipu
    elif model_sel == "spark":
        chatbot.llm = get_response_spark
    else:
        chatbot.llm = get_response_erniebot


    # Convert Gradio's (user, bot) tuples into ChatHaruhi's history format;
    # {{user}} / {{role}} are template placeholders resolved by ChatHaruhi.
    history = []

    for chat_tuple in chat_history:
        if chat_tuple[0] is not None:
            history.append( {"speaker":"{{user}}","content":chat_tuple[0]} )
        if chat_tuple[1] is not None:
            history.append( {"speaker":"{{role}}","content":chat_tuple[1]} )

    chatbot.history = history

    input_text = user_text

    if if_user_in_text:
        input_text = user_name + " : " + user_text
        response = chatbot.chat(user = "", text = input_text )
        # response = await chatbot.async_chat(user = "", text = input_text )
    else:
        response = chatbot.chat(user = user_name, text = input_text)
        # response = await chatbot.async_chat(user = user_name, text = input_text)
    chat_history.append( (input_text, response) )

    # Log the turn both to stdout and to the append-only file.
    print_data = {"novel":novel, "role":role, "user_text":input_text, "response":response}

    print(json.dumps(print_data, ensure_ascii=False))

    with open(save_log, "a",encoding = "utf-8") as f:
        f.write(json.dumps(print_data, ensure_ascii=False) + "\n")

    return chat_history
|
249 |
+
|
250 |
+
|
251 |
+
def initialize_persona( novel, role, persona_addition_info):
    """Base persona + user-supplied extra info + three RAG slots."""
    parts = [
        package_persona( role, novel ),
        "\n",
        persona_addition_info,
        "\n{{RAG对话}}\n{{RAG对话}}\n{{RAG对话}}\n",
    ]
    return "".join(parts)
|
257 |
+
|
258 |
+
def clean_history( ):
    """Reset the Chatbot component to an empty conversation."""
    empty_history = []
    return empty_history
|
260 |
+
|
261 |
+
def clean_input():
    """Clear the user's textbox after a message is submitted."""
    return str()
|
263 |
+
|
264 |
+
import random
|
265 |
+
|
266 |
+
def generate_summarize_prompt( novel, role_name ):
    """Build a prompt asking an external LLM to summarize a character.

    Samples up to 5 of the role's flattened dialogues and appends them
    after a Chinese instruction header.  The original always sampled
    exactly 5, which made random.sample raise ValueError for any role
    with fewer than 5 stories.
    """
    whole_prompt = f'''
你在分析小说{novel}中的角色{role_name}
结合小说{novel}中的内容,以及下文中角色{role_name}的对话
判断{role_name}的人物设定、人物特点以及语言风格

{role_name}的对话:
'''
    stories = [data["story"] for data in role2datas[(novel, role_name)] ]

    # Robustness fix: never ask random.sample for more items than exist.
    sample_n = min(5, len(stories))

    sample_stories = random.sample(stories, sample_n)

    for story in sample_stories:
        whole_prompt += story + "\n\n"

    return whole_prompt.strip()
|
284 |
+
|
285 |
+
|
286 |
+
# ---- Gradio UI: three tabs (chat, README, persona-summary helper) ----
with gr.Blocks() as demo:
    gr.Markdown("""# 50本小说的人物测试

这个interface由李鲁鲁实现,主要是用来看语料的

增加了随机聊天,支持GLM,openai切换

米唯实接入了qwen1.8B并布置于huggingface上""")

    with gr.Tab("聊天"):
        with gr.Row():
            novel_sel = gr.Dropdown( novel_list, label = "小说", value = "悟空传" , interactive = True)
            role_sel = gr.Dropdown( novel2roles[novel_sel.value], label = "角色", value = "孙悟空", interactive = True )

        with gr.Row():
            chat_history = gr.Chatbot(height = 600)

        with gr.Row():
            user_name = gr.Textbox(label="user_name", scale = 1, value = "鲁鲁", interactive = True)
            user_text = gr.Textbox(label="user_text", scale = 20)
            submit = gr.Button("submit", scale = 1)

        with gr.Row():
            random_chat = gr.Button("随机聊天", scale = 1)
            clean_message = gr.Button("清空聊天", scale = 1)

        with gr.Row():
            persona_addition_info = gr.TextArea( label = "额外人物设定", value = "", interactive = True )

        with gr.Row():
            update_persona = gr.Button("补充人物设定到prompt", scale = 1)
            model_sel = gr.Radio(["Zhipu","openai","spark","erniebot"], interactive = True, scale = 5, value = "Zhipu", label = "模型选择")

        with gr.Row():
            whole_persona = gr.TextArea( label = "完整的system prompt", value = "", interactive = False )

        # Changing the novel refreshes the role list, then the persona box.
        novel_sel.change(fn = get_role_list, inputs = [novel_sel], outputs = [role_sel]).then(fn = initialize_persona, inputs = [novel_sel, role_sel, persona_addition_info], outputs = [whole_persona])

        role_sel.change(fn = initialize_persona, inputs = [novel_sel, role_sel, persona_addition_info], outputs = [whole_persona])

        update_persona.click(fn = initialize_persona, inputs = [novel_sel, role_sel, persona_addition_info], outputs = [whole_persona])

        random_chat.click(fn = random_chat_callback, inputs = [novel_sel, role_sel, chat_history], outputs = [chat_history])

        # Both Enter and the submit button send a turn, then clear the input.
        user_text.submit(fn = submit_chat, inputs = [novel_sel, role_sel, user_name, user_text, chat_history, persona_addition_info,model_sel], outputs = [chat_history]).then(fn = clean_input, inputs = [], outputs = [user_text])
        submit.click(fn = submit_chat, inputs = [novel_sel, role_sel, user_name, user_text, chat_history, persona_addition_info,model_sel], outputs = [chat_history]).then(fn = clean_input, inputs = [], outputs = [user_text])

        clean_message.click(fn = clean_history, inputs = [], outputs = [chat_history])

    with gr.Tab("README"):
        gr.Markdown(readme_text)

    with gr.Tab("辅助人物总结"):
        with gr.Row():
            generate_prompt = gr.Button("生成人物总结prompt", scale = 1)

        with gr.Row():
            whole_prompt = gr.TextArea( label = "复制这个prompt到Openai或者GLM或者Claude进行总结", value = "", interactive = False )

        generate_prompt.click(fn = generate_summarize_prompt, inputs = [novel_sel, role_sel], outputs = [whole_prompt])




demo.launch(share=True, debug = True)
|
requirements.txt
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
datasets
|
2 |
+
tiktoken
|
3 |
+
tqdm
|
4 |
+
openai
|
5 |
+
zhipuai
|
6 |
+
gradio
|
7 |
+
wget
|
8 |
+
erniebot
|
9 |
+
erniebot-agent[all]
|
10 |
+
|
11 |
+
scipy
|
12 |
+
transformers
|
13 |
+
accelerate
|
14 |
+
peft
|
15 |
+
bitsandbytes
|
16 |
+
sentencepiece
|
17 |
+
einops
|
18 |
+
transformers_stream_generator==0.0.4
|
19 |
+
deepspeed
|
20 |
+
auto-gptq
|
21 |
+
optimum
|