silk-road committed
Commit 7fd6a07
1 Parent(s): d319ff8

Upload 2 files

Files changed (2)
  1. app.py +351 -0
  2. requirements.txt +21 -0
app.py ADDED
@@ -0,0 +1,351 @@
+ import gradio as gr
+ import os
+ import httpx
+ import openai
+ from openai import OpenAI
+ from openai import AsyncOpenAI
+
+ from datasets import load_dataset
+
+ dataset = load_dataset("silk-road/50-Chinese-Novel-Characters")
+
+
+ novel_list = []
+
+ novel2roles = {}
+
+ role2datas = {}
+
+ from tqdm import tqdm
+ for data in tqdm(dataset['train']):
+     novel = data['book']
+     role = data['role']
+     if novel not in novel_list:
+         novel_list.append(novel)
+
+     if novel not in novel2roles:
+         novel2roles[novel] = []
+
+     if role not in novel2roles[novel]:
+         novel2roles[novel].append(role)
+
+     role_tuple = (novel, role)
+
+     if role_tuple not in role2datas:
+         role2datas[role_tuple] = []
+
+     role2datas[role_tuple].append(data)
+
+
+ from ChatHaruhi.utils import base64_to_float_array
+
+ from tqdm import tqdm
+
+ for novel in tqdm(novel_list):
+     for role in novel2roles[novel]:
+         for data in role2datas[(novel, role)]:
+             data["vec"] = base64_to_float_array(data["bge_zh_s15"])
+
+ def conv2story( role, conversations ):
+     lines = [conv["value"] if conv["from"] == "human" else role + ": " + conv["value"] for conv in conversations]
+     return "\n".join(lines)
+
+ for novel in tqdm(novel_list):
+     for role in novel2roles[novel]:
+         for data in role2datas[(novel, role)]:
+             data["story"] = conv2story( role, data["conversations"] )
+
+
+ from ChatHaruhi import ChatHaruhi
+ from ChatHaruhi.response_openai import get_response as get_response_openai
+ from ChatHaruhi.response_zhipu import get_response as get_response_zhipu
+ from ChatHaruhi.response_erniebot import get_response as get_response_erniebot
+ from ChatHaruhi.response_spark import get_response as get_response_spark
+
+
+ get_response = get_response_zhipu
+
+ narrators = ["叙述者", "旁白","文章作者","作者","Narrator","narrator"]
+
+
+ def package_persona( role_name, world_name ):
+     if role_name in narrators:
+         return package_persona_for_narrator( role_name, world_name )
+
+     return f"""I want you to act like {role_name} from {world_name}.
+ If others' questions are related with the novel, please try to reuse the original lines from the novel.
+ I want you to respond and answer like {role_name} using the tone, manner and vocabulary {role_name} would use."""
+
+ def package_persona_for_narrator( role_name, world_name ):
+     return f"""I want you to act like narrator {role_name} from {world_name}.
+ 当角色行动之后,继续交代和推进新的剧情."""
+
+ role_tuple2chatbot = {}
+
+
+ def initialize_chatbot( novel, role ):
+     global role_tuple2chatbot
+     if (novel, role) not in role_tuple2chatbot:
+         persona = package_persona( role, novel )
+         persona += "\n{{RAG对话}}\n{{RAG对话}}\n{{RAG对话}}\n"
+         stories = [data["story"] for data in role2datas[(novel, role)] ]
+         vecs = [data["vec"] for data in role2datas[(novel, role)] ]
+         chatbot = ChatHaruhi( role_name = role, persona = persona , stories = stories, story_vecs= vecs,\
+             llm = get_response)
+         chatbot.verbose = False
+
+         role_tuple2chatbot[(novel, role)] = chatbot
+
+ from tqdm import tqdm
+ for novel in tqdm(novel_list):
+     for role in novel2roles[novel]:
+         initialize_chatbot( novel, role )
+
+ readme_text = """# 使用说明
+
+ 选择小说角色
+
+ 如果你有什么附加信息,添加到附加信息里面就可以
+
+ 比如"韩立会炫耀自己刚刚学会了Python"
+
+ 然后就可以开始聊天了
+
+ 因为这些角色还没有增加Greeting信息,所以之后再开发个随机乱聊功能
+
+ # 开发细节
+
+ - 采用ChatHaruhi3.0的接口进行prompting
+ - 这里的数据是用一个7B的tuned qwen模型进行抽取的
+ - 想看数据可以去看第三个tab
+ - 抽取模型用了40k左右的GLM蒸馏数据
+ - 抽取模型是腾讯大哥BPSK训练的
+
+ # 总结人物性格
+
+ 第三个Tab里面,可以显示一个prompt总结人物的性格
+
+ 复制到openai或者GLM或者Claude进行人物总结
+
+
+ # 这些小说数据从HaruhiZero 0.4模型开始,被加入训练
+
+ openai太慢了 今天试试GLM的
+
+ 不过当前demo是openai的
+
+ """
+
+ # from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM
+ # tokenizer = AutoTokenizer.from_pretrained("silk-road/Haruhi-Zero-1_8B", trust_remote_code=True)
+ # model = AutoModelForCausalLM.from_pretrained("silk-road/Haruhi-Zero-1_8B", device_map="auto", trust_remote_code=True)
+ # model = model.eval()
+
+ # def get_response_qwen18(message):
+ #     from ChatHaruhi.utils import normalize2uaua
+ #     message_ua = normalize2uaua(message, if_replace_system = True)
+ #     import json
+ #     message_tuples = []
+ #     for i in range(0, len(message_ua)-1, 2):
+ #         message_tuple = (message_ua[i]["content"], message_ua[i+1]["content"])
+ #         message_tuples.append(message_tuple)
+ #     response, _ = model.chat(tokenizer, message_ua[-1]["content"], history=message_tuples)
+ #     return response
+
+ from ChatHaruhi.response_openai import get_response, async_get_response
+ import gradio as gr
+
+ def get_role_list( novel ):
+     new_list = novel2roles[novel]
+     new_value = new_list[0]
+     return gr.update(choices = new_list, value = new_value)
+
+ save_log = "/content/output.txt"
+
+ def get_chatbot( novel, role ):
+     if (novel, role) not in role_tuple2chatbot:
+         initialize_chatbot( novel, role )
+
+     return role_tuple2chatbot[(novel, role)]
+
+ import json
+
+ def random_chat_callback( novel, role, chat_history):
+     datas = role2datas[(novel, role)]
+
+     response_set = set()
+
+     for chat_tuple in chat_history:
+         if chat_tuple[1] is not None:
+             response_set.add(chat_tuple[1])
+
+     for _ in range(5):
+         random_data = random.choice(datas)
+         convs = random_data["conversations"]
+         n = len(convs)
+         index = [x for x in range(0,n,2)]
+
+         for i in index:
+             query = convs[i]['value']
+             response = convs[i+1]['value']
+             if response not in response_set:
+                 chat_history.append( (query, response) )
+                 return chat_history
+
+     return chat_history
+
+
+
+ async def submit_chat( novel, role, user_name, user_text, chat_history, persona_addition_info,model_sel):
+
+     if len(user_text) > 400:
+         user_text = user_text[:400]
+
+     if_user_in_text = True
+
+     chatbot = get_chatbot( novel, role )
+     chatbot.persona = initialize_persona( novel, role, persona_addition_info)
+     # chatbot.llm_async = async_get_response
+
+     if model_sel == "openai":
+         chatbot.llm = get_response_openai
+     elif model_sel == "Zhipu":
+         chatbot.llm = get_response_zhipu
+     elif model_sel == "spark":
+         chatbot.llm = get_response_spark
+     else:
+         chatbot.llm = get_response_erniebot
+
+
+     history = []
+
+     for chat_tuple in chat_history:
+         if chat_tuple[0] is not None:
+             history.append( {"speaker":"{{user}}","content":chat_tuple[0]} )
+         if chat_tuple[1] is not None:
+             history.append( {"speaker":"{{role}}","content":chat_tuple[1]} )
+
+     chatbot.history = history
+
+     input_text = user_text
+
+     if if_user_in_text:
+         input_text = user_name + " : " + user_text
+         response = chatbot.chat(user = "", text = input_text )
+         # response = await chatbot.async_chat(user = "", text = input_text )
+     else:
+         response = chatbot.chat(user = user_name, text = input_text)
+         # response = await chatbot.async_chat(user = user_name, text = input_text)
+     chat_history.append( (input_text, response) )
+
+     print_data = {"novel":novel, "role":role, "user_text":input_text, "response":response}
+
+     print(json.dumps(print_data, ensure_ascii=False))
+
+     with open(save_log, "a",encoding = "utf-8") as f:
+         f.write(json.dumps(print_data, ensure_ascii=False) + "\n")
+
+     return chat_history
+
+
+ def initialize_persona( novel, role, persona_addition_info):
+     whole_persona = package_persona( role, novel )
+     whole_persona += "\n" + persona_addition_info
+     whole_persona += "\n{{RAG对话}}\n{{RAG对话}}\n{{RAG对话}}\n"
+
+     return whole_persona
+
+ def clean_history( ):
+     return []
+
+ def clean_input():
+     return ""
+
+ import random
+
+ def generate_summarize_prompt( novel, role_name ):
+     whole_prompt = f'''
+ 你在分析小说{novel}中的角色{role_name}
+ 结合小说{novel}中的内容,以及下文中角色{role_name}的对话
+ 判断{role_name}的人物设定、人物特点以及语言风格
+
+ {role_name}的对话:
+ '''
+     stories = [data["story"] for data in role2datas[(novel, role_name)] ]
+
+     sample_n = 5
+
+     sample_stories = random.sample(stories, sample_n)
+
+     for story in sample_stories:
+         whole_prompt += story + "\n\n"
+
+     return whole_prompt.strip()
+
+
+ with gr.Blocks() as demo:
+     gr.Markdown("""# 50本小说的人物测试
+
+ 这个interface由李鲁鲁实现,主要是用来看语料的
+
+ 增加了随机聊天,支持GLM,openai切换
+
+ 米唯实接入了qwen1.8B并布置于huggingface上""")
+
+     with gr.Tab("聊天"):
+         with gr.Row():
+             novel_sel = gr.Dropdown( novel_list, label = "小说", value = "悟空传" , interactive = True)
+             role_sel = gr.Dropdown( novel2roles[novel_sel.value], label = "角色", value = "孙悟空", interactive = True )
+
+         with gr.Row():
+             chat_history = gr.Chatbot(height = 600)
+
+         with gr.Row():
+             user_name = gr.Textbox(label="user_name", scale = 1, value = "鲁鲁", interactive = True)
+             user_text = gr.Textbox(label="user_text", scale = 20)
+             submit = gr.Button("submit", scale = 1)
+
+         with gr.Row():
+             random_chat = gr.Button("随机聊天", scale = 1)
+             clean_message = gr.Button("清空聊天", scale = 1)
+
+         with gr.Row():
+             persona_addition_info = gr.TextArea( label = "额外人物设定", value = "", interactive = True )
+
+         with gr.Row():
+             update_persona = gr.Button("补充人物设定到prompt", scale = 1)
+             model_sel = gr.Radio(["Zhipu","openai","spark","erniebot"], interactive = True, scale = 5, value = "Zhipu", label = "模型选择")
+
+         with gr.Row():
+             whole_persona = gr.TextArea( label = "完整的system prompt", value = "", interactive = False )
+
+         novel_sel.change(fn = get_role_list, inputs = [novel_sel], outputs = [role_sel]).then(fn = initialize_persona, inputs = [novel_sel, role_sel, persona_addition_info], outputs = [whole_persona])
+
+         role_sel.change(fn = initialize_persona, inputs = [novel_sel, role_sel, persona_addition_info], outputs = [whole_persona])
+
+         update_persona.click(fn = initialize_persona, inputs = [novel_sel, role_sel, persona_addition_info], outputs = [whole_persona])
+
+         random_chat.click(fn = random_chat_callback, inputs = [novel_sel, role_sel, chat_history], outputs = [chat_history])
+
+         user_text.submit(fn = submit_chat, inputs = [novel_sel, role_sel, user_name, user_text, chat_history, persona_addition_info,model_sel], outputs = [chat_history]).then(fn = clean_input, inputs = [], outputs = [user_text])
+         submit.click(fn = submit_chat, inputs = [novel_sel, role_sel, user_name, user_text, chat_history, persona_addition_info,model_sel], outputs = [chat_history]).then(fn = clean_input, inputs = [], outputs = [user_text])
+
+         clean_message.click(fn = clean_history, inputs = [], outputs = [chat_history])
+
+     with gr.Tab("README"):
+         gr.Markdown(readme_text)
+
+     with gr.Tab("辅助人物总结"):
+         with gr.Row():
+             generate_prompt = gr.Button("生成人物总结prompt", scale = 1)
+
+         with gr.Row():
+             whole_prompt = gr.TextArea( label = "复制这个prompt到Openai或者GLM或者Claude进行总结", value = "", interactive = False )
+
+         generate_prompt.click(fn = generate_summarize_prompt, inputs = [novel_sel, role_sel], outputs = [whole_prompt])
+
+
+
+
+
+ demo.launch(share=True, debug = True)
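
For reference, the chatbot objects that app.py builds can also be exercised outside the Gradio UI. The sketch below is illustrative only: it reuses calls that already appear in the file (get_chatbot, initialize_persona, chatbot.chat), the novel/role values are the demo's dropdown defaults, and the question text is made up; it assumes the module above has finished loading the dataset and that an API key for the selected backend is configured.

# Illustrative use of the preloaded chatbots, outside the Gradio UI (assumptions noted above)
bot = get_chatbot("悟空传", "孙悟空")                      # same defaults as the novel/role dropdowns
bot.persona = initialize_persona("悟空传", "孙悟空", "")    # persona template with empty extra info
bot.history = []                                           # submit_chat rebuilds this from UI state each turn
reply = bot.chat(user="鲁鲁", text="你最近在忙什么?")       # made-up question; returns the role's reply
print(reply)
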
requirements.txt ADDED
@@ -0,0 +1,21 @@
+ datasets
+ tiktoken
+ tqdm
+ openai
+ zhipuai
+ gradio
+ wget
+ erniebot
+ erniebot-agent[all]
+
+ scipy
+ transformers
+ accelerate
+ peft
+ bitsandbytes
+ sentencepiece
+ einops
+ transformers_stream_generator==0.0.4
+ deepspeed
+ auto-gptq
+ optimum
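
In app.py, the model_sel radio switches backends simply by assigning a different get_response function to chatbot.llm, so adding another backend only requires a function with the same calling convention. A minimal toy sketch, assuming the message format handled by the commented-out get_response_qwen18 (a list of role/content dicts that ChatHaruhi.utils.normalize2uaua can flatten); the echo reply is a placeholder, not a real model call:

from ChatHaruhi.utils import normalize2uaua  # same helper used in the qwen18 sketch in app.py

def get_response_custom(message):
    # Collapse system/user/assistant messages into alternating user/assistant turns,
    # as the commented-out get_response_qwen18 does before calling its model.
    message_ua = normalize2uaua(message, if_replace_system=True)
    # Placeholder "model": echo the latest user turn; swap this line for a real chat call.
    return "(echo) " + message_ua[-1]["content"]

# A chatbot could then use it the same way submit_chat swaps backends:
# chatbot.llm = get_response_custom
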