File size: 11,712 Bytes
e74a2eb
 
daf0ac8
e74a2eb
daf0ac8
 
e74a2eb
daf0ac8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7ad8a43
 
daf0ac8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e74a2eb
 
daf0ac8
e74a2eb
daf0ac8
e74a2eb
daf0ac8
e74a2eb
daf0ac8
e74a2eb
7ad8a43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e74a2eb
daf0ac8
 
 
 
 
 
 
 
10b3b99
daf0ac8
 
 
 
 
 
 
 
e74a2eb
daf0ac8
 
e74a2eb
daf0ac8
e74a2eb
daf0ac8
 
 
e74a2eb
daf0ac8
 
 
 
 
e74a2eb
daf0ac8
 
 
 
 
 
e74a2eb
daf0ac8
e74a2eb
 
 
daf0ac8
e74a2eb
daf0ac8
 
e74a2eb
daf0ac8
 
 
 
 
 
 
 
 
 
 
7ad8a43
e74a2eb
 
 
daf0ac8
 
 
 
 
 
 
 
 
 
 
 
 
 
e74a2eb
daf0ac8
 
 
e74a2eb
daf0ac8
e74a2eb
daf0ac8
e74a2eb
10b3b99
 
e74a2eb
daf0ac8
e74a2eb
 
daf0ac8
 
 
 
e74a2eb
daf0ac8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e74a2eb
 
 
daf0ac8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
# NOTE(review): `os`, `httpx`, `openai`, `OpenAI`, `AsyncOpenAI` look unused in
# this file — confirm before removing.
import gradio as gr
import os
import httpx
import openai
from openai import OpenAI
from openai import AsyncOpenAI

from datasets import load_dataset

# Role-play corpus: extracted dialogues for characters from 50 Chinese novels.
dataset = load_dataset("silk-road/50-Chinese-Novel-Characters")


# Index the corpus three ways:
#   novel_list  - unique novel titles, in first-seen order
#   novel2roles - novel -> unique role names, in first-seen order
#   role2datas  - (novel, role) -> list of raw dataset records
novel_list = []

novel2roles = {}

role2datas = {}

from tqdm import tqdm
for data in tqdm(dataset['train']):
    novel = data['book']
    role = data['role']
    if novel not in novel_list:
        novel_list.append(novel)

    if novel not in novel2roles:
        novel2roles[novel] = []

    if role not in novel2roles[novel]:
        novel2roles[novel].append(role)

    role_tuple = (novel, role)

    if role_tuple not in role2datas:
        role2datas[role_tuple] = []

    role2datas[role_tuple].append(data)


from ChatHaruhi.utils import base64_to_float_array

from tqdm import tqdm

# Decode each record's base64-packed "bge_zh_s15" embedding into a float
# vector; these are passed to ChatHaruhi as `story_vecs` for retrieval.
for novel in tqdm(novel_list):
    for role in novel2roles[novel]:
        for data in role2datas[(novel, role)]:
            data["vec"] = base64_to_float_array(data["bge_zh_s15"])

def conv2story( role, conversations ):
    """Flatten a conversation into one newline-joined story string.

    Human turns are kept verbatim; every other turn is prefixed with
    "<role>: " so the speaker is visible in the rendered text.
    """
    rendered = []
    for turn in conversations:
        if turn["from"] == "human":
            rendered.append(turn["value"])
        else:
            rendered.append(role + ": " + turn["value"])
    return "\n".join(rendered)

# Render every record's conversation into a plain-text "story"; these become
# the retrieval documents (`stories`) handed to ChatHaruhi below.
for novel in tqdm(novel_list):
    for role in novel2roles[novel]:
        for data in role2datas[(novel, role)]:
            data["story"] = conv2story( role, data["conversations"] )


from ChatHaruhi import ChatHaruhi
from ChatHaruhi.response_openai import get_response as get_response_openai
from ChatHaruhi.response_zhipu import get_response as get_response_zhipu
from ChatHaruhi.response_qwen_base import get_response as get_response_qwen_base


# Default LLM backend used when the chatbots are first constructed.
# NOTE(review): a later `from ChatHaruhi.response_openai import get_response, ...`
# re-binds this name before the warm-up loop runs — confirm which backend is
# actually intended as the construction-time default.
get_response = get_response_zhipu

# Role names treated as narrators rather than in-world characters; they get a
# dedicated persona prompt.
narrators = ["叙述者", "旁白","文章作者","作者","Narrator","narrator"]


def package_persona( role_name, world_name ):
    """Build the base role-play system prompt for `role_name` of `world_name`.

    Narrator-like roles (see the module-level `narrators` list) are delegated
    to the dedicated narrator prompt.
    """
    if role_name not in narrators:
        return (
            f"I want you to act like {role_name} from {world_name}.\n"
            "If others‘ questions are related with the novel, please try to reuse the original lines from the novel.\n"
            f"I want you to respond and answer like {role_name} using the tone, manner and vocabulary {role_name} would use."
        )
    return package_persona_for_narrator( role_name, world_name )

def package_persona_for_narrator( role_name, world_name ):
    """Narrator variant of the persona prompt: keep narrating and advancing the plot."""
    header = f"I want you to act like narrator {role_name} from {world_name}."
    body = "当角色行动之后,继续交代和推进新的剧情."
    return header + "\n" + body

# Cache of ChatHaruhi bots keyed by (novel, role) so each is built only once.
role_tuple2chatbot = {}


def initialize_chatbot( novel, role ):
    # Build (if absent) and cache the ChatHaruhi bot for this (novel, role).
    # Persona = base role-play prompt plus three "{{RAG对话}}" placeholder
    # slots — presumably filled with retrieved dialogues by ChatHaruhi at
    # chat time; verify against the ChatHaruhi prompting code.
    global role_tuple2chatbot
    if (novel, role) not in role_tuple2chatbot:
        persona = package_persona( role, novel )
        persona += "\n{{RAG对话}}\n{{RAG对话}}\n{{RAG对话}}\n"
        stories = [data["story"] for data in role2datas[(novel, role)] ]
        vecs = [data["vec"] for data in role2datas[(novel, role)] ]
        chatbot = ChatHaruhi( role_name = role, persona = persona , stories = stories, story_vecs= vecs,\
                             llm = get_response)
        chatbot.verbose = False

        role_tuple2chatbot[(novel, role)] = chatbot

# Eagerly build a chatbot for every (novel, role) pair up front.
from tqdm import tqdm
for novel in tqdm(novel_list):
    for role in novel2roles[novel]:
        initialize_chatbot( novel, role )

# Markdown shown in the README tab: usage notes, dev details, data provenance.
readme_text = """# 使用说明

选择小说角色

如果你有什么附加信息,添加到附加信息里面就可以

比如"韩立会炫耀自己刚刚学会了Python"

然后就可以开始聊天了

因为这些角色还没有增加Greeting信息,所以之后再开发个随机乱聊功能

# 开发细节

- 采用ChatHaruhi3.0的接口进行prompting
- 这里的数据是用一个7B的tuned qwen模型进行抽取的
- 想看数据可以去看第三个tab
- 抽取模型用了40k左右的GLM蒸馏数据
- 抽取模型是腾讯大哥BPSK训练的

# 总结人物性格

第三个Tab里面,可以显示一个prompt总结人物的性格

复制到openai或者GLM或者Claude进行人物总结


# 这些小说数据从HaruhiZero 0.4模型开始,被加入训练

openai太慢了 今天试试GLM的

不过当前demo是openai的

"""

# Commented-out alternative backend: load Haruhi-Zero-1.8B locally via
# transformers and adapt its chat() interface to the ua/ua message format.
# Kept for reference; a hosted qwen backend is used instead (see model_sel).
# from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM
# tokenizer = AutoTokenizer.from_pretrained("silk-road/Haruhi-Zero-1_8B", trust_remote_code=True)
# model = AutoModelForCausalLM.from_pretrained("silk-road/Haruhi-Zero-1_8B", device_map="auto", trust_remote_code=True)
# model = model.eval()

# def get_response_qwen18(message):
#     from ChatHaruhi.utils import normalize2uaua
#     message_ua = normalize2uaua(message, if_replace_system = True)
#     import json
#     message_tuples = []
#     for i in range(0, len(message_ua)-1, 2):
#         message_tuple = (message_ua[i]["content"], message_ua[i+1]["content"])
#         message_tuples.append(message_tuple)
#     response, _ = model.chat(tokenizer, message_ua[-1]["content"], history=message_tuples)
#     return response

# NOTE(review): this import re-binds the module-level `get_response` (set to
# get_response_zhipu earlier) to the openai backend, after the chatbots were
# built; submit_chat overrides chatbot.llm per request anyway. `gradio` was
# already imported at the top of the file.
from ChatHaruhi.response_openai import get_response, async_get_response
import gradio as gr

def get_role_list( novel ):
    """Dropdown callback: refresh the role choices when the novel changes.

    Returns a gr.update selecting the first role of the chosen novel.
    """
    roles = novel2roles[novel]
    return gr.update(choices = roles, value = roles[0])

# save_log = "/content/output.txt"

def get_chatbot( novel, role ):
    """Return the cached chatbot for (novel, role), building it on first use."""
    key = (novel, role)
    if key not in role_tuple2chatbot:
        initialize_chatbot( novel, role )
    return role_tuple2chatbot[key]

import json

def random_chat_callback( novel, role, chat_history):
    """Append one not-yet-shown (query, response) pair from the role's corpus.

    Picks a random record for (novel, role), walks its conversation in
    (human, role) turn pairs, and appends the first pair whose response is
    not already present in chat_history. Tries up to 5 random records, then
    gives up and returns chat_history unchanged.
    """
    datas = role2datas[(novel, role)]

    # Responses already shown, so random chat never repeats itself.
    # (Fixes the original's typo'd name `reesponse_set`.)
    seen_responses = {pair[1] for pair in chat_history if pair[1] is not None}

    for _ in range(5):
        convs = random.choice(datas)["conversations"]

        # Step over (human, role) pairs. The `len(convs) - 1` bound guards
        # against a dangling unpaired turn: the original iterated
        # range(0, n, 2) and indexed convs[i+1], which raises IndexError
        # when a conversation has an odd number of turns.
        for i in range(0, len(convs) - 1, 2):
            query = convs[i]['value']
            response = convs[i + 1]['value']
            if response not in seen_responses:
                chat_history.append( (query, response) )
                return chat_history

    return chat_history



async def submit_chat( novel, role, user_name, user_text, chat_history, persona_addition_info,model_sel):
    """Gradio submit handler: run one chat turn and append it to the history.

    Rebuilds the bot's persona and history from the UI state on every call,
    selects the LLM backend per the radio choice, performs a synchronous chat
    (the async variants are commented out), and returns the updated
    chat_history for the Chatbot component.
    """

    # Hard cap user input at 400 characters.
    if len(user_text) > 400:
        user_text = user_text[:400]

    # Always True, so the else-branch below is currently dead code: the user
    # name is embedded into the text rather than passed separately.
    if_user_in_text = True

    chatbot = get_chatbot( novel, role )
    chatbot.persona = initialize_persona( novel, role,  persona_addition_info)
    # chatbot.llm_async = async_get_response

    # Pick the LLM backend for this turn; anything other than the two named
    # options (i.e. the "qwen1.8B" radio choice) falls through to qwen base.
    if model_sel == "openai":
        chatbot.llm = get_response_openai
    elif model_sel == "Zhipu":
        chatbot.llm = get_response_zhipu
    else:
        chatbot.llm = get_response_qwen_base

    # Convert Gradio's (user_msg, bot_msg) tuples into ChatHaruhi's
    # speaker/content records, skipping missing halves.
    history = []

    for chat_tuple in chat_history:
        if chat_tuple[0] is not None:
            history.append( {"speaker":"{{user}}","content":chat_tuple[0]} )
        if chat_tuple[1] is not None:
            history.append( {"speaker":"{{role}}","content":chat_tuple[1]} )

    chatbot.history = history

    input_text = user_text

    if if_user_in_text:
        input_text = user_name + " : " + user_text
        response = chatbot.chat(user = "", text = input_text )
        # response = await chatbot.async_chat(user = "", text = input_text )
    else:
        response = chatbot.chat(user = user_name, text = input_text)
        # response = await chatbot.async_chat(user = user_name, text = input_text)
    chat_history.append( (input_text, response) )

    # Log the turn to stdout as a single JSON line (file logging disabled).
    print_data = {"novel":novel, "role":role, "user_text":input_text, "response":response}

    print(json.dumps(print_data, ensure_ascii=False))

    # with open(save_log, "a",encoding = "utf-8") as f:
    #     f.write(json.dumps(print_data, ensure_ascii=False) + "\n")

    return chat_history


def initialize_persona( novel, role,  persona_addition_info):
    """Compose the full system prompt: base persona, user extras, RAG slots."""
    parts = [
        package_persona( role, novel ),
        "\n",
        persona_addition_info,
        "\n{{RAG对话}}\n{{RAG对话}}\n{{RAG对话}}\n",
    ]
    return "".join(parts)

def clean_history( ):
    """Reset the Chatbot component to an empty conversation."""
    empty_history = []
    return empty_history

def clean_input():
    """Clear the user_text textbox after a message is submitted."""
    return str()

import random

def generate_summarize_prompt( novel, role_name ):
    """Build a prompt asking an external LLM to summarize a character.

    Embeds up to 5 randomly sampled dialogue "stories" for (novel, role_name)
    below a Chinese instruction header. Fix: the original always sampled
    exactly 5, so random.sample raised ValueError for roles with fewer than
    5 stories; the sample size is now capped at the population size.
    """
    whole_prompt = f'''
你在分析小说{novel}中的角色{role_name}
结合小说{novel}中的内容,以及下文中角色{role_name}的对话
判断{role_name}的人物设定、人物特点以及语言风格

{role_name}的对话:
'''
    stories = [data["story"] for data in role2datas[(novel, role_name)] ]

    # random.sample raises ValueError when k exceeds the population size.
    sample_n = min(5, len(stories))

    sample_stories = random.sample(stories, sample_n)

    for story in sample_stories:
        whole_prompt += story + "\n\n"

    return whole_prompt.strip()


# --- Gradio UI: three tabs (chat, README, character-summary prompt). ---
with gr.Blocks() as demo:
    gr.Markdown("""# 50本小说的人物测试

    这个interface由李鲁鲁实现,主要是用来看语料的

    增加了随机聊天,支持GLM,openai切换
    
    米唯实接入了qwen1.8B并布置于huggingface上""")

    with gr.Tab("聊天"):
        # Novel/role pickers; role choices depend on the selected novel.
        with gr.Row():
            novel_sel = gr.Dropdown( novel_list, label = "小说", value = "悟空传" , interactive = True)
            role_sel = gr.Dropdown( novel2roles[novel_sel.value], label = "角色", value = "孙悟空", interactive = True )

        with gr.Row():
            chat_history = gr.Chatbot(height = 600)

        with gr.Row():
            user_name = gr.Textbox(label="user_name", scale = 1, value = "鲁鲁", interactive = True)
            user_text = gr.Textbox(label="user_text", scale = 20)
            submit = gr.Button("submit", scale = 1)

        with gr.Row():
            random_chat = gr.Button("随机聊天", scale = 1)
            clean_message = gr.Button("清空聊天", scale = 1)

        # Free-form extra persona text merged into the system prompt.
        with gr.Row():
            persona_addition_info = gr.TextArea( label = "额外人物设定", value = "",  interactive = True  )

        with gr.Row():
            update_persona = gr.Button("补充人物设定到prompt", scale = 1)
            model_sel = gr.Radio(["Zhipu","openai","qwen1.8B"], interactive = True, scale = 5, value = "qwen1.8B", label = "模型选择")

        # Read-only view of the composed system prompt.
        with gr.Row():
            whole_persona = gr.TextArea( label = "完整的system prompt", value = "",  interactive = False  )

        # Changing the novel refreshes the role list, then re-renders the persona.
        novel_sel.change(fn = get_role_list, inputs = [novel_sel], outputs = [role_sel]).then(fn = initialize_persona, inputs = [novel_sel, role_sel, persona_addition_info], outputs = [whole_persona])

        role_sel.change(fn = initialize_persona, inputs = [novel_sel, role_sel, persona_addition_info], outputs = [whole_persona])

        update_persona.click(fn = initialize_persona, inputs = [novel_sel, role_sel, persona_addition_info], outputs = [whole_persona])

        random_chat.click(fn = random_chat_callback, inputs = [novel_sel, role_sel, chat_history], outputs = [chat_history])

        # Enter in the textbox and the submit button share one handler;
        # both clear the textbox afterwards.
        user_text.submit(fn = submit_chat, inputs = [novel_sel, role_sel, user_name, user_text, chat_history, persona_addition_info,model_sel], outputs = [chat_history]).then(fn = clean_input, inputs = [], outputs = [user_text])
        submit.click(fn = submit_chat, inputs = [novel_sel, role_sel, user_name, user_text, chat_history, persona_addition_info,model_sel], outputs = [chat_history]).then(fn = clean_input, inputs = [], outputs = [user_text])

        clean_message.click(fn = clean_history, inputs = [], outputs = [chat_history])

    with gr.Tab("README"):
        gr.Markdown(readme_text)

    with gr.Tab("辅助人物总结"):
        with gr.Row():
            generate_prompt = gr.Button("生成人物总结prompt", scale = 1)

        with gr.Row():
            whole_prompt = gr.TextArea( label = "复制这个prompt到Openai或者GLM或者Claude进行总结", value = "",  interactive = False  )

        generate_prompt.click(fn = generate_summarize_prompt, inputs = [novel_sel, role_sel], outputs = [whole_prompt])




# Public share link; debug=True blocks the main thread and surfaces errors.
demo.launch(share=True, debug = True)