""" | |
huggingface_hub==0.30.1 | |
transformers==4.48.2 | |
# gradio==5.0.1 | |
gradio==5.23.2 | |
torch==2.5.1 | |
pydantic==2.8.2 | |
""" | |
import gradio as gr

print("Gradio version:", gr.__version__)

from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer
import torch
from threading import Thread

# import os; os.chdir(os.path.dirname(__file__))
model_name = "fzmnm/TinyLili-zh-64M"
max_tokens = 4096        # cap on total prompt length fed to the model
max_new_tokens = 1024    # cap on generated tokens per reply
temperature = 0.7        # default sampling temperature (user-adjustable via the slider)
top_p = 0.95

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
model.eval()
model.generation_config.pad_token_id = tokenizer.eos_token_id
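
# Build the prompt string for the model. Gradio's type='messages' history is a
# list of {'role': ..., 'content': ...} dicts; apply_chat_template renders it in
# the model's chat format, and the trailing '<|im_start|>assistant\n' (ChatML
# style) cues the model to produce the assistant turn.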
def build_input_str(message: str, history: 'list[dict]'):
    history = history + [{'role': 'user', 'content': message}]
    input_str = tokenizer.apply_chat_template(history, tokenize=False)
    input_str += '\n<|im_start|>assistant\n'
    return input_str
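
# Generation should halt as soon as the decoded text ends with one of the
# model's end-of-turn markers.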
def stop_criteria(input_str):
    end_tokens = ['<s>', '<|im_end|>']
    return any(input_str.endswith(end_token) for end_token in end_tokens)
def remove_ending(input_str):
    if input_str.endswith("<|im_end|>"):
        return input_str[:-len("<|im_end|>")]
    return input_str
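
# Hugging Face StoppingCriteria subclass: called after every generated token
# with the full sequence so far; returning True stops generation.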
class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # Keep special tokens when decoding: stop_criteria matches the literal
        # '<s>' / '<|im_end|>' strings, which skip_special_tokens=True could strip.
        input_str = tokenizer.decode(input_ids[0], skip_special_tokens=False)
        return stop_criteria(input_str)
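
# Streaming chat handler. model.generate is blocking, so it runs on a background
# thread and pushes text through TextIteratorStreamer; this generator yields the
# accumulated reply so Gradio can stream it to the UI as it is produced.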
def chat(message, history, temperature):
    input_str = build_input_str(message, history)
    input_ids = tokenizer.encode(input_str, return_tensors="pt")
    input_ids = input_ids[:, -max_tokens:]  # truncate from the left to fit the context window
    streamer = TextIteratorStreamer(
        tokenizer,
        timeout=10,
        skip_prompt=True,
        skip_special_tokens=True)
    stopping_criteria = StoppingCriteriaList([StopOnTokens()])
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        stopping_criteria=stopping_criteria,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        do_sample=True,
        temperature=float(temperature),
    )
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()
    try:
        output_str = ""
        for new_str in streamer:
            output_str += new_str
            yield remove_ending(output_str)
        t.join()
    finally:
        # If the client disconnected or the stop button fired, the generator is
        # closed here; give the generation thread a moment to wind down.
        if t.is_alive():
            print('Canceling thread...')
            t.join(timeout=1)
            if t.is_alive():
                raise RuntimeError("Thread did not terminate properly.")
example_strs = [
    '北京有什么好玩的?',  # "What's fun to do in Beijing?"
    '土星上有什么好吃的',  # "What's good to eat on Saturn?"
    '什么是黑洞?',  # "What is a black hole?"
    '一个人的目的是否必须要被社会认可?',  # "Must a person's purpose be recognized by society?"
    # "Grandma turned eighty this year, but she still insists on living alone in
    # the countryside, saying that's where her roots are. The whole family urges
    # her to move to the city, but she always says 'living comfortably matters
    # more than anything'. She fell last month and her foot hasn't fully healed;
    # what if something happens again? She's so stubborn — how else can we
    # persuade her?"
    '奶奶今年八十岁了,可她还是坚持一个人住乡下,说那是她的根。我们全家都劝她搬来城市,可她总说“住得舒服,比啥都重要”。但她上个月摔了一跤,脚还没完全好,万一再出事怎么办?她那么倔,我们还能怎么劝呢?',
]
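
# Each example is paired with the default temperature because additional_inputs
# adds a second argument to fn; cache_examples=False keeps the Space from
# pre-running the examples at build time.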
app = gr.ChatInterface(
    fn=chat,
    type='messages',
    examples=[[s, temperature] for s in example_strs],
    title='聊天机器人',  # "Chatbot"
    stop_btn=True,
    # run_examples_on_click=False,  # works around a bug where clicking an example
    # question does not toggle stop_btn on; the option is not supported in gradio 5.0.1.
    additional_inputs=[
        gr.Slider(minimum=0.1, maximum=4.0, value=temperature, step=0.05, label='Temperature'),
    ],
    cache_examples=False,
)
app.queue()

if __name__ == "__main__":
    app.launch()