Spaces:

akhaliq
/

Qwen3-4B-Instruct-2507

Running on Zero

Qwen3-4B-Instruct-2507 / app.py

akhaliq HF Staff

Upload app.py with huggingface_hub

2e3fa61 verified 2 days ago

1.6 kB

	import gradio as gr
	import spaces
	from transformers import AutoModelForCausalLM, AutoTokenizer

	# Checkpoint identifier shared by the tokenizer and the model below.
	model_name = "Qwen/Qwen3-4B-Instruct-2507"

	# Both objects are created once at import time and reused by every request.
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto")

	# NOTE(review): presumably reserves a GPU for up to 120 seconds per call --
	# confirm against the `spaces` package documentation.
	@spaces.GPU(duration=120)
	def generate_response(prompt: str) -> str:
	    """Return the model's reply to a single user prompt.

	    The prompt is wrapped as a one-message chat, rendered to text with the
	    tokenizer's chat template, and completed with ``model.generate``.

	    Args:
	        prompt: Text of the user's message.

	    Returns:
	        The decoded completion, excluding the prompt tokens and with
	        special tokens skipped.
	    """
	    conversation = [{"role": "user", "content": prompt}]
	    rendered = tokenizer.apply_chat_template(
	        conversation,
	        tokenize=False,
	        add_generation_prompt=True,
	    )

	    # Tokenize as a batch of one, then move the tensors to the model's device.
	    encoded = tokenizer([rendered], return_tensors="pt")
	    model_inputs = encoded.to(model.device)
	    prompt_length = len(model_inputs.input_ids[0])

	    # The new-token budget is kept modest for performance and safety.
	    sequences = model.generate(**model_inputs, max_new_tokens=1024)

	    # Slice off the leading prompt tokens so only the completion is decoded.
	    completion_ids = sequences[0][prompt_length:].tolist()
	    return tokenizer.decode(completion_ids, skip_special_tokens=True)

	def respond(message, chat_history):
	    """Handle a submitted message and return the updated chat state.

	    Args:
	        message: Current contents of the input textbox.
	        chat_history: Conversation so far, as a list of
	            ``(user_message, bot_response)`` pairs; extended in place.

	    Returns:
	        A pair ``("", chat_history)``: the empty string clears the textbox
	        and the history refreshes the chatbot component.
	    """
	    # An empty submission leaves the history untouched.
	    if message:
	        reply = generate_response(message)
	        chat_history.append((message, reply))
	    return "", chat_history


	# Create Gradio interface
	with gr.Blocks() as demo:
	    gr.Markdown("# Qwen Chatbot")
	    chatbot = gr.Chatbot()
	    msg = gr.Textbox(label="Input")
	    clear = gr.Button("Clear")

	    # Submitting the textbox runs `respond`; its two return values are
	    # written back to the textbox and the chatbot respectively.
	    msg.submit(fn=respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
	    # The clear button resets the chatbot by assigning it None.
	    clear.click(fn=lambda: None, inputs=None, outputs=chatbot, queue=False)

	# Launch the app only when this file is executed as a script, so that
	# importing the module does not start a web server as a side effect.
	if __name__ == "__main__":
	    demo.launch()