Spaces:

saadfarhad
/

Crowdanalyzer_v1

Runtime error

App Files Files Community

Crowdanalyzer_v1 / app.py

saadfarhad

Update app.py

a5e0173 verified 9 months ago

raw

history blame

2.66 kB

	import gradio as gr
	import torch
	from transformers import AutoModel, AutoTokenizer

	# Model setting
	# Hugging Face Hub repository id of the checkpoint loaded below.
	model_path = "OpenGVLab/InternVideo2_5_Chat_8B"

	# Load the tokenizer and model with remote code enabled.
	# NOTE: trust_remote_code=True runs Python code shipped inside the model
	# repository, so model_path should only point at a repository you trust.
	tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
	# Request fp16 weights at load time instead of loading in the library's
	# default dtype and converting afterwards; this avoids holding a larger
	# copy of the checkpoint in host memory during start-up. The trailing
	# .half() is kept so the final dtype is the same as before even if the
	# remote code leaves some sub-module in another precision.
	model = AutoModel.from_pretrained(
	    model_path,
	    trust_remote_code=True,
	    torch_dtype=torch.float16,
	).half().cuda()  # .cuda() requires a CUDA-capable GPU at import time

	# Get the image processor from the vision tower.
	# NOTE(review): this relies on the remote model class exposing
	# get_vision_tower(); confirm against the checkpoint's modeling code.
	# The value is not used by the functions visible in this file.
	image_processor = model.get_vision_tower().image_processor

	# Evaluation settings
	# Forwarded to model.chat as max_num_frames by the chat helpers below.
	max_num_frames = 512
	# Forwarded unchanged to model.chat as generation_config.
	# NOTE(review): with do_sample=False the temperature/top_p entries
	# presumably have no effect on decoding; confirm before tuning them.
	generation_config = {
	    "do_sample": False,
	    "temperature": 0.0,
	    "max_new_tokens": 1024,
	    "top_p": 0.1,
	    "num_beams": 1,
	}

	video_path = "your_video.mp4"  # (For testing locally, update as needed)

	# Single-turn conversation example:
	def single_turn_chat(video_path, user_prompt):
	    """Ask the model a single question about a video and return its reply.

	    Args:
	        video_path: Passed to ``model.chat`` as ``video_path``.
	        user_prompt: Passed to ``model.chat`` as ``user_prompt``.

	    Returns:
	        The first element of the pair returned by ``model.chat``. The
	        second element (presumably the conversation history, since
	        ``return_history=True`` is requested) is discarded.
	    """
	    chat_kwargs = {
	        "video_path": video_path,
	        "tokenizer": tokenizer,
	        "user_prompt": user_prompt,
	        "return_history": True,
	        "max_num_frames": max_num_frames,
	        "generation_config": generation_config,
	    }
	    # No chat_history is supplied, so every call starts a fresh exchange.
	    reply, _unused_history = model.chat(**chat_kwargs)
	    return reply

	# Multi-turn conversation example:
	def multi_turn_chat(video_path, user_prompt, chat_history):
	    """Continue a conversation about a video, carrying earlier turns along.

	    Args:
	        video_path: Passed to ``model.chat`` as ``video_path``.
	        user_prompt: Passed to ``model.chat`` as ``user_prompt``.
	        chat_history: Earlier conversation state, passed to ``model.chat``
	            as ``chat_history``.

	    Returns:
	        The two values returned by ``model.chat``, in order: the reply and
	        the history object to feed into the next call.
	    """
	    chat_kwargs = {
	        "video_path": video_path,
	        "tokenizer": tokenizer,
	        "user_prompt": user_prompt,
	        "chat_history": chat_history,
	        "return_history": True,
	        "max_num_frames": max_num_frames,
	        "generation_config": generation_config,
	    }
	    reply, updated_history = model.chat(**chat_kwargs)
	    return reply, updated_history

	# For the Gradio interface, we'll combine these into a chat function.
	def chat_interface(video_path, user_prompt, chat_history):
	    """Gradio click handler: answer one question and update the history.

	    Args:
	        video_path: Value of the video input; falsy when nothing has been
	            uploaded.
	        user_prompt: Text entered by the user.
	        chat_history: Conversation state from the previous call, or
	            ``None`` on the first call.

	    Returns:
	        A ``(text, history)`` pair. ``text`` is the model reply, or a short
	        hint when the video or the question is missing; ``history`` is the
	        state to store for the next call (unchanged when no model call was
	        made).
	    """
	    if chat_history is None:
	        chat_history = []

	    # Without these guards a click with no upload or a blank question would
	    # forward None/empty values to model.chat and surface as an opaque error
	    # in the UI. Return a readable hint and leave the history untouched.
	    if not video_path:
	        return "Please upload a video before sending a question.", chat_history
	    if not user_prompt or not user_prompt.strip():
	        return "Please enter a question before sending.", chat_history

	    output, new_history = model.chat(
	        video_path=video_path,
	        tokenizer=tokenizer,
	        user_prompt=user_prompt,
	        chat_history=chat_history,
	        return_history=True,
	        max_num_frames=max_num_frames,
	        generation_config=generation_config,
	    )
	    return output, new_history

	# Build the Gradio interface.
	with gr.Blocks() as demo:
	    gr.Markdown("## InternVideo2_5_Chat_8B Chat Interface")
	    with gr.Row():
	        # gr.Video takes no `type` keyword (passing one raises TypeError
	        # while the UI is being built); the component already hands the
	        # uploaded file to the handler as a path.
	        video_input = gr.Video(label="Upload Video")
	        question_input = gr.Textbox(
	            label="Enter your question",
	            placeholder="Type your question here...",
	        )
	    chat_state = gr.State([])  # To maintain conversation history
	    output_text = gr.Textbox(label="Model Response")

	    send_btn = gr.Button("Send")
	    # chat_interface receives (video, question, history) and returns
	    # (reply text, new history), matching the inputs/outputs lists below.
	    send_btn.click(
	        chat_interface,
	        inputs=[video_input, question_input, chat_state],
	        outputs=[output_text, chat_state],
	    )

	# Start the web server only when the file is run as a script.
	if __name__ == "__main__":
	    demo.launch()