Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import torch | |
| from transformers import AutoModel, AutoTokenizer | |
# Hugging Face Hub repository id of the checkpoint served by this app.
model_path = "OpenGVLab/InternVideo2_5_Chat_8B"

# trust_remote_code=True allows transformers to run the custom tokenizer and
# model code published in the checkpoint repository.
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
# Cast the weights to half precision and move the model to the CUDA device.
model = model.half().cuda()

# Image processor exposed by the model's vision tower.
# NOTE(review): not referenced anywhere else in the visible code.
image_processor = model.get_vision_tower().image_processor
# Evaluation settings shared by every chat helper below.

# Forwarded to model.chat as its max_num_frames argument.
max_num_frames = 512

# Decoding options forwarded to model.chat as generation_config.
generation_config = dict(
    do_sample=False,
    temperature=0.0,
    max_new_tokens=1024,
    top_p=0.1,
    num_beams=1,
)

# Placeholder path for local testing; update as needed.
video_path = "your_video.mp4"
# Single-turn conversation helper.
def single_turn_chat(video_path, user_prompt):
    """Ask one question about a video and return only the model's reply.

    Args:
        video_path: Path of the video, forwarded to ``model.chat``.
        user_prompt: The question text, forwarded to ``model.chat``.

    Returns:
        The first element of the pair returned by ``model.chat``; the
        conversation history it also returns is discarded.
    """
    chat_kwargs = {
        "video_path": video_path,
        "tokenizer": tokenizer,
        "user_prompt": user_prompt,
        "return_history": True,
        "max_num_frames": max_num_frames,
        "generation_config": generation_config,
    }
    reply, _unused_history = model.chat(**chat_kwargs)
    return reply
# Multi-turn conversation helper.
def multi_turn_chat(video_path, user_prompt, chat_history):
    """Continue a conversation about a video.

    Args:
        video_path: Path of the video, forwarded to ``model.chat``.
        user_prompt: The new question text.
        chat_history: History from earlier turns, forwarded unchanged to
            ``model.chat``.

    Returns:
        A ``(reply, updated_history)`` pair exactly as produced by
        ``model.chat`` when called with ``return_history=True``.
    """
    reply, updated_history = model.chat(
        generation_config=generation_config,
        max_num_frames=max_num_frames,
        return_history=True,
        chat_history=chat_history,
        user_prompt=user_prompt,
        tokenizer=tokenizer,
        video_path=video_path,
    )
    return reply, updated_history
# Callback used by the Gradio interface: one chat turn with input validation.
def chat_interface(video_path, user_prompt, chat_history):
    """Run one chat turn for the Gradio UI.

    Args:
        video_path: Path of the uploaded video, or ``None`` when the user has
            not uploaded one yet.
        user_prompt: The question typed by the user.
        chat_history: Conversation history kept in the UI state; ``None`` is
            treated as an empty history.

    Returns:
        A ``(response_text, history)`` pair. When the video or the question
        is missing, ``response_text`` is a hint for the user and ``history``
        is returned unchanged, so the model is never called with unusable
        input.
    """
    if chat_history is None:
        chat_history = []

    # Guard clauses: clicking "Send" without a video or with a blank question
    # would otherwise pass None / empty input straight to model.chat.
    if not video_path:
        return "Please upload a video before sending a question.", chat_history
    if not user_prompt or not user_prompt.strip():
        return "Please enter a question about the video.", chat_history

    output, new_history = model.chat(
        video_path=video_path,
        tokenizer=tokenizer,
        user_prompt=user_prompt,
        chat_history=chat_history,
        return_history=True,
        max_num_frames=max_num_frames,
        generation_config=generation_config,
    )
    return output, new_history
# Build the Gradio interface.
with gr.Blocks() as demo:
    gr.Markdown("## InternVideo2_5_Chat_8B Chat Interface")
    with gr.Row():
        # gr.Video has no `type` parameter; the component already passes the
        # callback a path to the uploaded file, so no extra option is needed.
        video_input = gr.Video(label="Upload Video")
        question_input = gr.Textbox(
            label="Enter your question",
            placeholder="Type your question here...",
        )
    # Per-session conversation history, fed back into chat_interface each turn.
    chat_state = gr.State([])
    output_text = gr.Textbox(label="Model Response")
    send_btn = gr.Button("Send")
    # chat_interface returns (response, history), matching the two outputs.
    send_btn.click(
        chat_interface,
        inputs=[video_input, question_input, chat_state],
        outputs=[output_text, chat_state],
    )

if __name__ == "__main__":
    demo.launch()