import gradio as gr
import torch
from transformers import AutoProcessor, AutoModelForVision2Seq

from web_data import custom_css, footer, header

MODEL_ID = "HuggingFaceTB/SmolVLM-256M-Instruct"
# NOTE(review): 32x32 discards nearly all visual detail -- presumably chosen to
# keep CPU inference fast; confirm this is intentional before raising it.
IMAGE_SIZE = (32, 32)

processor = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForVision2Seq.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
)


def respond(message, history: list[tuple[str, str]], image, system_message):
    """Generate one assistant reply for the Gradio chat interface.

    Args:
        message: The latest user message text.
        history: Prior turns as (user_text, assistant_text) pairs; either
            element may be falsy for an incomplete turn.
        image: Optional PIL image attached to the current message, or None.
        system_message: System-prompt text controlling the bot persona.

    Yields:
        The assistant's reply text (single yield; generator form is what
        gr.ChatInterface expects for streaming-capable handlers).
    """
    # Build the conversation in the uniform list-of-parts content format the
    # chat template expects. (The original mixed plain-string content for
    # history turns with list-of-parts for system/user turns.)
    messages = [
        {"role": "system", "content": [{"type": "text", "text": system_message}]},
    ]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append(
                {"role": "user", "content": [{"type": "text", "text": user_turn}]}
            )
        if assistant_turn:
            messages.append(
                {
                    "role": "assistant",
                    "content": [{"type": "text", "text": assistant_turn}],
                }
            )

    user_message = {"role": "user", "content": [{"type": "text", "text": message}]}
    if image is not None:
        # Placeholder part; the actual pixels are passed to the processor below.
        user_message["content"].append({"type": "image"})
    messages.append(user_message)

    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    if image is not None:
        resized_image = image.resize(IMAGE_SIZE)
        inputs = processor(text=prompt, images=[resized_image], return_tensors="pt")
        # The model weights are bfloat16 but the processor emits float32
        # pixel_values; cast to match or generate() raises a dtype mismatch.
        inputs["pixel_values"] = inputs["pixel_values"].to(model.dtype)
    else:
        inputs = processor(text=prompt, return_tensors="pt")

    # No gradients are needed for generation; inference_mode saves memory.
    with torch.inference_mode():
        generated_ids = model.generate(**inputs, max_new_tokens=500)
    generated_texts = processor.batch_decode(generated_ids, skip_special_tokens=True)
    # The decoded text contains the full conversation transcript; keep only
    # the text after the final "Assistant:" marker.
    response = generated_texts[0].split('Assistant:')[-1].strip()
    yield response


# ====== UI layout (originally labelled "pure vibing coding") ======
with gr.Blocks(theme=gr.themes.Glass(), css=custom_css) as demo:
    gr.HTML(header)
    with gr.Row(variant="panel"):
        with gr.Column(scale=1):
            with gr.Accordion("🌀 CONTROL PANEL", open=True):
                system_message = gr.Textbox(
                    value="You're a cool AI that speaks Gen-Z slang 😎",
                    label="🤖 BOT PERSONALITY",
                    lines=2,
                    max_lines=4,
                    elem_classes="cyber-input",
                )
                image_input = gr.Image(
                    type="pil",
                    label="📸 UPLOAD PIC",
                    height=200,
                    elem_classes="glow-border",
                )
        with gr.Column(scale=3):
            # additional_inputs order must match respond()'s trailing
            # parameters: (image, system_message).
            chat_interface = gr.ChatInterface(
                respond,
                additional_inputs=[image_input, system_message],
                submit_btn="📤 SEND",
            )
    gr.HTML(footer)


if __name__ == "__main__":
    demo.launch()