#!/bin/bash

# Start the Ollama server in the background
ollama serve &

# Wait a few seconds for the server to be fully operational
sleep 5

# Pull the model from Hugging Face.
# This command downloads the model and makes it available to the API.
# It's the non-interactive version of "ollama run".
ollama pull hf.co/unsloth/gemma-3-4b-it-qat-GGUF:Q4_K_M

# Start the Gradio web application.
# This will connect to the Ollama server, which is already running.
python3 app.py
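
# Note: the fixed "sleep 5" above is a simple heuristic and can race with a
# slow server start. A sketch of a more robust readiness check, assuming
# Ollama listens on its default address (localhost:11434), is to poll the
# server until it responds before pulling the model:
#
#   until curl -sf http://localhost:11434/ >/dev/null; do
#     sleep 1
#   done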