"""Streamlit medical chatbot backed by BioMistral-7B (GGUF, llama-cpp-python).

Downloads the quantized model from the Hugging Face Hub once, loads it once
per server process, and answers free-text medical questions.
"""

import streamlit as st
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Public Hugging Face repo hosting the quantized BioMistral weights.
REPO_ID = "MaziyarPanahi/BioMistral-7B-GGUF"
FILENAME = "BioMistral-7B.Q4_K_M.gguf"

st.set_page_config(page_title="🩺 Medical Chatbot")
st.title("🧠 Medical Chatbot using BioMistral")


@st.cache_resource(show_spinner=False)
def load_model() -> Llama:
    """Download (cached by hf_hub) and load the GGUF model exactly once.

    Without ``st.cache_resource`` Streamlit would re-execute this on every
    widget interaction, reloading the multi-GB model for each question.
    """
    with st.spinner("📥 Downloading BioMistral model from Hugging Face..."):
        model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
    with st.spinner("⚙️ Loading the model into memory..."):
        return Llama(
            model_path=model_path,
            n_ctx=4096,
            n_threads=8,
            n_gpu_layers=35,  # Use 0 for CPU-only
        )


llm = load_model()

# UI to get user input
query = st.text_input("💬 Ask a medical question:")
if query:
    with st.spinner("🧠 Thinking..."):
        # "</s>" is the Mistral-family EOS marker; an empty-string stop
        # sequence (the previous value) is a no-op.
        response = llm(query, max_tokens=512, stop=["</s>"])
    st.markdown("**Answer:**")
    st.write(response["choices"][0]["text"].strip())