"""Streamlit app: summarize a public GitHub repository using open-source LLMs.

Downloads the repo's zip archive, indexes it with llama-index using a local
HuggingFace embedding model + LLM, and asks the index to explain the project.
"""

import os
import tempfile
import zipfile

import requests
import streamlit as st
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    ServiceContext,
)
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.prompts import PromptTemplate
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM

# ---------------------- UI Header ----------------------
st.set_page_config(page_title="📘 GitHub Repository Explainer")
st.title("📘 GitHub Repository Explainer (100% Free)")
st.caption("Explain any GitHub repo in plain English using open-source LLMs.")

# ---------------------- Input ----------------------
github_url = st.text_input(
    "Enter GitHub repo URL (public):",
    placeholder="https://github.com/user/repo",
)


# ---------------------- Functions ----------------------
def fetch_and_prepare_repo(github_url):
    """Download and extract a public GitHub repo; return the extracted folder.

    Tries the ``main`` branch first and falls back to ``master``, since the
    default branch name varies by repository.

    Raises:
        ValueError: if the URL is not a ``https://github.com/`` URL.
        requests.HTTPError: if the archive cannot be downloaded from either
            branch.
    """
    if not github_url.startswith("https://github.com/"):
        raise ValueError("Invalid GitHub URL")

    # BUGFIX: use mkdtemp(), not a TemporaryDirectory context manager.
    # The previous version returned a path *inside* the `with` directory
    # after the context had exited, so the caller received a path to a
    # directory that had already been deleted.
    tmpdir = tempfile.mkdtemp()
    zip_path = os.path.join(tmpdir, "repo.zip")

    # rstrip("/") tolerates a trailing slash in the pasted URL.
    base_url = github_url.rstrip("/")
    response = None
    for branch in ("main", "master"):
        response = requests.get(f"{base_url}/archive/refs/heads/{branch}.zip")
        if response.ok:
            break
    # Fail loudly on a bad download instead of handing an HTML error page
    # to ZipFile (which would raise a confusing BadZipFile later).
    response.raise_for_status()

    with open(zip_path, "wb") as f:
        f.write(response.content)
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(tmpdir)

    # The archive expands to a single "<repo>-<branch>" directory.
    repo_folder = next(e.path for e in os.scandir(tmpdir) if e.is_dir())
    return repo_folder


# ---------------------- Main ----------------------
if github_url:
    with st.spinner("⏳ Loading and analyzing repo..."):
        repo_dir = fetch_and_prepare_repo(github_url)
        documents = SimpleDirectoryReader(input_dir=repo_dir).load_data()

        # Small, CPU-friendly open models so the app runs without paid APIs.
        embed_model = HuggingFaceEmbedding(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        llm = HuggingFaceLLM(
            context_window=3900,
            max_new_tokens=256,
            generate_kwargs={"temperature": 0.2, "do_sample": False},
            model_name="google/flan-t5-base",
            tokenizer_name="google/flan-t5-base",
            device_map="auto",
        )
        service_context = ServiceContext.from_defaults(
            chunk_size=512,
            llm=llm,
            embed_model=embed_model,
            node_parser=SentenceSplitter(),
        )

        index = VectorStoreIndex.from_documents(
            documents, service_context=service_context
        )
        query_engine = index.as_query_engine()

        # Override the default QA prompt so answers stay grounded in the
        # retrieved repository context.
        custom_prompt = PromptTemplate(
            "You are a helpful assistant. Given this GitHub code context:\n"
            "---------------------\n"
            "{context_str}\n"
            "---------------------\n"
            "Answer this question:\n"
            "{query_str}\n"
            "---------------------\n"
            "Answer:"
        )
        query_engine.update_prompts(
            {"response_synthesizer:text_qa_template": custom_prompt}
        )

        query = (
            "Explain what this GitHub repository does, including any setup "
            "instructions or key features."
        )
        response = query_engine.query(query)

    st.success("✅ Repository analysis complete!")
    st.markdown("### 🧠 Project Summary")
    st.write(response.response)