"""Generate a README.md (via a Gemini model) and an index.md tree listing
from a zipped project archive."""

import os
import tempfile
import zipfile

import google.generativeai as genai
from dotenv import load_dotenv

# Not referenced in this module's visible code; kept because other modules
# may import it from here.
from doc_generator import generate_requirements_txt  # noqa: F401

load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
model = genai.GenerativeModel("models/gemini-2.0-flash")

# Trailing comments attached to matching entry names in the index tree.
ANNOTATIONS = {
    "app.py": "← Gradio + MCP server",
    "README.md": '← With demo + tag "mcp-server-track"',
    "demo_video.mp4": "← Link embedded in README",
}

# The prompt text is a single line followed by a newline; it is split into
# adjacent literals only for readability, the resulting value is unchanged.
PROMPT = (
    "You are an expert software project documentation assistant. "
    "You will write a clear, complete, and well-structured `README.md` file "
    "for a source code repository with the following files and content "
    "excerpts: {file_summaries} "
    "The README must contain: "
    "1. A title "
    "2. A short project description "
    '3. An "Installation" section '
    '4. A "Usage" section '
    '5. A "Features" section '
    '6. An "Authors" section (write "To be completed" if not detected) '
    '7. A "License" section (write "To be completed" if not detected) '
    "Respond only with the README.md content, without markdown ``` tags. \n"
)

# File suffixes whose content is excerpted into the prompt.
_SUMMARY_EXTENSIONS = (".py", ".js", ".ts", ".java", ".md", ".json", ".txt")

# Maximum number of characters excerpted from each file.
_EXCERPT_CHARS = 1000


def _iter_summary_candidates(dir_path):
    """Yield full paths of summarizable files under *dir_path* in sorted order."""
    for root, dirs, files in os.walk(dir_path):
        # Sorting in place makes os.walk descend in a deterministic order.
        dirs.sort()
        for file_name in sorted(files):
            if file_name.endswith(_SUMMARY_EXTENSIONS):
                yield os.path.join(root, file_name)


def summarize_files(dir_path, max_files=20, max_chars=5000):
    """Build a markdown digest of source files found under *dir_path*.

    Each readable UTF-8 file with a recognised extension contributes a
    ``### <relative path>`` heading followed by a fenced excerpt of at most
    1000 characters.

    Args:
        dir_path: Root directory to scan recursively.
        max_files: Stop once this many files have been summarized.
        max_chars: Stop once the accumulated summaries exceed this many
            characters (the summary that crosses the limit is kept).

    Returns:
        The summaries joined by blank lines, or ``""`` if nothing qualified.
    """
    summaries = []
    total_chars = 0
    for full_path in _iter_summary_candidates(dir_path):
        # Both limits end the whole scan, not just the current directory.
        if len(summaries) >= max_files or total_chars > max_chars:
            break
        try:
            with open(full_path, "r", encoding="utf-8") as f:
                excerpt = f.read(_EXCERPT_CHARS)
        except (OSError, UnicodeDecodeError):
            # Best effort: unreadable or non-UTF-8 files are skipped.
            continue
        rel_path = os.path.relpath(full_path, dir_path)
        summary = f"### {rel_path}\n```\n{excerpt}\n```"
        summaries.append(summary)
        total_chars += len(summary)
    return "\n\n".join(summaries)


def _strip_code_fence(text: str) -> str:
    """Remove a wrapping markdown code fence from *text*, if there is one.

    Text that does not start with a fence line is returned unchanged apart
    from surrounding whitespace, so its first and last lines are preserved.
    """
    lines = text.strip().splitlines()
    if lines and lines[0].lstrip().startswith("```"):
        lines = lines[1:]
        # Only drop a closing fence when an opening one was removed, so a
        # README that legitimately ends with a code block stays intact.
        if lines and lines[-1].strip() == "```":
            lines = lines[:-1]
    return "\n".join(lines).strip()


def generate_readme_from_zip(zip_file_path: str, output_dir: str) -> tuple[str, str]:
    """Generate ``README.md`` and ``index.md`` for a zipped project.

    The archive is extracted to a temporary directory, its files are
    summarized into the prompt, and the model's answer is written to
    ``README.md``. A tree listing of the extracted files is written to
    ``index.md``. Errors from ``zipfile``, the model call, or file writes
    propagate to the caller.

    Args:
        zip_file_path: Path to the project archive.
        output_dir: Directory receiving the generated files; created if
            missing.

    Returns:
        ``(readme_path, index_path)``.
    """
    with tempfile.TemporaryDirectory() as tempdir:
        with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
            zip_ref.extractall(tempdir)

        file_summaries = summarize_files(tempdir)
        prompt = PROMPT.format(file_summaries=file_summaries)
        response = model.generate_content(prompt)

        # The prompt asks for no code fences; strip them only if the model
        # added them anyway.
        readme_content = _strip_code_fence(response.text)

        readme_path = os.path.join(output_dir, "README.md")
        index_path = os.path.join(output_dir, "index.md")
        os.makedirs(output_dir, exist_ok=True)

        with open(readme_path, "w", encoding="utf-8") as f:
            f.write(readme_content)

        # The index must be built while the extracted files still exist.
        write_index_file(tempdir, index_path)

    return readme_path, index_path


def _tree_entry_lines(path: str, prefix: str) -> list:
    """Return the tree lines for the entries of *path*, recursing into subdirectories."""
    lines = []
    for entry in sorted(os.listdir(path)):
        full_path = os.path.join(path, entry)
        is_dir = os.path.isdir(full_path)
        label = entry + "/" if is_dir else entry
        comment = f" {ANNOTATIONS.get(entry, '')}".rstrip()
        lines.append(prefix + "├── " + label + comment)
        if is_dir:
            # NOTE(review): this continuation prefix is narrower than the
            # 4-character connector; confirm the intended width.
            lines.extend(_tree_entry_lines(full_path, prefix + "│ "))
    return lines


def generate_tree_structure(path: str, prefix: str = "") -> str:
    """Render the directory tree rooted at *path* as text.

    The output starts with a ``📁 repo/`` header, lists every entry
    (directories get a trailing ``/`` and known names an annotation from
    ``ANNOTATIONS``), and ends with the ``README.md`` and ``index.md``
    entries. The header and these two trailing entries are emitted once,
    not once per subdirectory.

    Args:
        path: Directory to render.
        prefix: Text prepended to every entry line.

    Returns:
        The tree as a newline-joined string.
    """
    lines = ["📁 repo/"]
    lines.extend(_tree_entry_lines(path, prefix))
    lines.extend(["├── README.md", "└── index.md"])
    return "\n".join(lines)


def write_index_file(project_path: str, output_path: str) -> None:
    """Write the tree listing of *project_path* to *output_path* as UTF-8 text."""
    structure = generate_tree_structure(project_path)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(structure)