Spaces:

Agents-MCP-Hackathon
/

AutoDocsSmartDocumentationGenerator

Sleeping

App Files Files Community

AutoDocsSmartDocumentationGenerator / readme_generator.py

The0eau

Upload

d12bff5 about 1 month ago

raw

history blame contribute delete

4.11 kB

	import os
	import zipfile
	import tempfile
	import google.generativeai as genai
	from dotenv import load_dotenv
	from doc_generator import generate_requirements_txt

	# Module-level setup for the Gemini client; runs once at import time.
	# load_dotenv() is called before the key lookup below — presumably so that a
	# GOOGLE_API_KEY defined in a local .env file is visible to os.getenv
	# (python-dotenv behavior; confirm against the deployed environment).
	load_dotenv()
	# os.getenv returns None when GOOGLE_API_KEY is unset; no check is made here.
	genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
	# Shared model handle used by generate_readme_from_zip to produce the README text.
	model = genai.GenerativeModel("models/gemini-2.0-flash")

	# Maps an entry name (file name only, no directory part) to a short note.
	# generate_tree_structure looks each listed entry up here and appends the
	# note, separated by one space, after the entry's name in the tree output.
	ANNOTATIONS = {
	"app.py": "← Gradio + MCP server",
	"README.md": "← With demo + tag \"mcp-server-track\"",
	"demo_video.mp4": "← Link embedded in README"
	}

	# Prompt template sent to the model by generate_readme_from_zip.
	# The single `{file_summaries}` placeholder is filled with str.format, so any
	# other literal braces added to this text would have to be doubled.
	# The final instruction asks the model to return raw README content without
	# wrapping ``` fences.
	PROMPT = """You are an expert software project documentation assistant.

	You will write a clear, complete, and well-structured `README.md` file for a source code repository with the following files and content excerpts:

	{file_summaries}

	The README must contain:
	1. A title
	2. A short project description
	3. An "Installation" section
	4. A "Usage" section
	5. A "Features" section
	6. An "Authors" section (write "To be completed" if not detected)
	7. A "License" section (write "To be completed" if not detected)

	Respond only with the README.md content, without markdown ``` tags.
	"""

	def summarize_files(dir_path, max_files=20, max_chars=5000):
	    """Build a Markdown digest of the text source files found under *dir_path*.

	    The tree is walked top-down in sorted order so the result is reproducible.
	    Every file whose name ends with one of the recognised extensions
	    contributes a section made of a ``### <relative path>`` heading followed
	    by a fenced block holding at most the first 1000 characters of the file.

	    Args:
	        dir_path: Root directory to scan.
	        max_files: Maximum number of files to include in the digest.
	        max_chars: Soft budget on the combined length of the sections. The
	            section that pushes the total past the budget is kept, and
	            scanning stops right after it.

	    Returns:
	        The sections joined by blank lines, or an empty string when no
	        readable matching file was found.
	    """
	    extensions = (".py", ".js", ".ts", ".java", ".md", ".json", ".txt")
	    summaries = []
	    total_chars = 0  # running total; avoids re-joining every section each time

	    for root, dirs, files in os.walk(dir_path):
	        # Sorting in place makes os.walk descend in a deterministic order.
	        dirs.sort()
	        for name in sorted(files):
	            # Both limits apply to the whole walk, not just one directory,
	            # so leave the function as soon as either one is reached.
	            if len(summaries) >= max_files or total_chars > max_chars:
	                return "\n\n".join(summaries)
	            if not name.endswith(extensions):
	                continue

	            full_path = os.path.join(root, name)
	            try:
	                with open(full_path, "r", encoding="utf-8") as f:
	                    # Only the excerpt is needed; do not load large files whole.
	                    excerpt = f.read(1000)
	            except (OSError, UnicodeError):
	                # Best effort: unreadable or non-UTF-8 files are skipped.
	                continue

	            rel_path = os.path.relpath(full_path, dir_path)
	            summary = f"### {rel_path}\n```\n{excerpt}\n```"
	            summaries.append(summary)
	            total_chars += len(summary)

	    return "\n\n".join(summaries)

	def _strip_code_fence(text: str) -> str:
	    """Return *text* without a wrapping Markdown code fence, if it has one."""
	    lines = text.strip().splitlines()
	    # Only unwrap when the reply really starts with a fence; otherwise the
	    # first line is genuine README content (typically the title).
	    if lines and lines[0].lstrip().startswith("```"):
	        lines = lines[1:]
	        if lines and lines[-1].strip() == "```":
	            lines = lines[:-1]
	    return "\n".join(lines).strip()


	def generate_readme_from_zip(zip_file_path: str, output_dir: str) -> tuple[str, str]:
	    """Generate ``README.md`` and ``index.md`` for the project in a zip archive.

	    The archive is extracted into a temporary directory, its source files are
	    summarised with ``summarize_files``, and the summary is sent to the model
	    through ``PROMPT``. The model's reply is written to ``README.md`` and a
	    tree listing of the extracted files is written to ``index.md``.

	    Args:
	        zip_file_path: Path of the zip archive containing the project.
	        output_dir: Directory receiving the two generated files; created if
	            it does not exist.

	    Returns:
	        A ``(readme_path, index_path)`` tuple of the files that were written.
	    """
	    readme_path = os.path.join(output_dir, "README.md")
	    index_path = os.path.join(output_dir, "index.md")

	    with tempfile.TemporaryDirectory() as tempdir:
	        with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
	            zip_ref.extractall(tempdir)

	        file_summaries = summarize_files(tempdir)
	        prompt = PROMPT.format(file_summaries=file_summaries)
	        response = model.generate_content(prompt)
	        # The prompt asks for no ``` wrapper, but remove one if it is present.
	        # An unwrapped reply is kept intact instead of losing its first and
	        # last lines.
	        readme_content = _strip_code_fence(response.text)

	        os.makedirs(output_dir, exist_ok=True)
	        with open(readme_path, "w", encoding="utf-8") as f:
	            f.write(readme_content)

	        # The index must be built while the extracted files still exist,
	        # i.e. before the temporary directory is cleaned up.
	        write_index_file(tempdir, index_path)

	    return readme_path, index_path

	def _tree_entry_lines(path: str, prefix: str) -> list[str]:
	    """Return one tree line per entry under *path*, recursing into subdirectories."""
	    lines = []
	    for entry in sorted(os.listdir(path)):
	        full_path = os.path.join(path, entry)
	        is_dir = os.path.isdir(full_path)
	        # Optional note from ANNOTATIONS; rstrip() drops the lone separator
	        # space when the entry has no note.
	        comment = f" {ANNOTATIONS.get(entry, '')}".rstrip()
	        label = entry + "/" if is_dir else entry
	        lines.append(prefix + "├── " + label + comment)
	        if is_dir:
	            lines.extend(_tree_entry_lines(full_path, prefix + "│ "))
	    return lines


	def generate_tree_structure(path: str, prefix: str = "") -> str:
	    """Render the directory tree under *path* as text.

	    The output starts with a fixed ``📁 repo/`` header, lists every entry of
	    *path* in sorted order (directories get a trailing ``/`` and are expanded
	    recursively, names found in ``ANNOTATIONS`` get their note appended), and
	    ends with the two generated files ``README.md`` and ``index.md``.

	    The footer is added exactly once, at the end of the whole tree, rather
	    than after every subdirectory's listing.

	    Args:
	        path: Directory to render.
	        prefix: Text placed before each entry line of *path*; nested levels
	            extend it with ``"│ "``.

	    Returns:
	        The tree as a single newline-joined string.
	    """
	    lines = ["📁 repo/"]
	    lines.extend(_tree_entry_lines(path, prefix))
	    lines.extend(["├── README.md", "└── index.md"])
	    return "\n".join(lines)


	def write_index_file(project_path: str, output_path: str):
	    """Write the text tree of *project_path* to the file at *output_path*.

	    The tree is produced by ``generate_tree_structure``; the target file is
	    created or overwritten and encoded as UTF-8.
	    """
	    tree_text = generate_tree_structure(project_path)
	    with open(output_path, mode="w", encoding="utf-8") as index_file:
	        index_file.write(tree_text)