bogeumkim committed on
Commit
b57f297
·
1 Parent(s): 87fb172

Add preprocessor.py

Browse files
Files changed (6) hide show
  1. app.py +41 -5
  2. modal_app.py +12 -0
  3. pyproject.toml +3 -0
  4. utils/ignore.txt +0 -0
  5. utils/preprocessor.py +48 -0
  6. uv.lock +0 -0
app.py CHANGED
@@ -1,12 +1,48 @@
 
 
 
 
1
  import gradio as gr
2
 
3
- def greet(name, intensity):
4
- return "Hello, " + name + "!" * int(intensity)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  demo = gr.Interface(
7
- fn=greet,
8
- inputs=["text", "slider"],
9
- outputs=["text"],
 
10
  )
11
 
12
  demo.launch()
 
1
+ import re
2
+ import requests
3
+ from markdownify import markdownify
4
+ from requests.exceptions import RequestException
5
  import gradio as gr
6
 
7
+ # Import the Preprocessor class
8
+ from utils.preprocessor import Preprocessor
9
+
10
def visit_webpage(url, max_output_length=40000):
    """
    Fetch a webpage, convert it to markdown, and summarize its paths.

    The page at *url* is downloaded, converted to markdown, and (if too
    long) truncated in the middle. The 'Latest commit' section is then
    extracted via Preprocessor and the link paths / file names found in
    it are returned as a formatted string.

    Parameters
    ----------
    url : str
        Address of the page to fetch.
    max_output_length : int
        Upper bound on the total markdown kept for extraction. The
        truncation marker is counted inside this budget, so the kept
        content never exceeds the bound.

    Returns
    -------
    str
        A "paths: ...\\n\\nfiles: ..." summary on success, or a
        human-readable error message when the request fails.
    """
    try:
        response = requests.get(url, timeout=20)
        response.raise_for_status()
        markdown_content = markdownify(response.text).strip()
        # Collapse runs of 3+ newlines so blank-line-delimited section
        # extraction behaves predictably.
        markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
        if len(markdown_content) > max_output_length:
            marker = (
                f"\n..._This content has been truncated to stay below "
                f"{max_output_length} characters_...\n"
            )
            # Budget the marker too: the original code kept
            # max_output_length chars *plus* the marker, overshooting
            # the promised limit.
            half = max(0, (max_output_length - len(marker)) // 2)
            # Guard half == 0: a slice [-0:] would return the whole string.
            tail = markdown_content[-half:] if half else ""
            markdown_content = markdown_content[:half] + marker + tail
        # Use Preprocessor class methods
        section = Preprocessor.extract_section(markdown_content)
        dir_paths, files = Preprocessor.extract_dirs_from_text(section)
        # Sort the sets for deterministic display (set iteration order
        # is not stable across runs).
        return f"paths: {sorted(dir_paths)}\n\nfiles: {sorted(files)}"
    except requests.exceptions.Timeout:
        return "The request timed out. Please try again later or check the URL."
    except RequestException as e:
        return f"Error fetching the webpage: {str(e)}"
    except Exception as e:
        return f"An unexpected error occurred: {str(e)}"
40
 
41
# Gradio UI: a single URL textbox in, a single textbox out, wired to
# visit_webpage. NOTE(review): the module-level name `demo` appears to be
# the conventional Hugging Face Spaces entry point — do not rename.
demo = gr.Interface(
    fn=visit_webpage,
    inputs=gr.Textbox(label="Website URL"),
    outputs=gr.Textbox(label="Extracted Section, Directory Paths, and File Paths"),
    title="Webpage Section and Path Extractor"
)

# Start the local web server serving the interface.
demo.launch()
modal_app.py CHANGED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import modal

# Modal application container; functions registered on it execute on
# Modal's remote infrastructure rather than the local machine.
app = modal.App("auto-readme-agent")

@app.function()
def square(x):
    # Runs on a remote Modal worker when invoked via .remote().
    print("This code is running on a remote worker!")
    return x**2

@app.local_entrypoint()
def main():
    # Runs locally (e.g. via `modal run`); square.remote(42) dispatches
    # the computation to the remote worker and waits for the result.
    print("the square is", square.remote(42))
pyproject.toml CHANGED
@@ -6,4 +6,7 @@ readme = "README.md"
6
  requires-python = ">=3.11"
7
  dependencies = [
8
  "gradio>=5.33.0",
 
 
 
9
  ]
 
6
  requires-python = ">=3.11"
7
  dependencies = [
8
  "gradio>=5.33.0",
9
+ "markdownify>=1.1.0",
10
+ "modal>=1.0.3",
11
+ "smolagents>=1.17.0",
12
  ]
utils/ignore.txt ADDED
File without changes
utils/preprocessor.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
class Preprocessor:
    """
    Static helpers for preprocessing markdown content:
    - pull out the section that follows a given keyword
    - collect link paths and quoted names from markdown-style links
    """

    @staticmethod
    def extract_section(markdown_content, keyword="Latest commit"):
        """
        Return the lines between the first line containing *keyword*
        and the next blank line (exclusive on both ends), joined with
        newlines. If nothing is collected, return a
        "No content found after '<keyword>'." message instead.
        """
        collected = []
        grabbing = False
        for line in markdown_content.splitlines():
            if grabbing:
                # Stop at the first blank line after the keyword.
                if not line.strip():
                    break
                collected.append(line)
            elif keyword in line:
                # Start collecting from the line *after* the keyword line.
                grabbing = True
        if collected:
            return "\n".join(collected)
        return f"No content found after '{keyword}'."

    @staticmethod
    def extract_dirs_from_text(text):
        """
        Scan *text* for markdown links of the form (/path "name") and
        return a pair of sets: all link paths, and those quoted names
        that do not start with a dot (dotfiles are excluded).
        """
        link_re = re.compile(r'\((/[^ )]+)\s+"([^"]+)"\)')
        paths, names = set(), set()
        for path, name in link_re.findall(text):
            paths.add(path)
            if not name.startswith("."):
                names.add(name)
        return paths, names
uv.lock CHANGED
The diff for this file is too large to render. See raw diff