# ScreenCoder / app.py
# Hosting-page residue kept for reference (avatar caption, commit message, short hash):
#   Jimmyzheng-10's picture / Update / 6740ab9
import gradio as gr
import os
import tempfile
import cv2
import numpy as np
import urllib.parse
from screencoder.main import generate_html_for_demo
from PIL import Image
import shutil
import html
import base64
from bs4 import BeautifulSoup
from pathlib import Path
# Predefined examples.
# Each row holds six columns:
#   [image path, sidebar prompt, header prompt, navigation prompt,
#    main-content prompt, image path (repeated)]
# The first column is also read elsewhere in this file (default image on page
# load and the list of directories allowed for file serving).
examples_data = [
    [
        "screencoder/data/input/test1.png",
        "",
        "",
        "",
        "",
        "screencoder/data/input/test1.png",
    ],
    [
        "screencoder/data/input/test3.png",
        "",
        "",
        "",
        "",
        "screencoder/data/input/test3.png",
    ],
    [
        "screencoder/data/input/draft.png",
        "Add more text about 'Trump-Musk Feud' in the whole area.",
        "Beautify the logo 'Google'.",
        "",
        "Add text content about 'Trump and Musk' in 'Top Stories' and 'Wikipedia'. Add boundary box for each part.",
        "screencoder/data/input/draft.png",
    ],
]
# Only the first five columns (image + four prompts) are handed to gr.Examples;
# the trailing repeated path is dropped.
example_rows = [row[:5] for row in examples_data]
# TAILWIND_SCRIPT = "<script src='https://cdn.jsdelivr.net/npm/@tailwindcss/browser@4'></script>"
# Known image extensions and their MIME types. Anything not listed falls back
# to 'image/png', as before.
_IMAGE_MIME_TYPES = {
    '.png': 'image/png',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.gif': 'image/gif',
    '.webp': 'image/webp',
    '.svg': 'image/svg+xml',
    '.bmp': 'image/bmp',
    '.ico': 'image/x-icon',
}


def image_to_data_url(image_path):
    """Convert an image file to a data URL for embedding in HTML.

    Args:
        image_path: Path of the image file to embed.

    Returns:
        A ``data:<mime>;base64,<payload>`` string, or ``None`` when the file
        cannot be read (the error is printed, not raised).
    """
    try:
        with open(image_path, 'rb') as img_file:
            img_data = img_file.read()
    except (OSError, TypeError, ValueError) as e:
        # Best effort: callers treat None as "could not embed" and fall back.
        print(f"Error converting image to data URL: {e}")
        return None

    # The MIME type is chosen from the file extension only (case-insensitive);
    # the file content is not inspected.
    ext = os.path.splitext(image_path)[1].lower()
    mime_type = _IMAGE_MIME_TYPES.get(ext, 'image/png')
    encoded = base64.b64encode(img_data).decode('utf-8')
    return f'data:{mime_type};base64,{encoded}'
# References with these prefixes are not local files and are left untouched.
_NON_LOCAL_PREFIXES = ("http://", "https://", "//", "data:")


def _resolve_local_asset(output_dir, ref):
    """Return the existing file under output_dir that `ref` points to, or None."""
    rel = ref
    # Drop whole "./" prefixes only. str.lstrip("./") treats its argument as a
    # set of characters, so it would also eat the dots of "../a.css" or
    # ".hidden.css" and look up the wrong file.
    while rel.startswith("./"):
        rel = rel[2:]
    # Root-relative references ("/css/a.css") are looked up under output_dir;
    # joining a string that starts with "/" would discard output_dir entirely.
    rel = rel.lstrip("/")
    if not rel:
        # An empty reference would otherwise resolve to output_dir itself.
        return None
    candidate = output_dir / rel
    if not candidate.is_file():
        return None
    try:
        # Refuse references that climb out of output_dir (e.g. via "..").
        candidate.resolve().relative_to(output_dir.resolve())
    except ValueError:
        return None
    return candidate


def _patch_asset_tags(tags, attr, label, output_dir):
    """Point each tag's `attr` at a /file= path, or remove the tag if the file is missing."""
    for tag in tags:
        ref = tag.get(attr, "")
        if ref.lower().startswith(_NON_LOCAL_PREFIXES):
            continue
        asset = _resolve_local_asset(output_dir, ref)
        if asset is not None:
            tag[attr] = f"/file={asset}"
            print(f"Fixed {label} path: {ref} -> /file={asset}")
        else:
            print(f"Removing non-existent {label}: {ref}")
            tag.decompose()


def patch_css_js_paths(soup: BeautifulSoup, output_dir: Path):
    """
    Fix CSS and JS paths in the HTML to work with Gradio's file serving.

    Stylesheet <link> tags and <script src=...> tags that reference a local
    file are rewritten to a "/file=<path>" URL when the file exists under
    `output_dir`; otherwise the tag is removed. Remote (http/https,
    protocol-relative) and data: references are left as they are.

    Args:
        soup: Parsed document; it is modified in place.
        output_dir: Directory that relative asset references are resolved against.

    Returns:
        The same `soup` object. Errors are printed rather than raised, so the
        document may be returned partially patched.
    """
    try:
        # CSS
        stylesheets = soup.find_all("link", rel=lambda x: x and "stylesheet" in x)
        _patch_asset_tags(stylesheets, "href", "CSS", output_dir)
        # JS
        scripts = soup.find_all("script", src=True)
        _patch_asset_tags(scripts, "src", "JS", output_dir)
    except Exception as e:
        # Deliberately broad: a failure here should never block the preview.
        print(f"Error in patch_css_js_paths: {e}")
    return soup
def render_preview(code: str, width: int, height: int, scale: float) -> str:
    """
    Wrap generated HTML in a scaled iframe "canvas" for display in the UI.

    Script and link tags whose URL contains one of a few marker substrings are
    removed first; if that clean-up fails, the HTML is used unchanged.

    Args:
        code: HTML document to preview.
        width: Width of the inner canvas, in pixels.
        height: Height of the inner canvas, in pixels.
        scale: CSS scale factor applied to the inner canvas.

    Returns:
        An HTML snippet embedding `code` through the iframe's srcdoc attribute.
    """
    script_markers = ('assets/', 'index-', 'iframeResizer')
    link_markers = ('assets/', 'index-')

    # Start from the untouched input; it is only replaced if cleaning succeeds.
    cleaned_code = code
    try:
        soup = BeautifulSoup(code, 'html.parser')
        for script in soup.find_all('script'):
            src = script.get('src', '')
            if not src:
                continue
            if any(marker in src for marker in script_markers):
                script.decompose()
        for link in soup.find_all('link'):
            href = link.get('href', '')
            if not href:
                continue
            if any(marker in href for marker in link_markers):
                link.decompose()
        cleaned_code = str(soup)
    except Exception as e:
        print(f"Error cleaning HTML in render_preview: {e}")

    # The document goes inside a single-quoted attribute, so it must be escaped.
    # html.escape already rewrites single quotes by default, which makes the
    # extra replace a no-op kept only as a safeguard.
    escaped = html.escape(cleaned_code)
    safe_code = escaped.replace("'", "&apos;")

    return f"""
<div style="width: 100%; max-width: 1920px; margin: 0 auto; overflow-x: auto; overflow-y: hidden;">
<div style="
width: 1920px;
height: 1000px;
margin: 0 auto;
display: flex;
justify-content: center;
align-items: center;
border: 1px solid #ddd;
overflow: hidden;
background: #f9fafb;
position: relative;
box-shadow: 0 4px 12px rgba(0,0,0,0.1);">
<div style="
width: {width}px;
height: {height}px;
transform: scale({scale});
transform-origin: left center;
border: none;
position: relative;">
<iframe
style="width: 100%; height: 100%; border: none; display: block;"
srcdoc='{safe_code}'>
</iframe>
</div>
</div>
</div>
"""
def _failure_outputs(message):
    """Return the six handler outputs for a failed run, in the order the UI wires them."""
    # Order: layout preview, final preview, code view, layout-code state,
    # final-code state, download button. The message goes to both previews;
    # the code view and both states are cleared and the button is hidden.
    return message, message, "", "", "", gr.update(visible=False)


def _prepare_html(html_content, output_dir):
    """Make generated HTML self-contained for preview and return (preview, html)."""
    if not html_content:
        return "", ""  # Return empty strings if content is missing
    soup = BeautifulSoup(html_content, 'html.parser')
    # Fix CSS and JS paths (patch_css_js_paths reports its own errors).
    soup = patch_css_js_paths(soup, output_dir)
    # Convert local image paths to data URLs
    for img in soup.find_all('img'):
        src = img.get('src')
        if not src or src.startswith(('http', 'data:')):
            continue
        img_path = output_dir / src
        if not img_path.exists():
            continue  # Keep the original reference if the file is not found
        # Fall back to a served /file= path when embedding fails.
        img['src'] = image_to_data_url(str(img_path)) or f'/file={str(img_path)}'
    processed_html = str(soup)
    # Initial preview uses the same defaults as the sliders in the UI.
    preview = render_preview(processed_html, 1920, 1080, 0.55)
    return preview, processed_html


def process_and_generate(image_input, image_path_from_state, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt):
    """
    Main processing pipeline: takes an image (path or array), generates code, creates a
    downloadable package, and returns the preview and code outputs for both the layout
    and the final version.

    Args:
        image_input: Path of the image, an RGB image array, or None.
        image_path_from_state: Previously stored image path, used when
            `image_input` gives no usable image.
        sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt:
            Optional per-region instructions passed on to the generator.

    Returns:
        A 6-tuple, in the order the click handler's outputs are wired:
        (layout preview HTML, final preview HTML, final code for the code view,
        layout code for state, final code for state, update for the download
        button). On failure the previews carry the error message, the code
        slots are empty and the button is hidden.
    """
    final_image_path = ""
    is_temp_file = False
    if isinstance(image_input, str) and os.path.exists(image_input):
        final_image_path = image_input
    elif image_input is not None and not isinstance(image_input, str):
        # Anything that is not a string is treated as an image array and is
        # written to a temporary PNG below. A string that does not exist on
        # disk is NOT sent down this branch (it cannot be colour-converted).
        is_temp_file = True
    elif image_path_from_state:
        final_image_path = image_path_from_state
    else:
        return _failure_outputs("No image provided. Please upload or select an image.")

    instructions = {
        "sidebar": sidebar_prompt, "header": header_prompt,
        "navigation": navigation_prompt, "main content": main_content_prompt
    }

    try:
        if is_temp_file:
            # Reserve a file name, close the handle, then let OpenCV write to it.
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
                final_image_path = tmp.name
            # The array is expected in RGB order; OpenCV writes BGR.
            cv2.imwrite(final_image_path, cv2.cvtColor(image_input, cv2.COLOR_RGB2BGR))

        layout_html, final_html, run_id = generate_html_for_demo(final_image_path, instructions)
        if not run_id:  # The generator reports errors through the first return value
            return _failure_outputs(f"Generation failed. Error: {layout_html}")

        base_dir = Path(__file__).parent.resolve()
        output_dir = base_dir / 'screencoder' / 'data' / 'output' / run_id

        # --- Process both HTML versions ---
        layout_preview, layout_code = _prepare_html(layout_html, output_dir)
        final_preview, final_code = _prepare_html(final_html, output_dir)

        # --- Package the output ---
        packages_dir = base_dir / 'screencoder' / 'data' / 'packages'
        packages_dir.mkdir(parents=True, exist_ok=True)
        package_path = packages_dir / f'{run_id}.zip'
        shutil.make_archive(str(packages_dir / run_id), 'zip', str(output_dir))
        package_url = f'/file={str(package_path)}'
    finally:
        # Remove the temporary image on every exit path, including failures.
        if is_temp_file and final_image_path:
            try:
                os.unlink(final_image_path)
            except OSError as e:
                print(f"Could not remove temporary image {final_image_path}: {e}")

    # The button's value carries the package URL for the download click handler.
    return layout_preview, final_preview, final_code, layout_code, final_code, gr.update(value=package_url, visible=True)
# Build the Gradio UI. The css argument is a custom stylesheet (Poppins /
# JetBrains Mono fonts plus rounded, gradient-styled widgets).
# NOTE(review): the nesting of the `with` scopes below determines the page
# layout and was reconstructed from statement order — confirm against the
# deployed app, in particular where the download button and examples sit.
with gr.Blocks(css="""
@import url('https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;500;600&display=swap');
* {
font-family: 'Poppins', -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Helvetica Neue', Arial, sans-serif !important;
font-feature-settings: 'liga' 1, 'calt' 1 !important;
text-rendering: optimizeLegibility !important;
-webkit-font-smoothing: antialiased !important;
-moz-osx-font-smoothing: grayscale !important;
}
h1, h2, h3, h4, h5, h6 {
font-weight: 600 !important;
color: #1f2937 !important;
letter-spacing: -0.02em !important;
line-height: 1.2 !important;
}
h1 {
font-size: 2.5rem !important;
font-weight: 700 !important;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
margin-bottom: 1.5rem !important;
letter-spacing: -0.03em !important;
}
h2 {
font-size: 1.75rem !important;
font-weight: 600 !important;
color: #374151 !important;
margin-bottom: 1rem !important;
letter-spacing: -0.01em !important;
}
.gr-button {
font-weight: 500 !important;
font-family: 'Poppins', sans-serif !important;
border-radius: 10px !important;
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
letter-spacing: 0.01em !important;
}
.gr-button:hover {
transform: translateY(-2px) !important;
box-shadow: 0 8px 25px rgba(0, 0, 0, 0.15) !important;
}
.gr-textbox, .gr-slider {
border-radius: 10px !important;
font-family: 'Poppins', sans-serif !important;
}
.gr-textbox input, .gr-textbox textarea {
font-family: 'Poppins', sans-serif !important;
font-size: 14px !important;
font-weight: 400 !important;
letter-spacing: 0.01em !important;
line-height: 1.5 !important;
}
.gr-slider {
font-family: 'Poppins', sans-serif !important;
font-weight: 500 !important;
}
.gr-tabs {
border-radius: 12px !important;
overflow: hidden !important;
font-family: 'Poppins', sans-serif !important;
}
.gr-tab-nav {
background: linear-gradient(135deg, #f8fafc 0%, #e2e8f0 100%) !important;
font-family: 'Poppins', sans-serif !important;
}
.gr-tab-nav button {
font-weight: 500 !important;
font-family: 'Poppins', sans-serif !important;
color: #64748b !important;
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
letter-spacing: 0.01em !important;
}
.gr-tab-nav button.selected {
color: #3b82f6 !important;
background: white !important;
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1) !important;
font-weight: 600 !important;
}
.gr-accordion {
border-radius: 12px !important;
border: 1px solid #e5e7eb !important;
font-family: 'Poppins', sans-serif !important;
}
.gr-accordion-header {
font-weight: 500 !important;
font-family: 'Poppins', sans-serif !important;
color: #374151 !important;
letter-spacing: 0.01em !important;
}
.gr-markdown {
font-family: 'Poppins', sans-serif !important;
line-height: 1.7 !important;
font-weight: 400 !important;
letter-spacing: 0.01em !important;
}
.gr-markdown strong {
color: #059669 !important;
font-weight: 600 !important;
}
.gr-examples {
border-radius: 12px !important;
border: 1px solid #e5e7eb !important;
background: #f9fafb !important;
font-family: 'Poppins', sans-serif !important;
}
.gr-examples-header {
font-weight: 600 !important;
font-family: 'Poppins', sans-serif !important;
color: #374151 !important;
letter-spacing: 0.01em !important;
}
.gr-code {
font-family: 'JetBrains Mono', 'Fira Code', 'Consolas', 'Monaco', monospace !important;
font-size: 13px !important;
line-height: 1.6 !important;
letter-spacing: 0.01em !important;
}
.gr-label {
font-family: 'Poppins', sans-serif !important;
font-weight: 500 !important;
color: #374151 !important;
letter-spacing: 0.01em !important;
}
.gr-dropdown {
font-family: 'Poppins', sans-serif !important;
font-weight: 400 !important;
}
.gr-checkbox {
font-family: 'Poppins', sans-serif !important;
font-weight: 400 !important;
}
""") as demo:
    # --- Page header: title, project links, usage tips ---
    gr.Markdown("# ScreenCoder: Advancing Visual-to-Code Generation for Front-End Automation via Modular Multimodal Agents")
    gr.Markdown("[Github](https://github.com/leigest519/ScreenCoder/tree/main) | [Paper](https://arxiv.org/abs/2507.22827)")
    gr.Markdown("**Tips**: Use the sliders to adjust preview size and zoom level. Swipe to change viewing angle. Click download button to get the package.")
    with gr.Row():
        # --- Left column: image input, optional prompts, generate button ---
        with gr.Column(scale=1):
            gr.Markdown("## Step 1: Provide an Image")
            # type="filepath": the component's value is a path string.
            active_image = gr.Image(type="filepath", height=400)
            upload_button = gr.UploadButton("Click to Upload", file_types=["image"], variant="primary")
            gr.Markdown("## Step 2: Write Prompts (Optional)")
            with gr.Accordion("Component-specific Prompts", open=False):
                sidebar_prompt = gr.Textbox(label="Sidebar", placeholder="Instructions for the sidebar...")
                header_prompt = gr.Textbox(label="Header", placeholder="Instructions for the header...")
                navigation_prompt = gr.Textbox(label="Navigation", placeholder="Instructions for the navigation...")
                main_content_prompt = gr.Textbox(label="Main Content", placeholder="Instructions for the main content...")
            generate_btn = gr.Button("Generate HTML", variant="primary")
        # --- Right column: previews, code view, download button ---
        with gr.Column(scale=2):
            gr.Markdown("## Preview Area")
            with gr.Tabs():
                # This tab shows html_preview, which receives the FIRST output
                # of process_and_generate (the layout preview).
                with gr.TabItem("Preview With Placeholder"):
                    with gr.Row():
                        scale_slider = gr.Slider(0.2, 1.5, value=0.55, step=0.05, label="Zoom")
                        width_slider = gr.Slider(400, 2000, value=1920, step=50, label="Canvas Width (px)")
                        height_slider = gr.Slider(300, 1200, value=1080, step=50, label="Canvas Height (px)")
                    html_preview = gr.HTML(label="Rendered HTML", show_label=False)
                # Despite the "*_with_placeholder" variable names, the widgets
                # in this plain "Preview" tab are fed the SECOND output of
                # process_and_generate (the final preview).
                # NOTE(review): the width step here is 100 while the other tab
                # uses 50 — confirm whether the difference is intended.
                with gr.TabItem("Preview"):
                    with gr.Row():
                        scale_slider_with_placeholder = gr.Slider(0.2, 1.5, value=0.55, step=0.05, label="Zoom")
                        width_slider_with_placeholder = gr.Slider(400, 2000, value=1920, step=100, label="Canvas Width (px)")
                        height_slider_with_placeholder = gr.Slider(300, 1200, value=1080, step=50, label="Canvas Height (px)")
                    html_preview_with_placeholder = gr.HTML(label="Rendered HTML", show_label=False)
                with gr.TabItem("Code"):
                    html_code_output = gr.Code(label="Generated HTML", language="html")
            # Hidden until a generation succeeds; its value is later set to the
            # package URL by process_and_generate.
            download_button = gr.Button("Download Package", visible=False, variant="secondary")
    # Clicking an example fills the image and the four prompt boxes.
    gr.Examples(
        examples=example_rows,
        inputs=[active_image, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt],
        cache_examples=False,
        label="Examples"
    )
    # State to hold the HTML content for each preview tab
    layout_code_state = gr.State("")
    final_code_state = gr.State("")
    # Path of the currently selected image, mirrored from active_image.
    active_image_path_state = gr.State()
    # Keep the path state in sync with the image component; anything that is
    # not a string clears it.
    active_image.change(
        lambda p: p if isinstance(p, str) else None,
        inputs=active_image,
        outputs=active_image_path_state,
        show_progress=False
    )
    # On page load, preselect the first example image.
    demo.load(
        lambda: (examples_data[0][0], examples_data[0][0]), None, [active_image, active_image_path_state]
    )
    def handle_upload(uploaded_image_np):
        # Show the uploaded image, clear the stored path and hide the download
        # button left over from any previous run.
        # NOTE(review): the parameter name suggests a numpy array, but the value
        # comes from gr.UploadButton — presumably a file path or file object;
        # confirm against the Gradio version in use.
        return uploaded_image_np, None, gr.update(visible=False)
    upload_button.upload(handle_upload, upload_button, [active_image, active_image_path_state, download_button])
    # Run the pipeline; the outputs list must match the order of the values
    # returned by process_and_generate.
    generate_btn.click(
        process_and_generate,
        [active_image, active_image_path_state, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt],
        [html_preview, html_preview_with_placeholder, html_code_output, layout_code_state, final_code_state, download_button],
        show_progress="full"
    )
    # Re-render the first tab's preview from the stored layout HTML whenever
    # any of its three sliders changes.
    preview_controls = [scale_slider, width_slider, height_slider]
    for control in preview_controls:
        control.change(
            render_preview,
            [layout_code_state, width_slider, height_slider, scale_slider],
            html_preview,
            show_progress=True
        )
    # Same wiring for the second tab, using the stored final HTML.
    preview_controls_with_placeholder = [scale_slider_with_placeholder, width_slider_with_placeholder, height_slider_with_placeholder]
    for control in preview_controls_with_placeholder:
        control.change(
            render_preview,
            [final_code_state, width_slider_with_placeholder, height_slider_with_placeholder, scale_slider_with_placeholder],
            html_preview_with_placeholder,
            show_progress=True
        )
    # Client-side only (no Python function): the button's own value is passed
    # to the JS snippet as the URL, which clicks a temporary <a> element.
    download_button.click(None, download_button, None, js= \
        "(url) => { const link = document.createElement('a'); link.href = url; link.download = ''; document.body.appendChild(link); link.click(); document.body.removeChild(link); }")
# Directories handed to demo.launch(allowed_paths=...) for file serving.
base_dir = Path(__file__).parent.resolve()
# NOTE(review): the first entry is the whole application directory, which
# already contains the two data directories listed after it — confirm that
# exposing everything under base_dir is intended.
allowed_paths = [
    str(base_dir / sub_path) if sub_path else str(base_dir)
    for sub_path in ('', 'screencoder/data/output', 'screencoder/data/packages')
]
# Add the folder of every example image, skipping folders already listed.
for example in examples_data:
    example_abs_path = (base_dir / example[0]).resolve()
    example_dir = example_abs_path.parent
    if str(example_dir) in allowed_paths:
        continue
    allowed_paths.append(str(example_dir))
# Log the final list once at start-up.
print("Allowed paths for file serving:")
for path in allowed_paths:
    print(f" - {path}")
if __name__ == "__main__":
    # Start the web server only when this file is run as a script.
    # Binds to all interfaces on port 7860 and does not request a share link.
    launch_options = {
        "allowed_paths": allowed_paths,
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo.launch(**launch_options)