Spaces:

Jimmyzheng-10
/

ScreenCoder

Build error

App Files Files Community

Jimmyzheng-10 committed on Jul 24

Commit

0246ff9

1 Parent(s): 1bcdbad

Update

Browse files

Files changed (10) hide show

app.py +53 -30
screencoder/UIED/detect_compo/ip_region_proposal.py +36 -0
screencoder/UIED/detect_compo/lib_ip/ip_detection.py +4 -4
screencoder/UIED/run_single.py +35 -5
screencoder/block_parsor.py +90 -103
screencoder/html_generator.py +105 -234
screencoder/image_box_detection.py +43 -26
screencoder/image_replacer.py +43 -26
screencoder/main.py +96 -105
screencoder/mapping.py +41 -23

app.py CHANGED Viewed

@@ -5,20 +5,35 @@ import cv2
 import numpy as np
 from screencoder.main import generate_html_for_demo
-# Default Demo Examples
-SAMPLE_IMAGES_DIR = "screencoder/data/input"
-examples_data = []
-if os.path.exists(SAMPLE_IMAGES_DIR):
-    sample_files = [f for f in sorted(os.listdir(SAMPLE_IMAGES_DIR)) if f.endswith(('.png', '.jpg', '.jpeg')) and not f.startswith('.')]
-    for filename in sample_files:
-        path = os.path.join(SAMPLE_IMAGES_DIR, filename)
-        prompt = f"Generate a modern UI based on the '{filename}' example, focusing on a clean and intuitive layout."
-        examples_data.append([path, prompt, path])
-else:
-    print(f"Warning: Sample images directory not found at {SAMPLE_IMAGES_DIR}. Examples will be empty.")
-def process_image_and_prompt(image_np, image_path_from_state, prompt):
     final_image_path = ""
     is_temp_file = False
@@ -35,8 +50,15 @@ def process_image_and_prompt(image_np, image_path_from_state, prompt):
     else:
         return "<html><body><h1 style='font-family: sans-serif; text-align: center; margin-top: 40px;'>Please provide an image.</h1></body></html>", ""
-    print(f"With prompt: '{prompt}'")
-    html_content = generate_html_for_demo(final_image_path, prompt)
     if is_temp_file:
         os.unlink(final_image_path)
@@ -44,7 +66,7 @@ def process_image_and_prompt(image_np, image_path_from_state, prompt):
     return html_content, html_content
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"), fill_height=True) as demo:
-    active_image_path_state = gr.State(value=examples_data[0][2] if examples_data else None)
     gr.Markdown("# ScreenCoder: Screenshot to Code")
@@ -60,14 +82,13 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"), fi
             upload_button = gr.UploadButton("Click to Upload or Drag-and-Drop", file_types=["image"], variant="primary")
-            gr.Markdown("### Step 2: Write a Prompt (Optional)")
-            prompt_input = gr.Textbox(
-                label="Instructions",
-                placeholder="e.g., 'Make this a dark theme and change the text.'",
-                lines=3,
-                value=examples_data[0][1] if examples_data else "Based on the layout, please fill in appropriate English text and beautify the image blocks."
-            )
             generate_btn = gr.Button("Generate HTML", variant="primary", scale=2)
         with gr.Column(scale=2):
@@ -80,24 +101,26 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"), fi
     if examples_data:
         gr.Examples(
             examples=examples_data,
-            inputs=[active_image],
             label="Click an example to try it out",
         )
     def handle_upload(uploaded_image_np):
-        """On upload, update image, clear state, and set a generic prompt for user input."""
-        default_prompt = "Based on the layout, please fill in appropriate English text and beautify the image blocks."
-        return uploaded_image_np, None, default_prompt
     upload_button.upload(
         fn=handle_upload,
         inputs=upload_button,
-        outputs=[active_image, active_image_path_state, prompt_input]
     )
     generate_btn.click(
         fn=process_image_and_prompt,
-        inputs=[active_image, active_image_path_state, prompt_input],
         outputs=[html_preview, html_code_output],
         show_progress="full"
     )

 import numpy as np
 from screencoder.main import generate_html_for_demo
+# Manually defined examples
+examples_data = [
+    [
+        "screencoder/data/input/test1.png",
+        "",
+        "",
+        "",
+        "",
+        "screencoder/data/input/test1.png"
+    ],
+    [
+        "screencoder/data/input/test2.png",
+        "",
+        "",
+        "",
+        "",
+        "screencoder/data/input/test2.png"
+    ],
+    [
+        "screencoder/data/input/test3.png",
+        "",
+        "",
+        "",
+        "",
+        "screencoder/data/input/test3.png"
+    ],
+]
+def process_image_and_prompt(image_np, image_path_from_state, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt):
     final_image_path = ""
     is_temp_file = False
     else:
         return "<html><body><h1 style='font-family: sans-serif; text-align: center; margin-top: 40px;'>Please provide an image.</h1></body></html>", ""
+    instructions = {
+        "sidebar": sidebar_prompt,
+        "header": header_prompt,
+        "navigation": navigation_prompt,
+        "main content": main_content_prompt
+    }
+    print(f"With instructions: {instructions}")
+    html_content = generate_html_for_demo(final_image_path, instructions)
     if is_temp_file:
         os.unlink(final_image_path)
     return html_content, html_content
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"), fill_height=True) as demo:
+    active_image_path_state = gr.State(value=examples_data[0][5] if examples_data else None)
     gr.Markdown("# ScreenCoder: Screenshot to Code")
             upload_button = gr.UploadButton("Click to Upload or Drag-and-Drop", file_types=["image"], variant="primary")
+            gr.Markdown("### Step 2: Write Prompts (Optional)")
+            with gr.Accordion("Component-specific Prompts", open=True):
+                sidebar_prompt = gr.Textbox(label="Sidebar Prompt", placeholder="Instructions for the sidebar...", value="")
+                header_prompt = gr.Textbox(label="Header Prompt", placeholder="Instructions for the header...", value="")
+                navigation_prompt = gr.Textbox(label="Navigation Prompt", placeholder="Instructions for the navigation...", value="")
+                main_content_prompt = gr.Textbox(label="Main Content Prompt", placeholder="Instructions for the main content...", value="")
             generate_btn = gr.Button("Generate HTML", variant="primary", scale=2)
         with gr.Column(scale=2):
     if examples_data:
         gr.Examples(
             examples=examples_data,
+            fn=lambda *args: args,  # Simply return all inputs
+            inputs=[active_image, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt, active_image_path_state],
+            outputs=[active_image, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt, active_image_path_state],
             label="Click an example to try it out",
+            cache_examples=False,
         )
     def handle_upload(uploaded_image_np):
+        """On upload, update image, clear state, and set empty prompts."""
+        return uploaded_image_np, None, "", "", "", ""
     upload_button.upload(
         fn=handle_upload,
         inputs=upload_button,
+        outputs=[active_image, active_image_path_state, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt]
     )
     generate_btn.click(
         fn=process_image_and_prompt,
+        inputs=[active_image, active_image_path_state, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt],
         outputs=[html_preview, html_code_output],
         show_progress="full"
     )

screencoder/UIED/detect_compo/ip_region_proposal.py CHANGED Viewed

@@ -13,6 +13,38 @@ from config.CONFIG_UIED import Config
 C = Config()
 def nesting_inspection(org, grey, compos, ffl_block):
     '''
     Inspect all big compos through block division by flood-fill
@@ -87,6 +119,10 @@ def compo_detection(input_img_path, output_root, uied_params,
     # *** Step 7 *** save detection result
     Compo.compos_update(uicompos, org.shape)
     file.save_corners_json(pjoin(ip_root, name + '.json'), uicompos)
     print("[Compo Detection Completed in %.3f s] Input: %s Output: %s" % (time.perf_counter() - start, input_img_path, pjoin(ip_root, name + '.json')))
     return uicompos

 C = Config()
def resolve_uicompo_containment(uicompos):
    """
    Remove UI components whose bounding box lies fully inside another component's box.

    Each element of ``uicompos`` must expose a ``bbox`` attribute with
    ``col_min``, ``row_min``, ``col_max`` and ``row_max`` fields. Edges are
    compared inclusively, so a box that merely touches its container's border
    still counts as contained.

    When several components share exactly the same bounding box they contain
    each other; only the first one (lowest index) is kept so the element does
    not vanish from the result altogether.

    Returns a new list with the surviving components in their original order;
    the input sequence is not modified.
    """
    def contains(bbox_a, bbox_b):
        """Checks if bbox_a completely contains bbox_b."""
        return bbox_a.col_min <= bbox_b.col_min and \
               bbox_a.row_min <= bbox_b.row_min and \
               bbox_a.col_max >= bbox_b.col_max and \
               bbox_a.row_max >= bbox_b.row_max

    compos_to_remove = set()
    for i, compo1 in enumerate(uicompos):
        for j, compo2 in enumerate(uicompos):
            if i == j:
                continue
            if not contains(compo1.bbox, compo2.bbox):
                continue
            # Mutual containment means the boxes are identical. Only the later
            # duplicate is dropped; otherwise both would remove each other.
            if j < i and contains(compo2.bbox, compo1.bbox):
                continue
            compos_to_remove.add(j)

    # Filter out the contained components
    final_compos = [compo for i, compo in enumerate(uicompos) if i not in compos_to_remove]
    if len(final_compos) < len(uicompos):
        print(f"Containment resolved: Removed {len(uicompos) - len(final_compos)} contained components.")
    return final_compos
 def nesting_inspection(org, grey, compos, ffl_block):
     '''
     Inspect all big compos through block division by flood-fill
     # *** Step 7 *** save detection result
     Compo.compos_update(uicompos, org.shape)
+    # *** Step 8 *** Resolve containment before saving
+    uicompos = resolve_uicompo_containment(uicompos)
     file.save_corners_json(pjoin(ip_root, name + '.json'), uicompos)
     print("[Compo Detection Completed in %.3f s] Input: %s Output: %s" % (time.perf_counter() - start, input_img_path, pjoin(ip_root, name + '.json')))
     return uicompos

screencoder/UIED/detect_compo/lib_ip/ip_detection.py CHANGED Viewed

@@ -361,13 +361,13 @@ def is_block(clip, thread=0.15):
     # top border - scan top down
     blank_count = 0
     for i in range(1, 5):
-        if sum(clip[side + i]) / 255 > thread * clip.shape[1]:
             blank_count += 1
     if blank_count > 2: return False
     # left border - scan left to right
     blank_count = 0
     for i in range(1, 5):
-        if sum(clip[:, side + i]) / 255 > thread * clip.shape[0]:
             blank_count += 1
     if blank_count > 2: return False
@@ -375,13 +375,13 @@ def is_block(clip, thread=0.15):
     # bottom border - scan bottom up
     blank_count = 0
     for i in range(-1, -5, -1):
-        if sum(clip[side + i]) / 255 > thread * clip.shape[1]:
             blank_count += 1
     if blank_count > 2: return False
     # right border - scan right to left
     blank_count = 0
     for i in range(-1, -5, -1):
-        if sum(clip[:, side + i]) / 255 > thread * clip.shape[0]:
             blank_count += 1
     if blank_count > 2: return False
     return True

     # top border - scan top down
     blank_count = 0
     for i in range(1, 5):
+        if sum(clip[side + i].astype(np.int64)) / 255 > thread * clip.shape[1]:
             blank_count += 1
     if blank_count > 2: return False
     # left border - scan left to right
     blank_count = 0
     for i in range(1, 5):
+        if sum(clip[:, side + i].astype(np.int64)) / 255 > thread * clip.shape[0]:
             blank_count += 1
     if blank_count > 2: return False
     # bottom border - scan bottom up
     blank_count = 0
     for i in range(-1, -5, -1):
+        if sum(clip[side + i].astype(np.int64)) / 255 > thread * clip.shape[1]:
             blank_count += 1
     if blank_count > 2: return False
     # right border - scan right to left
     blank_count = 0
     for i in range(-1, -5, -1):
+        if sum(clip[:, side + i].astype(np.int64)) / 255 > thread * clip.shape[0]:
             blank_count += 1
     if blank_count > 2: return False
     return True

screencoder/UIED/run_single.py CHANGED Viewed

@@ -1,9 +1,14 @@
-from os.path import join as pjoin
 import cv2
 import os
 import numpy as np
 import multiprocessing
 def resize_height_by_longest_edge(img_path, resize_length=800):
     org = cv2.imread(img_path)
@@ -30,6 +35,29 @@ def color_tips():
 if __name__ == '__main__':
     # Set multiprocessing start method to 'spawn' for macOS compatibility.
     # This must be done at the very beginning of the main block.
     try:
@@ -62,11 +90,11 @@ if __name__ == '__main__':
                   'merge-contained-ele':True, 'merge-line-to-paragraph':False, 'remove-bar':True}
     # set input image path
-    input_path_img = 'data/test1.png'
-    output_root = 'data'
     resized_height = resize_height_by_longest_edge(input_path_img, resize_length=800)
-    color_tips()
     is_ip = True
     is_clf = False
@@ -99,4 +127,6 @@ if __name__ == '__main__':
         compo_path = pjoin(output_root, 'ip', str(name) + '.json')
         ocr_path = pjoin(output_root, 'ocr', str(name) + '.json')
         merge.merge(input_path_img, compo_path, ocr_path, pjoin(output_root, 'merge'),
-                    is_remove_bar=key_params['remove-bar'], is_paragraph=key_params['merge-line-to-paragraph'], show=True)

 import cv2
 import os
 import numpy as np
 import multiprocessing
+import argparse
+from os.path import join as pjoin
def get_args(argv=None):
    """
    Parse the command-line options for a single UIED processing run.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) makes argparse read
            ``sys.argv[1:]``, which is the original behaviour.

    Returns:
        The parsed namespace; ``run_id`` holds the required ``--run_id`` value.

    Raises:
        SystemExit: raised by argparse when ``--run_id`` is missing or the
            arguments are otherwise invalid.
    """
    parser = argparse.ArgumentParser(description="Processes a single image for UI element detection.")
    parser.add_argument('--run_id', type=str, required=True, help='A unique identifier for the processing run.')
    return parser.parse_args(argv)
 def resize_height_by_longest_edge(img_path, resize_length=800):
     org = cv2.imread(img_path)
 if __name__ == '__main__':
+    args = get_args()
+    # --- Dynamic Path Construction ---
+    # Construct paths based on the provided run_id
+    base_dir = os.path.dirname(os.path.abspath(__file__))
+    run_id = args.run_id
+    # The temporary directory for this specific run
+    tmp_dir = os.path.join(base_dir, '..', 'data', 'tmp', run_id)
+    # Input image path
+    input_path_img = os.path.join(tmp_dir, f"{run_id}.png")
+    # Output directory for this script's results
+    output_root = tmp_dir # All results (ip, ocr, etc.) will go into the run's tmp subdir.
+    if not os.path.exists(input_path_img):
+        print(f"Error: Input image not found at {input_path_img}")
+        exit(1)
+    print(f"--- Starting UIED processing for run_id: {run_id} ---")
+    print(f"Input image: {input_path_img}")
+    print(f"Output root: {output_root}")
     # Set multiprocessing start method to 'spawn' for macOS compatibility.
     # This must be done at the very beginning of the main block.
     try:
                   'merge-contained-ele':True, 'merge-line-to-paragraph':False, 'remove-bar':True}
     # set input image path
+    # input_path_img = 'data/test1.png'
+    # output_root = 'data'
     resized_height = resize_height_by_longest_edge(input_path_img, resize_length=800)
+    # color_tips() # This shows a window, which is not suitable for a script.
     is_ip = True
     is_clf = False
         compo_path = pjoin(output_root, 'ip', str(name) + '.json')
         ocr_path = pjoin(output_root, 'ocr', str(name) + '.json')
         merge.merge(input_path_img, compo_path, ocr_path, pjoin(output_root, 'merge'),
+                    is_remove_bar=key_params['remove-bar'], is_paragraph=key_params['merge-line-to-paragraph'], show=False)
+    print(f"--- UIED processing complete for run_id: {run_id} ---")

screencoder/block_parsor.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import os
 import cv2
 import json
 from utils import Doubao, encode_image, image_mask
 DEFAULT_IMAGE_PATH = "data/input/test1.png"
@@ -20,47 +21,34 @@ BBOX_TAG_END = "</bbox>"
 # PROMPT_navigation = "框出网页中的navigation的位置，请你只返回对应的bounding box。"
 # PROMPT_main_content = "框出网页中的main content的位置，请你只返回对应的bounding box。"
-# simple version of bbox parsing
-def parse_bboxes(bbox_input: str, image_path: str) -> dict[str, tuple[int, int, int, int]]:
-    """Parse bounding box string to dictionary of named coordinate tuples"""
-    bboxes = {}
-    # print("Raw bbox input:", bbox_input) # Debug print
-    image = cv2.imread(image_path)
-    if image is None:
-        print(f"Error: Failed to read image {image_path}")
-        return bboxes
-    h, w = image.shape[:2]
     try:
         components = bbox_input.strip().split('\n')
-        # print("Split components:", components)  # Debug print
         for component in components:
             component = component.strip()
             if not component:
                 continue
             if ':' in component:
                 name, bbox_str = component.split(':', 1)
             else:
                 bbox_str = component
-                if 'sidebar' in component.lower():
-                    name = 'sidebar'
-                elif 'header' in component.lower():
-                    name = 'header'
-                elif 'navigation' in component.lower():
-                    name = 'navigation'
-                elif 'main content' in component.lower():
-                    name = 'main content'
-                else:
-                    name = 'unknown'
             name = name.strip().lower()
             bbox_str = bbox_str.strip()
-            # print(f"Processing component: {name}, bbox_str: {bbox_str}")  # Debug print
             if BBOX_TAG_START in bbox_str and BBOX_TAG_END in bbox_str:
                 start_idx = bbox_str.find(BBOX_TAG_START) + len(BBOX_TAG_START)
                 end_idx = bbox_str.find(BBOX_TAG_END)
@@ -69,82 +57,44 @@ def parse_bboxes(bbox_input: str, image_path: str) -> dict[str, tuple[int, int,
                 try:
                     norm_coords = list(map(int, coords_str.split()))
                     if len(norm_coords) == 4:
-                        x_min = int(norm_coords[0])
-                        y_min = int(norm_coords[1])
-                        x_max = int(norm_coords[2])
-                        y_max = int(norm_coords[3])
-                        bboxes[name] = (x_min, y_min, x_max, y_max)
                         print(f"Successfully parsed {name}: {bboxes[name]}")
-                    else:
-                        print(f"Invalid number of coordinates for {name}: {norm_coords}")
                 except ValueError as e:
                     print(f"Failed to parse coordinates for {name}: {e}")
-            else:
-                print(f"No bbox tags found in: {bbox_str}")
     except Exception as e:
         print(f"Coordinate parsing failed: {str(e)}")
-        import traceback
-        traceback.print_exc()
     print("Final parsed bboxes:", bboxes)
     return bboxes
-def draw_bboxes(image_path: str, bboxes: dict[str, tuple[int, int, int, int]]) -> str:
-    """Draw bounding boxes on image and save with different colors for each component"""
     image = cv2.imread(image_path)
-    if image is None:
-        print(f"Error: Failed to read image {image_path}")
-        return ""
     h, w = image.shape[:2]
-    colors = {
-        'sidebar': (0, 0, 255),  # Red
-        'header': (0, 255, 0),  # Green
-        'navigation': (255, 0, 0),  # Blue
-        'main content': (255, 255, 0),  # Cyan
-        'unknown': (0, 0, 0),  # Black
-    }
     for component, norm_bbox in bboxes.items():
-        # Convert normalized coordinates to pixel coordinates for drawing
         x_min = int(norm_bbox[0] * w / 1000)
         y_min = int(norm_bbox[1] * h / 1000)
         x_max = int(norm_bbox[2] * w / 1000)
         y_max = int(norm_bbox[3] * h / 1000)
         color = colors.get(component.lower(), (0, 0, 255))
-        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), color, 3)
-        # Add label
-        cv2.putText(image, component, (x_min, y_min - 10),
-                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)
-    # Output directory
-    output_dir = "data/tmp"
-    os.makedirs(output_dir, exist_ok=True)
-    # Get the original filename without path
-    original_filename = os.path.basename(image_path)
-    output_path = os.path.join(output_dir, os.path.splitext(original_filename)[0] + "_with_bboxes.png")
-    if cv2.imwrite(output_path, image):
         print(f"Successfully saved annotated image: {output_path}")
         return output_path
-    print("Error: Failed to save image")
     return ""
-def save_bboxes_to_json(bboxes: dict[str, tuple[int, int, int, int]], image_path: str) -> str:
-    """Save bounding boxes information to a JSON file"""
-    # Output directory
-    output_dir = "data/tmp"
-    os.makedirs(output_dir, exist_ok=True)
-    original_filename = os.path.basename(image_path)
-    json_path = os.path.join(output_dir, os.path.splitext(original_filename)[0] + "_bboxes.json")
     bboxes_dict = {k: list(v) for k, v in bboxes.items()}
     try:
         with open(json_path, 'w', encoding='utf-8') as f:
             json.dump(bboxes_dict, f, indent=4, ensure_ascii=False)
@@ -154,8 +104,38 @@ def save_bboxes_to_json(bboxes: dict[str, tuple[int, int, int, int]], image_path
         print(f"Error saving JSON file: {str(e)}")
         return ""
 # sequential version of bbox parsing: Using recursive detection with mask
-def sequential_component_detection(image_path: str, api_path: str) -> dict[str, tuple[int, int, int, int]]:
     """
     Sequential processing flow: detect each component in turn, mask the image after each detection
     """
@@ -189,7 +169,7 @@ def sequential_component_detection(image_path: str, api_path: str) -> dict[str,
             masked_image = image_mask(current_image_path, norm_bbox)
-            temp_image_path = f"data/temp_{component_name}_masked.png"
             masked_image.save(temp_image_path)
             current_image_path = temp_image_path
@@ -238,34 +218,41 @@ def main_content_processing(bboxes: dict[str, tuple[int, int, int, int]], image_
             int(bbox[3] * h / 1000))
-if __name__ == "__main__":
-    image_path = DEFAULT_IMAGE_PATH
-    api_path = DEFAULT_API_PATH
-    print("=== Starting Simple Component Detection ===")
-    print(f"Input image: {image_path}")
-    print(f"API path: {api_path}")
     client = Doubao(api_path)
     bbox_content = client.ask(PROMPT_MERGE, encode_image(image_path))
-    print(f"Model response: {bbox_content}\n")
-    bboxes = parse_bboxes(bbox_content, image_path)
-    # print("=== Starting Sequential Component Detection ===")
-    # print(f"Input image: {image_path}")
-    # print(f"API path: {api_path}")
-    # bboxes = sequential_component_detection(image_path, api_path)
     if bboxes:
-        print(f"\n=== Detection Complete ===")
-        print(f"Found bounding boxes for components: {list(bboxes.keys())}")
-        print(f"Total components detected: {len(bboxes)}")
-        json_path = save_bboxes_to_json(bboxes, image_path)
-        draw_bboxes(image_path, bboxes)
-        print(f"\n=== Results ===")
-        for component, bbox in bboxes.items():
-            print(f"{component}: {bbox}")
     else:
-        print("\nNo valid bounding box coordinates found")
-        exit(1)

 import os
 import cv2
 import json
+import argparse
 from utils import Doubao, encode_image, image_mask
 DEFAULT_IMAGE_PATH = "data/input/test1.png"
 # PROMPT_navigation = "框出网页中的navigation的位置，请你只返回对应的bounding box。"
 # PROMPT_main_content = "框出网页中的main content的位置，请你只返回对应的bounding box。"
def get_args(argv=None):
    """
    Parse the command-line options for a bounding-box parsing run.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) makes argparse read
            ``sys.argv[1:]``, which is the original behaviour.

    Returns:
        The parsed namespace; ``run_id`` holds the required ``--run_id`` value.

    Raises:
        SystemExit: raised by argparse when ``--run_id`` is missing or the
            arguments are otherwise invalid.
    """
    parser = argparse.ArgumentParser(description="Parses bounding boxes from an image using a vision model.")
    parser.add_argument('--run_id', type=str, required=True, help='A unique identifier for the processing run.')
    return parser.parse_args(argv)
+def parse_bboxes(bbox_input: str) -> dict[str, tuple[int, int, int, int]]:
+    """Parse bounding box string to a dictionary of normalized (0-1000) coordinate tuples."""
+    bboxes = {}
     try:
         components = bbox_input.strip().split('\n')
         for component in components:
             component = component.strip()
             if not component:
                 continue
             if ':' in component:
                 name, bbox_str = component.split(':', 1)
             else:
                 bbox_str = component
+                if 'sidebar' in component.lower(): name = 'sidebar'
+                elif 'header' in component.lower(): name = 'header'
+                elif 'navigation' in component.lower(): name = 'navigation'
+                elif 'main content' in component.lower(): name = 'main content'
+                else: name = 'unknown'
             name = name.strip().lower()
             bbox_str = bbox_str.strip()
             if BBOX_TAG_START in bbox_str and BBOX_TAG_END in bbox_str:
                 start_idx = bbox_str.find(BBOX_TAG_START) + len(BBOX_TAG_START)
                 end_idx = bbox_str.find(BBOX_TAG_END)
                 try:
                     norm_coords = list(map(int, coords_str.split()))
                     if len(norm_coords) == 4:
+                        bboxes[name] = tuple(norm_coords) # Directly store normalized coordinates
                         print(f"Successfully parsed {name}: {bboxes[name]}")
                 except ValueError as e:
                     print(f"Failed to parse coordinates for {name}: {e}")
     except Exception as e:
         print(f"Coordinate parsing failed: {str(e)}")
     print("Final parsed bboxes:", bboxes)
     return bboxes
def draw_bboxes(image_path: str, bboxes: dict[str, tuple[int, int, int, int]], output_path: str) -> str:
    """
    Draw normalized (0-1000) bounding boxes on an image for visualization.

    Args:
        image_path: Path of the image to annotate; it is read with OpenCV.
        bboxes: Mapping of component name to ``(x_min, y_min, x_max, y_max)``
            expressed on a 0-1000 scale relative to the image width/height.
        output_path: Where the annotated copy is written.

    Returns:
        ``output_path`` when the annotated image was written, or an empty
        string when the input could not be read or the write failed.
    """
    image = cv2.imread(image_path)
    if image is None:
        return ""
    h, w = image.shape[:2]
    # Colour tuples are in OpenCV's BGR channel order.
    colors = {'sidebar': (0, 0, 255), 'header': (0, 255, 0), 'navigation': (255, 0, 0), 'main content': (255, 255, 0), 'unknown': (0, 0, 0)}
    output_image = image.copy()
    for component, norm_bbox in bboxes.items():
        # Scale the 0-1000 coordinates to pixel positions.
        x_min = int(norm_bbox[0] * w / 1000)
        y_min = int(norm_bbox[1] * h / 1000)
        x_max = int(norm_bbox[2] * w / 1000)
        y_max = int(norm_bbox[3] * h / 1000)
        color = colors.get(component.lower(), (0, 0, 255))
        cv2.rectangle(output_image, (x_min, y_min), (x_max, y_max), color, 3)
        # The label normally sits just above the box. For boxes touching the
        # top edge that position is off-canvas, so draw it just inside the box.
        label_y = y_min - 10 if y_min >= 30 else y_min + 30
        cv2.putText(output_image, component, (x_min, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)
    if cv2.imwrite(output_path, output_image):
        print(f"Successfully saved annotated image: {output_path}")
        return output_path
    return ""
+def save_bboxes_to_json(bboxes: dict[str, tuple[int, int, int, int]], json_path: str) -> str:
+    """Saves the normalized bboxes to a JSON file."""
+    # This is the unified format: a dictionary of lists.
     bboxes_dict = {k: list(v) for k, v in bboxes.items()}
     try:
         with open(json_path, 'w', encoding='utf-8') as f:
             json.dump(bboxes_dict, f, indent=4, ensure_ascii=False)
         print(f"Error saving JSON file: {str(e)}")
         return ""
def resolve_containment(bboxes: dict[str, tuple[int, int, int, int]]) -> dict[str, tuple[int, int, int, int]]:
    """
    Resolves containment issues among bounding boxes.

    If a box is fully contained within another, the contained box is removed
    and the containing box is kept. This is based on the assumption that major
    layout components should not contain each other. Edges are compared
    inclusively. When two boxes are identical, the one that appears first in
    ``bboxes`` is kept.

    Args:
        bboxes: Mapping of component name to ``(x_min, y_min, x_max, y_max)``.

    Returns:
        A new dict holding only the boxes that are not contained in any other
        box, in their original order. The input mapping is not modified.
    """
    def contains(box_a, box_b):
        """Checks if box_a completely contains box_b."""
        xa1, ya1, xa2, ya2 = box_a
        xb1, yb1, xb2, yb2 = box_b
        return xa1 <= xb1 and ya1 <= yb1 and xa2 >= xb2 and ya2 >= yb2

    names = list(bboxes)
    removed = set()
    for idx, name in enumerate(names):
        box = bboxes[name]
        for other_idx, other in enumerate(names):
            if other_idx == idx:
                continue
            other_box = bboxes[other]
            if not contains(other_box, box):
                continue
            # Mutual containment means identical boxes: keep the earliest one.
            if idx < other_idx and contains(box, other_box):
                continue
            print(f"Containment found: '{other}' contains '{name}'. Removing '{name}'.")
            removed.add(name)
            break
    return {name: bbox for name, bbox in bboxes.items() if name not in removed}
 # sequential version of bbox parsing: Using recursive detection with mask
+def sequential_component_detection(image_path: str, api_path: str, temp_dir: str) -> dict[str, tuple[int, int, int, int]]:
     """
     Sequential processing flow: detect each component in turn, mask the image after each detection
     """
             masked_image = image_mask(current_image_path, norm_bbox)
+            temp_image_path = os.path.join(temp_dir, f"temp_{component_name}_masked.png")
             masked_image.save(temp_image_path)
             current_image_path = temp_image_path
             int(bbox[3] * h / 1000))
def main():
    """
    Detect layout bounding boxes for one run and store the results.

    Reads ``--run_id`` from the command line, looks for
    ``data/tmp/<run_id>/<run_id>.png`` and ``doubao_api.txt`` next to this
    file, asks the vision model for bounding boxes, drops boxes contained in
    other boxes, then writes ``<run_id>_bboxes.json`` and
    ``<run_id>_with_bboxes.png`` into the run's tmp directory.

    Exits with status 1 when the input image or the API key file is missing.
    When no boxes could be parsed, an empty JSON file is still written.
    """
    # Function-scope import: sys.exit is used instead of the exit() helper,
    # which is only injected by the site module.
    import sys

    args = get_args()
    run_id = args.run_id

    # --- Dynamic Path Construction ---
    base_dir = os.path.dirname(os.path.abspath(__file__))
    tmp_dir = os.path.join(base_dir, 'data', 'tmp', run_id)
    image_path = os.path.join(tmp_dir, f"{run_id}.png")
    api_path = os.path.join(base_dir, "doubao_api.txt")
    json_output_path = os.path.join(tmp_dir, f"{run_id}_bboxes.json")
    annotated_image_output_path = os.path.join(tmp_dir, f"{run_id}_with_bboxes.png")

    # Report exactly which required file is absent.
    missing_paths = [path for path in (image_path, api_path) if not os.path.exists(path)]
    if missing_paths:
        print(f"Error: Input image or API key file not found: {', '.join(missing_paths)}")
        sys.exit(1)

    print(f"--- Starting BBox Parsing for run_id: {run_id} ---")
    client = Doubao(api_path)
    bbox_content = client.ask(PROMPT_MERGE, encode_image(image_path))
    bboxes = parse_bboxes(bbox_content)
    if bboxes:
        print("\n--- Resolving containment issues ---")
        bboxes = resolve_containment(bboxes)
        print("--- Containment resolved ---")
        print(f"\n--- Detection Complete for run_id: {run_id} ---")
        save_bboxes_to_json(bboxes, json_output_path)
        draw_bboxes(image_path, bboxes, annotated_image_output_path)
    else:
        print(f"\nNo valid bounding box coordinates found for run_id: {run_id}")
        # Still create an empty json file so the pipeline doesn't break
        save_bboxes_to_json({}, json_output_path)
+if __name__ == "__main__":
+    main()

screencoder/html_generator.py CHANGED Viewed

@@ -3,109 +3,65 @@ from PIL import Image
 import bs4
 from threading import Thread
 import time
-# This dictionary can now be dynamically updated by an external script.
-user_instruction = {
-    "sidebar": "Make all icons look better; fill in relevant English text; beautify the layout.",
-    "header": "Make the Google logo look better; change the avatar color to be more appealing.",
-    "navigation": "Please beautify the layout.",
-    "main content": "Based on the layout, please fill in appropriate English text and beautify the image blocks."
-}
-# Prompt for each component
-PROMPT_DICT = {
-    "sidebar": f"""This is a screenshot of a container. Please fill in a complete HTML and tail-wind CSS code to accurately reproduce the given container. Please note that the layout, icon style, size, and text information of all blocks need to be basically consistent with the original screenshot based on the user's additional conditions. The following is the code for filling in:
     <div>
     your code here
     </div>,
     only return the code within the <div> and </div> tags""",
-    "header": f"""This is a screenshot of a container. Please fill in a complete HTML and tail-wind CSS code to accurately reproduce the given container. Please note that the relative position, layout, text information, and color of all blocks in the boundary box need to be basically consistent with the original screenshot based on the user's additional conditions. The following is the code for filling in:
     <div>
     your code here
     </div>,
     only return the code within the <div> and </div> tags""",
-    "navigation": f"""This is a screenshot of a container. Please fill in a complete HTML and tail-wind CSS code to accurately reproduce the given container. Please note that the relative position, layout, text information, and color of all blocks in the boundary box need to be basically consistent with the original screenshot based on the user's additional conditions. Please use the same icons as in the original screenshot. The following is the code for filling in:
     <div>
     your code here
     </div>,
     only return the code within the <div> and </div> tags""",
-    "main content": f"""This is a screenshot of a container. Please fill in a complete HTML and tail-wind CSS code to accurately reproduce the given container. Please note that all images displayed in the screenshot must be replaced with pure gray-400 image blocks of the same size as the corresponding images in the original screenshot, and the text information in the images does not need to be recognized. The relative position, layout, text information, and color of all blocks in the boundary box need to be basically consistent with the original screenshot based on the user's additional conditions. The following is the code for filling in:
     <div>
     your code here
     </div>,
     only return the code within the <div> and </div> tags""",
-}
-# PROMPT_sidebar = f"""这是一个container的截图。请填写一段完整的HTML和tail-wind CSS代码以准确再现给定的容器。请注意所有组块的排版、图标样式、大小、文字信息需要在用户额外条件的基础上与原始截图基本保持一致。以下是供填写的代码：
-#     <div>
-#     your code here
-#     </div>
-#     只需返回<div>和</div>标签内的代码"""
-# PROMPT_header = f"""这是一个container的截图。请填写一段完整的HTML和tail-wind CSS代码以准确再现给定的容器。请注意所有组块在boundary box中的相对位置、排版、文字信息、颜色需要在用户额外条件的基础上与原始截图基本保持一致。以下是供填写的代码：
-#     <div>
-#     your code here
-#     </div>
-#     只需返回<div>和</div>标签内的代码"""
-# PROMPT_navigation = f"""这是一个container的截图。请填写一段完整的HTML和tail-wind CSS代码以准确再现给定的容器。请注意所有组块的在boundary box中的相对位置、文字排版、颜色需要在用户额外条件的基础上与原始截图基本保持一致。请你直接使用原始截图中一致的图标。以下是供填写的代码：
-#     <div>
-#     your code here
-#     </div>
-#     只需返回<div>和</div>标签内的代码"""
-# PROMPT_main_content = f"""这是一个container的截图。请填写一段完整的HTML和tail-wind CSS代码以准确再现给定的容器。截图中显示的图像务必全部用与原始截图中对应图像同样大小的纯灰色图像块替换，不需要识别图像中的文字信息。请注意所有组块在boundary box中的相对位置、排版、文字信息、颜色需要在用户额外条件的基础上与原始截图基本保持一致。以下是供填写的代码：
-#     <div>
-#     your code here
-#     </div>
-#     只需返回<div>和</div>标签内的代码"""
-# Generate code for each component
-def generate_code(bbox_tree, img_path, bot):
-    """generate code for all the leaf nodes in the bounding box tree, return a dictionary: {'id': 'code'}"""
     img = Image.open(img_path)
     code_dict = {}
     def _generate_code(node):
-        if node["children"] == []:
             bbox = node["bbox"]
-            # bbox is already in pixel coordinates [x1, y1, x2, y2]
             cropped_img = img.crop(bbox)
-            # Select prompt based on node type
-            if "type" in node:
-                if node["type"] == "sidebar":
-                    prompt = PROMPT_DICT["sidebar"]
-                elif node["type"] == "header":
-                    prompt = PROMPT_DICT["header"]
-                elif node["type"] == "navigation":
-                    prompt = PROMPT_DICT["navigation"]
-                elif node["type"] == "main content":
-                    prompt = PROMPT_DICT["main content"]
-                else:
-                    print(f"Unknown component type: {node['type']}")
-                    return
             else:
-                print("Node type not found")
-                return
-            try:
-                code = bot.ask(prompt, encode_image(cropped_img))
-                code_dict[node["id"]] = code
-            except Exception as e:
-                print(f"Error generating code for {node.get('type', 'unknown')}: {str(e)}")
-                code_dict[node["id"]] = f"<!-- Error: {str(e)} -->"
         else:
             for child in node["children"]:
                 _generate_code(child)
@@ -113,66 +69,8 @@ def generate_code(bbox_tree, img_path, bot):
     _generate_code(bbox_tree)
     return code_dict
-# Generate code for each component in parallel
-# def generate_code_parallel(bbox_tree, img_path, prompt, bot):
-    """generate code for all the leaf nodes in the bounding box tree, return a dictionary: {'id': 'code'}"""
-    code_dict = {}
-    t_list = []
-    def _generate_code_with_retry(node, max_retries=3, retry_delay=2):
-        """Generate code with retry mechanism for rate limit errors"""
-        try:
-            # Create a new image instance for each thread
-            with Image.open(img_path) as img:
-                bbox = node["bbox"]
-                cropped_img = img.crop(bbox)
-                for attempt in range(max_retries):
-                    try:
-                        code = bot.ask(prompt, encode_image(cropped_img))
-                        code_dict[node["id"]] = code
-                        return
-                    except Exception as e:
-                        if "rate_limit" in str(e).lower() and attempt < max_retries - 1:
-                            print(f"Rate limit hit, retrying in {retry_delay} seconds... (Attempt {attempt + 1}/{max_retries})")
-                            time.sleep(retry_delay)
-                            retry_delay *= 2  # Exponential backoff
-                        else:
-                            print(f"Error generating code for node {node['id']}: {str(e)}")
-                            code_dict[node["id"]] = f"<!-- Error: {str(e)} -->"
-                            return
-        except Exception as e:
-            print(f"Error processing image for node {node['id']}: {str(e)}")
-            code_dict[node["id"]] = f"<!-- Error: {str(e)} -->"
-    def _generate_code(node):
-        if node["children"] == []:
-            t = Thread(target=lambda: _generate_code_with_retry(node))
-            t.start()
-            t_list.append(t)
-        else:
-            for child in node["children"]:
-                _generate_code(child)
-    _generate_code(bbox_tree)
-    # Wait for all threads to complete
-    for t in t_list:
-        t.join()
-    return code_dict
-# Generate HTML from the bounding box tree
-def generate_html(bbox_tree, output_file="output.html", img_path="data/test1.png"):
-    """
-    Generates an HTML file with nested containers based on the bounding box tree.
-    :param bbox_tree: Dictionary representing the bounding box tree.
-    :param output_file: The name of the output HTML file.
-    """
-    # HTML and CSS templates
-    # the container class is used to create grid and position the boxes
-    # include the tailwind css in the head tag
     html_template_start = """
     <!DOCTYPE html>
     <html lang="en">
@@ -215,137 +113,110 @@ def generate_html(bbox_tree, output_file="output.html", img_path="data/test1.png
     </html>
     """
-    # Function to recursively generate HTML
-    def process_bbox(node, parent_width, parent_height, parent_left, parent_top, img):
         bbox = node['bbox']
         children = node.get('children', [])
-        id = node['id']
-        # Calculate relative positions and sizes
         left = (bbox[0] - parent_left) / parent_width * 100
         top = (bbox[1] - parent_top) / parent_height * 100
         width = (bbox[2] - bbox[0]) / parent_width * 100
         height = (bbox[3] - bbox[1]) / parent_height * 100
-        # Start the box div
-        html = f'''
-            <div id="{id}" class="box" style="left: {left}%; top: {top}%; width: {width}%; height: {height}%;">
-        '''
         if children:
-            # If there are children, add a nested container
-            html += '''
-                <div class="container">
-            '''
-            # Get the current box's width and height in pixels for child calculations
             current_width = bbox[2] - bbox[0]
             current_height = bbox[3] - bbox[1]
             for child in children:
-                html += process_bbox(child, current_width, current_height, bbox[0], bbox[1], img)
-            html += '''
-                </div>
-            '''
-        # Close the box div
-        html += '''
-            </div>
-        '''
         return html
     root_bbox = bbox_tree['bbox']
     root_children = bbox_tree.get('children', [])
-    root_width = root_bbox[2]
-    root_height = root_bbox[3]
-    root_x = root_bbox[0]
-    root_y = root_bbox[1]
     html_content = html_template_start
     for child in root_children:
-        html_content += process_bbox(child, root_width, root_height, root_x, root_y, img)
     html_content += html_template_end
-    soup = bs4.BeautifulSoup(html_content, 'html.parser')
-    html_content = soup.prettify()
     with open(output_file, 'w') as f:
-        f.write(html_content)
-# Substitute the code in the html file
 def code_substitution(html_file, code_dict):
-    """substitute the code in the html file"""
     with open(html_file, "r") as f:
-        html = f.read()
-    soup = bs4.BeautifulSoup(html, 'html.parser')
-    for id, code in code_dict.items():
-        code = code.replace("```html", "").replace("```", "")
-        div = soup.find(id=id)
-        # replace the inner html of the div
         if div:
-            div.append(bs4.BeautifulSoup(code, 'html.parser'))
     with open(html_file, "w") as f:
         f.write(soup.prettify())
-# Main
-if __name__ == "__main__":
-    import json
-    import time
-    from PIL import Image
-    # Load bboxes from block_parsing.py output
-    boxes_data = json.load(open("data/tmp/test1_bboxes.json"))
-    img_path = "data/input/test1.png"
     with Image.open(img_path) as img:
         width, height = img.size
-    # Create root node with actual image dimensions
-    root = {
-        "bbox": [0, 0, width, height],  # Use actual image dimensions
-        "children": []
-    }
-    # Map region IDs to component types
-    region_type_mapping = {
-        "1": "sidebar",
-        "2": "header",
-        "3": "navigation",
-        "4": "main content"
-    }
-    # Add each region as a child with its type
-    for region in boxes_data.get("regions", []):
-        # Convert normalized coordinates to pixel coordinates
-        x = region["x"] * width
-        y = region["y"] * height
-        w = region["w"] * width
-        h = region["h"] * height
-        child = {
-            "bbox": [x, y, x + w, y + h],  # Convert to [x1, y1, x2, y2] format
-            "children": [],
-            "type": region_type_mapping.get(region["id"], "unknown")
-        }
-        root["children"].append(child)
-    # Assign IDs to all nodes
-    def assign_id(node, id):
-        node["id"] = id
-        for child in node.get("children", []):
-            id = assign_id(child, id+1)
-        return id
-    assign_id(root, 0)
-    # print(root)
-    # Generate initial HTML layout
-    generate_html(root, 'data/output/test1_layout.html')
-    # Initialize the bot
-    bot = Doubao("doubao_api.txt", model = "doubao-1.5-thinking-vision-pro-250428")
-    # bot = Qwen_2_5_VL("qwen_api.txt", model="qwen2.5-vl-72b-instruct")
-    # Generate code for each component
-    code_dict = generate_code(root, img_path, bot)
-    # Substitute the generated code into the HTML
-    code_substitution('data/output/test1_layout.html', code_dict)

 import bs4
 from threading import Thread
 import time
+import argparse
+import json
+import os
+# This dictionary holds the user's instructions for the current run.
+user_instruction = {"sidebar": "", "header": "", "navigation": "", "main content": ""}
def get_args(argv=None):
    """Parse the command-line options of the HTML generator.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads ``sys.argv[1:]``, which is what the script entry
            point relies on.

    Returns:
        The parsed namespace with ``run_id`` (required) and ``instructions``
        (``None`` when the option is not given).
    """
    parser = argparse.ArgumentParser(description="Generates an HTML layout from bounding box data.")
    parser.add_argument('--run_id', type=str, required=True, help='A unique identifier for the processing run.')
    parser.add_argument('--instructions', type=str, help='A JSON string of instructions for different components.')
    return parser.parse_args(argv)
def get_prompt_dict(instructions):
    """Build the per-component prompts with the user's instructions embedded.

    Args:
        instructions: Mapping from component name ("sidebar", "header",
            "navigation", "main content") to the user's instruction text.
            Missing keys, or ``None`` for the whole mapping, are treated as
            empty instructions instead of raising ``KeyError``.

    Returns:
        A dict with the four component names as keys and the full prompt
        string for each component as values.
    """
    instructions = instructions or {}
    # Look every component up defensively: callers may pass only a subset.
    sidebar = instructions.get("sidebar", "")
    header = instructions.get("header", "")
    navigation = instructions.get("navigation", "")
    main_content = instructions.get("main content", "")
    return {
        "sidebar": f"""This is a screenshot of a container. Please fill in a complete HTML and tail-wind CSS code to accurately reproduce the given container. Please note that the layout, icon style, size, and text information of all blocks need to be basically consistent with the original screenshot based on the user's additional conditions. User instruction: {sidebar}. The following is the code for filling in:
    <div>
    your code here
    </div>,
    only return the code within the <div> and </div> tags""",
        "header": f"""This is a screenshot of a container. Please fill in a complete HTML and tail-wind CSS code to accurately reproduce the given container. Please note that the relative position, layout, text information, and color of all blocks in the boundary box need to be basically consistent with the original screenshot based on the user's additional conditions. User instruction: {header}. The following is the code for filling in:
    <div>
    your code here
    </div>,
    only return the code within the <div> and </div> tags""",
        "navigation": f"""This is a screenshot of a container. Please fill in a complete HTML and tail-wind CSS code to accurately reproduce the given container. Please note that the relative position, layout, text information, and color of all blocks in the boundary box need to be basically consistent with the original screenshot based on the user's additional conditions. Please use the same icons as in the original screenshot. User instruction: {navigation}. The following is the code for filling in:
    <div>
    your code here
    </div>,
    only return the code within the <div> and </div> tags""",
        "main content": f"""This is a screenshot of a container. Please fill in a complete HTML and tail-wind CSS code to accurately reproduce the given container. Please note that all images displayed in the screenshot must be replaced with pure gray-400 image blocks of the same size as the corresponding images in the original screenshot, and the text information in the images does not need to be recognized. The relative position, layout, text information, and color of all blocks in the boundary box need to be basically consistent with the original screenshot based on the user's additional conditions. User instruction: {main_content}. The following is the code for filling in:
    <div>
    your code here
    </div>,
    only return the code within the <div> and </div> tags""",
    }
def generate_code(bbox_tree, img_path, bot, instructions):
    """Generate code for each leaf node in the bounding box tree.

    Every node without children is cropped out of the image at ``img_path``
    using its ``bbox`` and sent to ``bot.ask`` together with the prompt that
    matches the node's ``type``.

    Args:
        bbox_tree: Root node dict; nodes carry ``bbox``, ``id``, optionally
            ``type`` and ``children``.
        img_path: Path of the screenshot the bounding boxes refer to.
        bot: Object exposing ``ask(prompt, encoded_image)``.
        instructions: Per-component user instructions passed to
            ``get_prompt_dict``.

    Returns:
        A dict mapping ``node["id"]`` to whatever ``bot.ask`` returned. Leaves
        with an unknown or missing type, and leaves whose request raised, are
        left out (a message is printed instead).
    """
    code_dict = {}
    prompt_dict = get_prompt_dict(instructions)

    def _generate_code(node, img):
        children = node.get("children")
        if children:
            for child in children:
                _generate_code(child, img)
            return

        # Leaf node: validate the type first so no crop is made for nodes
        # that cannot be prompted anyway.
        node_type = node.get("type")
        if not node_type or node_type not in prompt_dict:
            print(f"Node type '{node_type}' not found or invalid.")
            return

        cropped_img = img.crop(node["bbox"])
        try:
            code_dict[node["id"]] = bot.ask(prompt_dict[node_type], encode_image(cropped_img))
        except Exception as e:
            # Best effort on purpose: one failing component must not abort
            # generation for the remaining components.
            print(f"Error generating code for {node_type}: {e}")

    # The context manager releases the image file handle once all crops have
    # been taken; the original left it open for the life of the process.
    with Image.open(img_path) as img:
        _generate_code(bbox_tree, img)
    return code_dict
+def generate_html(bbox_tree, output_file):
+    """Generates an HTML file with nested containers based on the bounding box tree."""
     html_template_start = """
     <!DOCTYPE html>
     <html lang="en">
     </html>
     """
+    def process_bbox(node, parent_width, parent_height, parent_left, parent_top):
         bbox = node['bbox']
         children = node.get('children', [])
+        node_id = node['id']
         left = (bbox[0] - parent_left) / parent_width * 100
         top = (bbox[1] - parent_top) / parent_height * 100
         width = (bbox[2] - bbox[0]) / parent_width * 100
         height = (bbox[3] - bbox[1]) / parent_height * 100
+        html = f'<div id="{node_id}" class="box" style="left: {left}%; top: {top}%; width: {width}%; height: {height}%;">'
         if children:
+            html += '<div class="container">'
             current_width = bbox[2] - bbox[0]
             current_height = bbox[3] - bbox[1]
             for child in children:
+                html += process_bbox(child, current_width, current_height, bbox[0], bbox[1])
+            html += '</div>'
+        html += '</div>'
         return html
     root_bbox = bbox_tree['bbox']
     root_children = bbox_tree.get('children', [])
+    root_width = root_bbox[2] - root_bbox[0]
+    root_height = root_bbox[3] - root_bbox[1]
     html_content = html_template_start
     for child in root_children:
+        html_content += process_bbox(child, root_width, root_height, root_bbox[0], root_bbox[1])
     html_content += html_template_end
     with open(output_file, 'w') as f:
+        f.write(bs4.BeautifulSoup(html_content, 'html.parser').prettify())
 def code_substitution(html_file, code_dict):
+    """Substitutes the generated code into the HTML file."""
     with open(html_file, "r") as f:
+        soup = bs4.BeautifulSoup(f.read(), 'html.parser')
+    for node_id, code in code_dict.items():
+        div = soup.find(id=node_id)
         if div:
+            div.append(bs4.BeautifulSoup(code.replace("```html", "").replace("```", ""), 'html.parser'))
     with open(html_file, "w") as f:
         f.write(soup.prettify())
def main():
    """Build the layout HTML for one run and fill it with generated code.

    Reads ``<run_id>_bboxes.json`` and ``<run_id>.png`` from
    ``data/tmp/<run_id>`` next to this script, writes the layout to
    ``data/output/<run_id>/<run_id>_layout.html``, asks the Doubao bot for
    the code of every region and substitutes it into that file.

    ``--instructions``, when given, must be a JSON object; its entries are
    merged into the module-level ``user_instruction`` dict. Invalid JSON or a
    non-object value is reported and ignored.

    Raises:
        SystemExit: with status 1 when the bbox JSON, the image or the API
            key file is missing.
    """
    args = get_args()
    if args.instructions:
        try:
            parsed = json.loads(args.instructions)
        except json.JSONDecodeError:
            print("Error: Could not decode instructions JSON.")
        else:
            # Valid JSON is not necessarily an object; dict.update() would
            # raise on e.g. a list or a bare string.
            if isinstance(parsed, dict):
                user_instruction.update(parsed)
            else:
                print("Error: Instructions JSON must be an object.")

    # --- Dynamic Path Construction ---
    base_dir = os.path.dirname(os.path.abspath(__file__))
    tmp_dir = os.path.join(base_dir, 'data', 'tmp', args.run_id)
    output_dir = os.path.join(base_dir, 'data', 'output', args.run_id)
    os.makedirs(output_dir, exist_ok=True)

    input_json_path = os.path.join(tmp_dir, f"{args.run_id}_bboxes.json")
    img_path = os.path.join(tmp_dir, f"{args.run_id}.png")
    output_html_path = os.path.join(output_dir, f"{args.run_id}_layout.html")
    api_path = os.path.join(base_dir, "doubao_api.txt")

    # Validate every input up front so a run that cannot finish fails before
    # a half-finished layout file is written.
    if not os.path.exists(input_json_path) or not os.path.exists(img_path):
        print("Error: Input bbox JSON or image file not found.")
        raise SystemExit(1)
    if not os.path.exists(api_path):
        print(f"Error: API key not found at {api_path}")
        raise SystemExit(1)

    print(f"--- Starting HTML Generation for run_id: {args.run_id} ---")

    with open(input_json_path, 'r') as f:
        boxes_data = json.load(f)
    with Image.open(img_path) as img:
        width, height = img.size

    root = {"bbox": [0, 0, width, height], "children": [], "id": 0}

    # Convert normalized bboxes (scaled by 1/1000 here) to pixel coordinates
    # and give every region a unique id for code substitution; id 0 is the
    # root.
    for next_id, (name, norm_bbox) in enumerate(boxes_data.items(), start=1):
        x1 = int(norm_bbox[0] * width / 1000)
        y1 = int(norm_bbox[1] * height / 1000)
        x2 = int(norm_bbox[2] * width / 1000)
        y2 = int(norm_bbox[3] * height / 1000)
        root["children"].append(
            {"bbox": [x1, y1, x2, y2], "type": name, "children": [], "id": next_id}
        )

    generate_html(root, output_html_path)

    bot = Doubao(api_path, model="doubao-1.5-thinking-vision-pro-250428")
    code_dict = generate_code(root, img_path, bot, user_instruction)
    code_substitution(output_html_path, code_dict)

    print(f"HTML layout with generated content saved to {os.path.basename(output_html_path)}")
    print(f"--- HTML Generation Complete for run_id: {args.run_id} ---")
+if __name__ == "__main__":
+    main()

screencoder/image_box_detection.py CHANGED Viewed

@@ -115,11 +115,33 @@ def draw_bboxes_on_image(img, region_bboxes, placeholder_bboxes):
     return boxed
-def main(args):
     # Read original screenshot
-    img = cv2.imread(str(args.screenshot))
     if img is None:
-        sys.exit(f"Error: Cannot read image {args.screenshot}")
     if img.std() < 5:
         print("Warning: The screenshot is almost pure color, it may not be the original screenshot with real thumbnails.")
@@ -127,10 +149,11 @@ def main(args):
     # Parse HTML → Get bboxes
     region_bboxes, placeholder_bboxes, layout_width, layout_height = asyncio.run(
-        extract_bboxes_from_html(args.html)
     )
     if not placeholder_bboxes:
-        sys.exit("Error: No gray placeholder blocks found!")
     # Calculate separate scale factors for X and Y to handle aspect ratio differences
     scale_x = W / layout_width if layout_width > 0 else 1
@@ -163,10 +186,9 @@ def main(args):
     overlay = draw_bboxes_on_image(img, scaled_regions, scaled_placeholders)
     # Save debug image
-    out_png = args.out / "debug_gray_bboxes_test1.png"
-    out_png.parent.mkdir(parents=True, exist_ok=True)
-    cv2.imwrite(str(out_png), overlay)
-    print(f"Success: BBox overlay saved to {out_png}")
     # Convert absolute pixel coordinates to proportions for the final JSON output
@@ -195,24 +217,19 @@ def main(args):
     output_json = json.dumps(output_data, indent=2, ensure_ascii=False)
     print(output_json)
-    if args.json:
-        args.json.parent.mkdir(parents=True, exist_ok=True)
-        args.json.write_text(output_json)
-        print(f"Success: BBox list saved to {args.json}")
 # ---------- CLI ----------
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Draw BBoxes parsed from HTML on the original screenshot"
-    )
-    parser.add_argument("--html", required=False, type=Path, default=Path("data/output/test1_layout.html"),
-                        help="Generated HTML file (with gray placeholder)")
-    parser.add_argument("--screenshot", required=False, type=Path, default=Path("data/input/test1.png"),
-                        help="Original UI screenshot (with real thumbnails)")
-    parser.add_argument("--out", default=Path("data/tmp"), type=Path,
-                        help="Output directory (save debug_gray_bboxes_test1.png)")
-    parser.add_argument("--json", type=Path, default=Path("data/tmp/test1_bboxes.json"),
-                        help="If provided, write BBox list to JSON file")
-    args = parser.parse_args()
-    main(args)

     return boxed
+def main():
+    args = get_args()
+    run_id = args.run_id
+    # --- Dynamic Path Construction ---
+    base_dir = Path(__file__).parent.resolve()
+    # Go up one level to the project root to find the data directory
+    project_root = base_dir.parent
+    tmp_dir = project_root / 'screencoder' / 'data' / 'tmp' / run_id
+    output_dir = project_root / 'screencoder' / 'data' / 'output' / run_id
+    html_path = output_dir / f"{run_id}_layout.html"
+    screenshot_path = tmp_dir / f"{run_id}.png"
+    output_json_path = tmp_dir / f"{run_id}_bboxes.json"
+    debug_image_path = tmp_dir / f"debug_gray_bboxes_{run_id}.png"
+    if not html_path.exists():
+        sys.exit(f"Error: HTML file not found at {html_path}")
+    if not screenshot_path.exists():
+        sys.exit(f"Error: Screenshot not found at {screenshot_path}")
+    print(f"--- Starting Image Box Detection for run_id: {run_id} ---")
     # Read original screenshot
+    img = cv2.imread(str(screenshot_path))
     if img is None:
+        sys.exit(f"Error: Cannot read image {screenshot_path}")
     if img.std() < 5:
         print("Warning: The screenshot is almost pure color, it may not be the original screenshot with real thumbnails.")
     # Parse HTML → Get bboxes
     region_bboxes, placeholder_bboxes, layout_width, layout_height = asyncio.run(
+        extract_bboxes_from_html(html_path)
     )
     if not placeholder_bboxes:
+        # This is not necessarily an error; some UIs might not have placeholders.
+        print("Info: No gray placeholder blocks found.")
     # Calculate separate scale factors for X and Y to handle aspect ratio differences
     scale_x = W / layout_width if layout_width > 0 else 1
     overlay = draw_bboxes_on_image(img, scaled_regions, scaled_placeholders)
     # Save debug image
+    debug_image_path.parent.mkdir(parents=True, exist_ok=True)
+    cv2.imwrite(str(debug_image_path), overlay)
+    print(f"Success: BBox overlay saved to {debug_image_path}")
     # Convert absolute pixel coordinates to proportions for the final JSON output
     output_json = json.dumps(output_data, indent=2, ensure_ascii=False)
     print(output_json)
+    output_json_path.parent.mkdir(parents=True, exist_ok=True)
+    output_json_path.write_text(output_json)
+    print(f"Success: BBox list saved to {output_json_path}")
+    print(f"--- Image Box Detection Complete for run_id: {run_id} ---")
def get_args(argv=None):
    """Parse the command-line options of the image box detection step.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads ``sys.argv[1:]``.

    Returns:
        The parsed namespace with the required ``run_id``.
    """
    parser = argparse.ArgumentParser(
        description="Extracts placeholder bounding boxes from an HTML file and maps them to a screenshot."
    )
    parser.add_argument('--run_id', required=True, type=str,
                        help="A unique identifier for the processing run.")
    return parser.parse_args(argv)
 # ---------- CLI ----------
 if __name__ == "__main__":
+    main()

screencoder/image_replacer.py CHANGED Viewed

@@ -4,17 +4,45 @@ from pathlib import Path
 from bs4 import BeautifulSoup
 import cv2
 import re
-def main(args):
     # --- Phase 1: Crop and Save All Images First ---
     # 1. Load data
-    mapping_data = json.loads(args.mapping.read_text())
-    uied_data = json.loads(args.uied.read_text())
-    original_image = cv2.imread(str(args.original_image))
     if original_image is None:
-        raise ValueError(f"Could not load the original image from {args.original_image}")
     # Get image shapes to calculate a simple, global scaling factor
     H_proc, W_proc, _ = uied_data['img_shape']
@@ -29,7 +57,7 @@ def main(args):
     }
     # 2. Create a directory for cropped images
-    crop_dir = args.output_html.parent / "cropped_images"
     crop_dir.mkdir(exist_ok=True)
     print(f"Saving cropped images to: {crop_dir.resolve()}")
@@ -67,7 +95,7 @@ def main(args):
     # --- Phase 2: Use BeautifulSoup to Replace Placeholders by Order ---
     print("\nStarting offline HTML processing with BeautifulSoup...")
-    html_content = args.gray_html.read_text()
     soup = BeautifulSoup(html_content, 'html.parser')
     # 1. Find all placeholder elements by their class, in document order.
@@ -115,27 +143,16 @@ def main(args):
         ph_element.replace_with(img_tag)
     # Save the modified HTML
-    args.output_html.write_text(str(soup))
     print(f"\nSuccessfully replaced {min(len(placeholder_elements), len(ordered_placeholder_ids))} placeholders.")
-    print(f"Final HTML generated at {args.output_html.resolve()}")
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Replace placeholder divs in an HTML file with cropped images based on UIED mappings.")
-    parser.add_argument("--mapping", type=Path, required=False, help="Path to the mapping JSON file from mapping.py.")
-    parser.add_argument("--uied", type=Path, required=False, help="Path to the UIED JSON file.")
-    parser.add_argument("--original-image", type=Path, required=False, help="Path to the original screenshot image.")
-    parser.add_argument("--gray-html", type=Path, required=False, help="Path to the input HTML file with gray placeholders.")
-    parser.add_argument("--output-html", type=Path, required=False, help="Path to save the final, modified HTML file.")
-    parser.set_defaults(
-        mapping=Path('data/tmp/mapping_full_test1.json'),
-        uied=Path('data/tmp/ip/test1.json'),
-        original_image=Path('data/input/test1.png'),
-        gray_html=Path('data/output/test1_layout.html'),
-        output_html=Path('data/output/test1_layout_final.html')
-    )
-    args = parser.parse_args()
-    main(args)

 from bs4 import BeautifulSoup
 import cv2
 import re
+import sys
+def main():
+    args = get_args()
+    run_id = args.run_id
+    # --- Dynamic Path Construction ---
+    base_dir = Path(__file__).parent.resolve()
+    tmp_dir = base_dir / 'data' / 'tmp' / run_id
+    output_dir = base_dir / 'data' / 'output' / run_id
+    mapping_path = tmp_dir / f"mapping_full_{run_id}.json"
+    uied_path = tmp_dir / "ip" / f"{run_id}.json"
+    original_image_path = tmp_dir / f"{run_id}.png"
+    # This is the input HTML with placeholders
+    gray_html_path = output_dir / f"{run_id}_layout.html"
+    # This will be the final output of the entire pipeline
+    final_html_path = output_dir / f"{run_id}_layout_final.html"
+    # --- Input Validation ---
+    if not all([p.exists() for p in [mapping_path, uied_path, original_image_path, gray_html_path]]):
+        print("Error: One or more required input files are missing.", file=sys.stderr)
+        if not mapping_path.exists(): print(f"- Missing: {mapping_path}", file=sys.stderr)
+        if not uied_path.exists(): print(f"- Missing: {uied_path}", file=sys.stderr)
+        if not original_image_path.exists(): print(f"- Missing: {original_image_path}", file=sys.stderr)
+        if not gray_html_path.exists(): print(f"- Missing: {gray_html_path}", file=sys.stderr)
+        sys.exit(1)
+    print(f"--- Starting Image Replacement for run_id: {run_id} ---")
     # --- Phase 1: Crop and Save All Images First ---
     # 1. Load data
+    mapping_data = json.loads(mapping_path.read_text())
+    uied_data = json.loads(uied_path.read_text())
+    original_image = cv2.imread(str(original_image_path))
     if original_image is None:
+        raise ValueError(f"Could not load the original image from {original_image_path}")
     # Get image shapes to calculate a simple, global scaling factor
     H_proc, W_proc, _ = uied_data['img_shape']
     }
     # 2. Create a directory for cropped images
+    crop_dir = final_html_path.parent / f"cropped_images_{run_id}"
     crop_dir.mkdir(exist_ok=True)
     print(f"Saving cropped images to: {crop_dir.resolve()}")
     # --- Phase 2: Use BeautifulSoup to Replace Placeholders by Order ---
     print("\nStarting offline HTML processing with BeautifulSoup...")
+    html_content = gray_html_path.read_text()
     soup = BeautifulSoup(html_content, 'html.parser')
     # 1. Find all placeholder elements by their class, in document order.
         ph_element.replace_with(img_tag)
     # Save the modified HTML
+    final_html_path.write_text(str(soup))
     print(f"\nSuccessfully replaced {min(len(placeholder_elements), len(ordered_placeholder_ids))} placeholders.")
+    print(f"Final HTML generated at {final_html_path.resolve()}")
+    print(f"--- Image Replacement Complete for run_id: {run_id} ---")
def get_args(argv=None):
    """Parse the command-line options of the image replacement step.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads ``sys.argv[1:]``.

    Returns:
        The parsed namespace with the required ``run_id``.
    """
    parser = argparse.ArgumentParser(description="Replace placeholder divs in an HTML file with cropped images based on UIED mappings.")
    parser.add_argument("--run_id", type=str, required=True, help="A unique identifier for the processing run.")
    return parser.parse_args(argv)
 if __name__ == "__main__":
+    main()

screencoder/main.py CHANGED Viewed

@@ -2,16 +2,15 @@ import subprocess
 import sys
 import os
 import json
-# A simple placeholder for prompt injection
-# In a real scenario, this should be a more robust mechanism
-def inject_prompt_to_generator(prompt_text):
     if not prompt_text:
         return
-    # In this example, we assume the prompt is a simple string for the "main content"
-    # A more complex implementation would parse a structured prompt
     user_instruction = {
         "sidebar": "Make all icons look better; fill in relevant English text; beautify the layout.",
         "header": "Make the Google logo look better; change the avatar color to be more appealing.",
@@ -19,123 +18,115 @@ def inject_prompt_to_generator(prompt_text):
         "main content": prompt_text
     }
-    generator_path = os.path.join(os.path.dirname(__file__), 'html_generator.py')
-    with open(generator_path, 'r', encoding='utf-8') as f:
-        lines = f.readlines()
-    # Find the user_instruction dictionary and replace it
-    new_lines = []
-    in_dict = False
-    for line in lines:
-        if 'user_instruction = {' in line:
-            in_dict = True
-            new_lines.append(f"user_instruction = {json.dumps(user_instruction, indent=4)}\n")
-        elif in_dict and '}' in line:
-            in_dict = False
-            continue # Skip the closing brace of the old dict
-        elif not in_dict:
-            new_lines.append(line)
-    with open(generator_path, 'w', encoding='utf-8') as f:
-        f.writelines(new_lines)
-def run_script(script_path):
-    script_path = os.path.normpath(script_path)
-    print(f"\n{'='*20}")
-    print(f"Executing: python {script_path}")
-    print(f"{'='*20}")
     try:
-        result = subprocess.run(
-            [sys.executable, script_path],
-            check=True,
-            capture_output=True,
-            text=True
-        )
-        print("Success!")
-        print("Output:")
         print(result.stdout)
         if result.stderr:
-            print("Stderr:")
             print(result.stderr)
-    except FileNotFoundError:
-        print(f"ERROR: Script not found at '{script_path}'")
-        sys.exit(1)
     except subprocess.CalledProcessError as e:
-        print(f"ERROR: Script '{script_path}' failed with exit code {e.returncode}")
-        print("Stdout:")
         print(e.stdout)
-        print("Stderr:")
         print(e.stderr)
-        sys.exit(1)
-    except Exception as e:
-        print(f"An unexpected error occurred while running '{script_path}': {e}")
-        sys.exit(1)
-def generate_html_for_demo(image_path, prompt, output_dir="screencoder/data/output"):
     """
-    A modified workflow for the Gradio demo.
-    It takes an image path and a prompt, and returns the path to the final HTML file.
     """
-    print("Starting the Screencoder demo workflow...")
-    # Setup paths
-    project_root = os.path.dirname(__file__)
-    # The block_parsor script expects a specific input file name, so we must place our image there.
-    # IMPORTANT: This assumes a single-user-at-a-time workflow.
-    # For multi-user, you'd need isolated temp directories.
-    target_input_path = os.path.join(project_root, "data/input/test1.png")
-    # Ensure the input directory exists
-    os.makedirs(os.path.dirname(target_input_path), exist_ok=True)
-    # Copy the user-uploaded image to the location the script expects
-    import shutil
-    shutil.copy(image_path, target_input_path)
-    # --- Part 1: Initial Generation with Placeholders ---
-    print("\n--- Part 1: Initial Generation with Placeholders ---")
-    inject_prompt_to_generator(prompt)
-    run_script(os.path.join(project_root, "block_parsor.py"))
-    run_script(os.path.join(project_root, "html_generator.py"))
-    # --- Part 2: Final HTML Code Generation ---
-    print("\n--- Part 2: Final HTML Code Generation ---")
-    run_script(os.path.join(project_root, "image_box_detection.py"))
-    run_script(os.path.join(project_root, "UIED/run_single.py"))
-    run_script(os.path.join(project_root, "mapping.py"))
-    run_script(os.path.join(project_root, "image_replacer.py"))
-    final_html_path = os.path.join(output_dir, "test1_layout_final.html")
-    print(f"\nScreencoder demo workflow completed! Final HTML at: {final_html_path}")
-    # Check if the final file exists
-    if os.path.exists(final_html_path):
-        with open(final_html_path, 'r', encoding='utf-8') as f:
-            return f.read()
-    else:
-        return "<html><body><h1>Error: Final HTML not generated.</h1></body></html>"
 def main():
-    """Main function to run the entire Screencoder workflow."""
-    print("Starting the Screencoder full workflow...")
-    # --- Part 1: Initial Generation with Placeholders ---
-    print("\n--- Part 1: Initial Generation with Placeholders ---")
-    run_script("block_parsor.py")
-    run_script("html_generator.py")
-    # --- Part 2: Final HTML Code Generation ---
-    print("\n--- Part 2: Final HTML Code Generation ---")
-    run_script("image_box_detection.py")
-    run_script("UIED/run_single.py")
-    run_script("mapping.py")
-    run_script("image_replacer.py")
     print("\nScreencoder workflow completed successfully!")
 if __name__ == "__main__":

 import sys
 import os
 import json
+import uuid
+import shutil
+from PIL import Image
+# This function is now more robust, injecting the prompt into a temporary copy of the generator.
+def inject_prompt_to_generator(prompt_text, temp_generator_path):
     if not prompt_text:
         return
     user_instruction = {
         "sidebar": "Make all icons look better; fill in relevant English text; beautify the layout.",
         "header": "Make the Google logo look better; change the avatar color to be more appealing.",
         "main content": prompt_text
     }
+    with open(temp_generator_path, 'r', encoding='utf-8') as f:
+        content = f.read()
+    start_marker = "user_instruction = {"
+    end_marker = "}"
+    start_index = content.find(start_marker)
+    end_index = content.find(end_marker, start_index)
+    if start_index != -1 and end_index != -1:
+        dict_str = f"user_instruction = {json.dumps(user_instruction, indent=4)}"
+        content = content[:start_index] + dict_str + content[end_index+1:]
+    with open(temp_generator_path, 'w', encoding='utf-8') as f:
+        f.write(content)
def run_script_with_run_id(script_name, run_id, instructions=None):
    """Execute one pipeline script as a subprocess, passing it ``--run_id``.

    The script is looked up relative to the directory containing this file;
    if it is not found there, the ``UIED`` subdirectory is tried.

    Args:
        script_name: File name (or relative path) of the script to run.
        run_id: Identifier forwarded to the script as ``--run_id``.
        instructions: Optional JSON-serializable object. It is forwarded as
            ``--instructions <json>`` only when ``script_name`` is exactly
            ``"html_generator.py"``; it is ignored for every other script.

    Raises:
        subprocess.CalledProcessError: if the script exits with a non-zero
            status. Its captured stdout/stderr are printed before the
            exception is re-raised so the caller can stop the workflow.
    """
    screencoder_dir = os.path.dirname(os.path.abspath(__file__))
    script_path = os.path.join(screencoder_dir, script_name)
    if not os.path.exists(script_path):
        # Handle scripts inside subdirectories like UIED/
        script_path = os.path.join(screencoder_dir, "UIED", script_name)

    # Launch the child with the interpreter that is running this process so
    # it sees the same installed packages; a bare "python" may be absent from
    # PATH or point at a different environment. sys.executable can be empty
    # in unusual embeddings, hence the fallback.
    interpreter = sys.executable or "python"
    command = [interpreter, script_path, "--run_id", run_id]

    # Add instructions to the command if provided
    if instructions and script_name == "html_generator.py":
        instructions_json = json.dumps(instructions)
        command.extend(["--instructions", instructions_json])

    print(f"\n--- Running script: {script_name} ---")
    try:
        # Pass the current environment variables to the subprocess
        result = subprocess.run(command, check=True, capture_output=True, text=True, env=os.environ)
        print(result.stdout)
        if result.stderr:
            print("Error:")
            print(result.stderr)
    except subprocess.CalledProcessError as e:
        print(f"Error executing {script_name}:")
        print(e.stdout)
        print(e.stderr)
        raise  # Re-raise the exception to stop the workflow if a script fails
def generate_html_for_demo(image_path, instructions):
    """Run the full Screencoder pipeline for one image and return the HTML.

    Each call gets a fresh UUID ``run_id`` and its own
    ``data/tmp/<run_id>`` and ``data/output/<run_id>`` directories next to
    this file. The uploaded image is re-encoded as
    ``data/tmp/<run_id>/<run_id>.png`` and the pipeline scripts are then run
    in sequence via ``run_script_with_run_id``.

    The per-run directories are NOT removed afterwards; they are left on
    disk for the caller to manage.

    Args:
        image_path: Path of the input screenshot, in any format that
            ``PIL.Image.open`` can read.
        instructions: Object forwarded to ``html_generator.py`` as its
            ``--instructions`` argument (ignored by the other scripts).

    Returns:
        str: The contents of ``<run_id>_layout_final.html`` on success;
        otherwise a plain-text error message. Exceptions raised by any step
        are caught and reported through the returned string rather than
        propagated.
    """
    run_id = str(uuid.uuid4())
    print(f"--- Starting Screencoder workflow for run_id: {run_id} ---")

    base_dir = os.path.dirname(os.path.abspath(__file__))
    tmp_dir = os.path.join(base_dir, 'data', 'tmp', run_id)
    output_dir = os.path.join(base_dir, 'data', 'output', run_id)
    os.makedirs(tmp_dir, exist_ok=True)
    os.makedirs(output_dir, exist_ok=True)

    try:
        # 1. Copy user-uploaded image to the temp input directory
        new_image_path = os.path.join(tmp_dir, f"{run_id}.png")
        # The context manager closes the underlying file handle as soon as
        # the PNG copy has been written.
        with Image.open(image_path) as img:
            img.save(new_image_path, "PNG")

        # 2. Run the processing scripts in sequence
        run_script_with_run_id("UIED/run_single.py", run_id)
        run_script_with_run_id("block_parsor.py", run_id)
        run_script_with_run_id("html_generator.py", run_id, instructions)
        run_script_with_run_id("image_box_detection.py", run_id)
        run_script_with_run_id("mapping.py", run_id)
        run_script_with_run_id("image_replacer.py", run_id)

        # 3. Read the final generated HTML
        final_html_path = os.path.join(output_dir, f"{run_id}_layout_final.html")
        if os.path.exists(final_html_path):
            with open(final_html_path, 'r', encoding='utf-8') as f:
                html_content = f.read()
            print(f"Successfully generated HTML for run_id: {run_id}")
            return html_content
        else:
            return f"Error: Final HTML file not found for run_id: {run_id}"
    except Exception as e:
        # Top-level boundary for the demo: report the failure to the UI
        # instead of crashing the request.
        print(f"An error occurred during the workflow for run_id {run_id}: {e}")
        return f"An error occurred: {e}"
    finally:
        # 4. No cleanup is performed: directory removal was disabled in the
        # original code, so the log says so instead of claiming a cleanup.
        # NOTE(review): presumably the returned HTML references cropped
        # images stored under the output directory -- confirm before
        # re-enabling removal of tmp_dir / output_dir.
        print(f"Temporary files kept for run_id {run_id}: {tmp_dir}, {output_dir}")
 def main():
+    """Main function to run the entire Screencoder workflow (legacy)."""
+    print("Starting the Screencoder full workflow (legacy)...")
+    # This main function is now considered legacy and does not use dynamic run_ids.
+    # It will continue to use the hardcoded paths.
+    run_id = "test1"  # Hardcoded for legacy main
+    run_script_with_run_id("UIED/run_single.py", run_id)
+    run_script_with_run_id("block_parsor.py", run_id)
+    run_script_with_run_id("html_generator.py", run_id)
+    run_script_with_run_id("image_box_detection.py", run_id)
+    run_script_with_run_id("mapping.py", run_id)
+    run_script_with_run_id("image_replacer.py", run_id)
     print("\nScreencoder workflow completed successfully!")
 if __name__ == "__main__":

screencoder/mapping.py CHANGED Viewed

@@ -227,21 +227,41 @@ def generate_debug_overlay(img_path, all_uied_boxes, region_results, uied_shape,
     cv2.imwrite(str(out_png), canvas)
-def main(args):
-    # 1. Load the original screenshot to get its absolute dimensions
-    if not args.debug_src or not args.debug_src.exists():
-        sys.exit("Error: A valid --debug-src image path must be provided for coordinate conversion.")
-    orig_img = cv2.imread(str(args.debug_src))
     if orig_img is None:
-        sys.exit(f"Error: Could not read debug source image at {args.debug_src}.")
     H_orig, W_orig, _ = orig_img.shape
     # 2. Load proportional data and convert to absolute pixel coordinates
-    pixel_regions, pixel_placeholders = load_regions_and_placeholders(args.gray, W_orig, H_orig)
     # 3. Load UIED data
-    all_uied_boxes, uied_shape = load_uied_boxes(args.uied)
     if not pixel_placeholders or not all_uied_boxes:
         print("Error: Could not proceed without placeholder and UIED data.")
@@ -294,21 +314,19 @@ def main(args):
     # 6. Report and save results
     print(f"Successfully created {total_mappings_count} one-to-one mappings out of {total_placeholders_count} placeholders.")
-    args.out.write_text(json.dumps(final_results, indent=2, ensure_ascii=False))
-    print(f"Mapping data written to {args.out}")
-    if args.debug:
-        if not args.debug_src or not args.debug_src.exists():
-            print("Error: A valid --debug-src image path must be provided when using --debug.")
-            return
-        generate_debug_overlay(args.debug_src, all_uied_boxes, final_results, uied_shape, args.debug)
-        print(f"Debug image written to {args.debug}")
 if __name__ == "__main__":
-    ap = argparse.ArgumentParser()
-    ap.add_argument("--gray", type=Path, default=Path("data/tmp/test1_bboxes.json"), help="Path to the JSON file with gray placeholder boxes.")
-    ap.add_argument("--uied", type=Path, default=Path("data/tmp/ip/test1.json"), help="Path to the JSON file with UIED detected boxes.")
-    ap.add_argument("--out", default=Path("data/tmp/mapping_full_test1.json"), type=Path, help="Output path for the mapping JSON file.")
-    ap.add_argument("--debug", type=Path, default=Path("data/tmp/overlay_test_test1.png"), help="Output path for the debug overlay PNG.")
-    ap.add_argument("--debug-src", type=Path, default=Path("data/input/test1.png"), help="Path to the original screenshot for the debug overlay background.")
-    main(ap.parse_args())

     cv2.imwrite(str(out_png), canvas)
+def main():
+    args = get_args()
+    run_id = args.run_id
+    # --- Dynamic Path Construction ---
+    base_dir = Path(__file__).parent.resolve()
+    tmp_dir = base_dir / 'data' / 'tmp' / run_id
+    gray_json_path = tmp_dir / f"{run_id}_bboxes.json"
+    uied_json_path = tmp_dir / "ip" / f"{run_id}.json"
+    mapping_output_path = tmp_dir / f"mapping_full_{run_id}.json"
+    debug_src_path = tmp_dir / f"{run_id}.png"
+    debug_overlay_path = tmp_dir / f"overlay_test_{run_id}.png"
+    # --- Input Validation ---
+    if not gray_json_path.exists():
+        sys.exit(f"Error: Placeholder JSON not found at {gray_json_path}")
+    if not uied_json_path.exists():
+        sys.exit(f"Error: UIED JSON not found at {uied_json_path}")
+    if not debug_src_path.exists():
+        sys.exit(f"Error: Source image for coordinate conversion not found at {debug_src_path}")
+    print(f"--- Starting Mapping for run_id: {run_id} ---")
+    # 1. Load the original screenshot to get its absolute dimensions
+    orig_img = cv2.imread(str(debug_src_path))
     if orig_img is None:
+        sys.exit(f"Error: Could not read debug source image at {debug_src_path}.")
     H_orig, W_orig, _ = orig_img.shape
     # 2. Load proportional data and convert to absolute pixel coordinates
+    pixel_regions, pixel_placeholders = load_regions_and_placeholders(gray_json_path, W_orig, H_orig)
     # 3. Load UIED data
+    all_uied_boxes, uied_shape = load_uied_boxes(uied_json_path)
     if not pixel_placeholders or not all_uied_boxes:
         print("Error: Could not proceed without placeholder and UIED data.")
     # 6. Report and save results
     print(f"Successfully created {total_mappings_count} one-to-one mappings out of {total_placeholders_count} placeholders.")
+    mapping_output_path.write_text(json.dumps(final_results, indent=2, ensure_ascii=False))
+    print(f"Mapping data written to {mapping_output_path}")
+    # Always generate the debug image if the source exists
+    generate_debug_overlay(debug_src_path, all_uied_boxes, final_results, uied_shape, debug_overlay_path)
+    print(f"Debug image written to {debug_overlay_path}")
+    print(f"--- Mapping Complete for run_id: {run_id} ---")
def get_args(argv=None):
    """Parse the command-line options for the mapping step.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, which is the
            original behavior; passing a list lets callers invoke the step
            programmatically.

    Returns:
        argparse.Namespace: namespace whose ``run_id`` attribute holds the
        identifier given with ``--run_id``.

    Raises:
        SystemExit: raised by argparse when ``--run_id`` is missing.
    """
    ap = argparse.ArgumentParser(description="Map UIED components to placeholder boxes.")
    ap.add_argument('--run_id', required=True, type=str, help="A unique identifier for the processing run.")
    return ap.parse_args(argv)
 if __name__ == "__main__":
+    main()