Spaces:
Running
Running
Commit
·
0246ff9
1
Parent(s):
1bcdbad
Update
Browse files- app.py +53 -30
- screencoder/UIED/detect_compo/ip_region_proposal.py +36 -0
- screencoder/UIED/detect_compo/lib_ip/ip_detection.py +4 -4
- screencoder/UIED/run_single.py +35 -5
- screencoder/block_parsor.py +90 -103
- screencoder/html_generator.py +105 -234
- screencoder/image_box_detection.py +43 -26
- screencoder/image_replacer.py +43 -26
- screencoder/main.py +96 -105
- screencoder/mapping.py +41 -23
app.py
CHANGED
@@ -5,20 +5,35 @@ import cv2
|
|
5 |
import numpy as np
|
6 |
from screencoder.main import generate_html_for_demo
|
7 |
|
8 |
-
#
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
-
def process_image_and_prompt(image_np, image_path_from_state,
|
22 |
final_image_path = ""
|
23 |
is_temp_file = False
|
24 |
|
@@ -35,8 +50,15 @@ def process_image_and_prompt(image_np, image_path_from_state, prompt):
|
|
35 |
else:
|
36 |
return "<html><body><h1 style='font-family: sans-serif; text-align: center; margin-top: 40px;'>Please provide an image.</h1></body></html>", ""
|
37 |
|
38 |
-
|
39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
if is_temp_file:
|
42 |
os.unlink(final_image_path)
|
@@ -44,7 +66,7 @@ def process_image_and_prompt(image_np, image_path_from_state, prompt):
|
|
44 |
return html_content, html_content
|
45 |
|
46 |
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"), fill_height=True) as demo:
|
47 |
-
active_image_path_state = gr.State(value=examples_data[0][
|
48 |
|
49 |
gr.Markdown("# ScreenCoder: Screenshot to Code")
|
50 |
|
@@ -60,14 +82,13 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"), fi
|
|
60 |
|
61 |
upload_button = gr.UploadButton("Click to Upload or Drag-and-Drop", file_types=["image"], variant="primary")
|
62 |
|
63 |
-
gr.Markdown("### Step 2: Write
|
64 |
-
|
65 |
-
label="Instructions",
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
generate_btn = gr.Button("Generate HTML", variant="primary", scale=2)
|
72 |
|
73 |
with gr.Column(scale=2):
|
@@ -80,24 +101,26 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"), fi
|
|
80 |
if examples_data:
|
81 |
gr.Examples(
|
82 |
examples=examples_data,
|
83 |
-
|
|
|
|
|
84 |
label="Click an example to try it out",
|
|
|
85 |
)
|
86 |
|
87 |
def handle_upload(uploaded_image_np):
|
88 |
-
"""On upload, update image, clear state, and set
|
89 |
-
|
90 |
-
return uploaded_image_np, None, default_prompt
|
91 |
|
92 |
upload_button.upload(
|
93 |
fn=handle_upload,
|
94 |
inputs=upload_button,
|
95 |
-
outputs=[active_image, active_image_path_state,
|
96 |
)
|
97 |
|
98 |
generate_btn.click(
|
99 |
fn=process_image_and_prompt,
|
100 |
-
inputs=[active_image, active_image_path_state,
|
101 |
outputs=[html_preview, html_code_output],
|
102 |
show_progress="full"
|
103 |
)
|
|
|
5 |
import numpy as np
|
6 |
from screencoder.main import generate_html_for_demo
|
7 |
|
8 |
+
# Manually defined examples
|
9 |
+
examples_data = [
|
10 |
+
[
|
11 |
+
"screencoder/data/input/test1.png",
|
12 |
+
"",
|
13 |
+
"",
|
14 |
+
"",
|
15 |
+
"",
|
16 |
+
"screencoder/data/input/test1.png"
|
17 |
+
],
|
18 |
+
[
|
19 |
+
"screencoder/data/input/test2.png",
|
20 |
+
"",
|
21 |
+
"",
|
22 |
+
"",
|
23 |
+
"",
|
24 |
+
"screencoder/data/input/test2.png"
|
25 |
+
],
|
26 |
+
[
|
27 |
+
"screencoder/data/input/test3.png",
|
28 |
+
"",
|
29 |
+
"",
|
30 |
+
"",
|
31 |
+
"",
|
32 |
+
"screencoder/data/input/test3.png"
|
33 |
+
],
|
34 |
+
]
|
35 |
|
36 |
+
def process_image_and_prompt(image_np, image_path_from_state, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt):
|
37 |
final_image_path = ""
|
38 |
is_temp_file = False
|
39 |
|
|
|
50 |
else:
|
51 |
return "<html><body><h1 style='font-family: sans-serif; text-align: center; margin-top: 40px;'>Please provide an image.</h1></body></html>", ""
|
52 |
|
53 |
+
instructions = {
|
54 |
+
"sidebar": sidebar_prompt,
|
55 |
+
"header": header_prompt,
|
56 |
+
"navigation": navigation_prompt,
|
57 |
+
"main content": main_content_prompt
|
58 |
+
}
|
59 |
+
|
60 |
+
print(f"With instructions: {instructions}")
|
61 |
+
html_content = generate_html_for_demo(final_image_path, instructions)
|
62 |
|
63 |
if is_temp_file:
|
64 |
os.unlink(final_image_path)
|
|
|
66 |
return html_content, html_content
|
67 |
|
68 |
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"), fill_height=True) as demo:
|
69 |
+
active_image_path_state = gr.State(value=examples_data[0][5] if examples_data else None)
|
70 |
|
71 |
gr.Markdown("# ScreenCoder: Screenshot to Code")
|
72 |
|
|
|
82 |
|
83 |
upload_button = gr.UploadButton("Click to Upload or Drag-and-Drop", file_types=["image"], variant="primary")
|
84 |
|
85 |
+
gr.Markdown("### Step 2: Write Prompts (Optional)")
|
86 |
+
with gr.Accordion("Component-specific Prompts", open=True):
|
87 |
+
sidebar_prompt = gr.Textbox(label="Sidebar Prompt", placeholder="Instructions for the sidebar...", value="")
|
88 |
+
header_prompt = gr.Textbox(label="Header Prompt", placeholder="Instructions for the header...", value="")
|
89 |
+
navigation_prompt = gr.Textbox(label="Navigation Prompt", placeholder="Instructions for the navigation...", value="")
|
90 |
+
main_content_prompt = gr.Textbox(label="Main Content Prompt", placeholder="Instructions for the main content...", value="")
|
91 |
+
|
|
|
92 |
generate_btn = gr.Button("Generate HTML", variant="primary", scale=2)
|
93 |
|
94 |
with gr.Column(scale=2):
|
|
|
101 |
if examples_data:
|
102 |
gr.Examples(
|
103 |
examples=examples_data,
|
104 |
+
fn=lambda *args: args, # Simply return all inputs
|
105 |
+
inputs=[active_image, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt, active_image_path_state],
|
106 |
+
outputs=[active_image, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt, active_image_path_state],
|
107 |
label="Click an example to try it out",
|
108 |
+
cache_examples=False,
|
109 |
)
|
110 |
|
111 |
def handle_upload(uploaded_image_np):
|
112 |
+
"""On upload, update image, clear state, and set empty prompts."""
|
113 |
+
return uploaded_image_np, None, "", "", "", ""
|
|
|
114 |
|
115 |
upload_button.upload(
|
116 |
fn=handle_upload,
|
117 |
inputs=upload_button,
|
118 |
+
outputs=[active_image, active_image_path_state, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt]
|
119 |
)
|
120 |
|
121 |
generate_btn.click(
|
122 |
fn=process_image_and_prompt,
|
123 |
+
inputs=[active_image, active_image_path_state, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt],
|
124 |
outputs=[html_preview, html_code_output],
|
125 |
show_progress="full"
|
126 |
)
|
screencoder/UIED/detect_compo/ip_region_proposal.py
CHANGED
@@ -13,6 +13,38 @@ from config.CONFIG_UIED import Config
|
|
13 |
C = Config()
|
14 |
|
15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
def nesting_inspection(org, grey, compos, ffl_block):
|
17 |
'''
|
18 |
Inspect all big compos through block division by flood-fill
|
@@ -87,6 +119,10 @@ def compo_detection(input_img_path, output_root, uied_params,
|
|
87 |
|
88 |
# *** Step 7 *** save detection result
|
89 |
Compo.compos_update(uicompos, org.shape)
|
|
|
|
|
|
|
|
|
90 |
file.save_corners_json(pjoin(ip_root, name + '.json'), uicompos)
|
91 |
print("[Compo Detection Completed in %.3f s] Input: %s Output: %s" % (time.perf_counter() - start, input_img_path, pjoin(ip_root, name + '.json')))
|
92 |
return uicompos
|
|
|
13 |
C = Config()
|
14 |
|
15 |
|
16 |
+
def resolve_uicompo_containment(uicompos):
    """
    Drop every UI component whose bounding box lies fully inside another one.

    Each element of ``uicompos`` must expose a ``bbox`` attribute with
    ``col_min``, ``row_min``, ``col_max`` and ``row_max`` fields. Containment
    is inclusive, so two components with identical boxes contain each other;
    in that case only the first one (lowest index) is kept instead of both
    being discarded.

    Returns a new list with the surviving components in their original order;
    the input list is not modified.
    """

    def contains(bbox_a, bbox_b):
        """Checks if bbox_a completely contains bbox_b (edges inclusive)."""
        return (bbox_a.col_min <= bbox_b.col_min
                and bbox_a.row_min <= bbox_b.row_min
                and bbox_a.col_max >= bbox_b.col_max
                and bbox_a.row_max >= bbox_b.row_max)

    compos_to_remove = set()
    for i, outer in enumerate(uicompos):
        for j, inner in enumerate(uicompos):
            if i == j or not contains(outer.bbox, inner.bbox):
                continue

            # Mutual containment means the two boxes are identical. Without
            # this guard each would remove the other and the element would
            # vanish entirely; keep the earlier one as the representative.
            if i > j and contains(inner.bbox, outer.bbox):
                continue

            compos_to_remove.add(j)

    # Filter out the contained components, preserving input order.
    final_compos = [compo for i, compo in enumerate(uicompos) if i not in compos_to_remove]

    if len(final_compos) < len(uicompos):
        print(f"Containment resolved: Removed {len(uicompos) - len(final_compos)} contained components.")

    return final_compos
|
46 |
+
|
47 |
+
|
48 |
def nesting_inspection(org, grey, compos, ffl_block):
|
49 |
'''
|
50 |
Inspect all big compos through block division by flood-fill
|
|
|
119 |
|
120 |
# *** Step 7 *** save detection result
|
121 |
Compo.compos_update(uicompos, org.shape)
|
122 |
+
|
123 |
+
# *** Step 8 *** Resolve containment before saving
|
124 |
+
uicompos = resolve_uicompo_containment(uicompos)
|
125 |
+
|
126 |
file.save_corners_json(pjoin(ip_root, name + '.json'), uicompos)
|
127 |
print("[Compo Detection Completed in %.3f s] Input: %s Output: %s" % (time.perf_counter() - start, input_img_path, pjoin(ip_root, name + '.json')))
|
128 |
return uicompos
|
screencoder/UIED/detect_compo/lib_ip/ip_detection.py
CHANGED
@@ -361,13 +361,13 @@ def is_block(clip, thread=0.15):
|
|
361 |
# top border - scan top down
|
362 |
blank_count = 0
|
363 |
for i in range(1, 5):
|
364 |
-
if sum(clip[side + i]) / 255 > thread * clip.shape[1]:
|
365 |
blank_count += 1
|
366 |
if blank_count > 2: return False
|
367 |
# left border - scan left to right
|
368 |
blank_count = 0
|
369 |
for i in range(1, 5):
|
370 |
-
if sum(clip[:, side + i]) / 255 > thread * clip.shape[0]:
|
371 |
blank_count += 1
|
372 |
if blank_count > 2: return False
|
373 |
|
@@ -375,13 +375,13 @@ def is_block(clip, thread=0.15):
|
|
375 |
# bottom border - scan bottom up
|
376 |
blank_count = 0
|
377 |
for i in range(-1, -5, -1):
|
378 |
-
if sum(clip[side + i]) / 255 > thread * clip.shape[1]:
|
379 |
blank_count += 1
|
380 |
if blank_count > 2: return False
|
381 |
# right border - scan right to left
|
382 |
blank_count = 0
|
383 |
for i in range(-1, -5, -1):
|
384 |
-
if sum(clip[:, side + i]) / 255 > thread * clip.shape[0]:
|
385 |
blank_count += 1
|
386 |
if blank_count > 2: return False
|
387 |
return True
|
|
|
361 |
# top border - scan top down
|
362 |
blank_count = 0
|
363 |
for i in range(1, 5):
|
364 |
+
if sum(clip[side + i].astype(np.int64)) / 255 > thread * clip.shape[1]:
|
365 |
blank_count += 1
|
366 |
if blank_count > 2: return False
|
367 |
# left border - scan left to right
|
368 |
blank_count = 0
|
369 |
for i in range(1, 5):
|
370 |
+
if sum(clip[:, side + i].astype(np.int64)) / 255 > thread * clip.shape[0]:
|
371 |
blank_count += 1
|
372 |
if blank_count > 2: return False
|
373 |
|
|
|
375 |
# bottom border - scan bottom up
|
376 |
blank_count = 0
|
377 |
for i in range(-1, -5, -1):
|
378 |
+
if sum(clip[side + i].astype(np.int64)) / 255 > thread * clip.shape[1]:
|
379 |
blank_count += 1
|
380 |
if blank_count > 2: return False
|
381 |
# right border - scan right to left
|
382 |
blank_count = 0
|
383 |
for i in range(-1, -5, -1):
|
384 |
+
if sum(clip[:, side + i].astype(np.int64)) / 255 > thread * clip.shape[0]:
|
385 |
blank_count += 1
|
386 |
if blank_count > 2: return False
|
387 |
return True
|
screencoder/UIED/run_single.py
CHANGED
@@ -1,9 +1,14 @@
|
|
1 |
-
from os.path import join as pjoin
|
2 |
import cv2
|
3 |
import os
|
4 |
import numpy as np
|
5 |
import multiprocessing
|
|
|
|
|
6 |
|
|
|
|
|
|
|
|
|
7 |
|
8 |
def resize_height_by_longest_edge(img_path, resize_length=800):
|
9 |
org = cv2.imread(img_path)
|
@@ -30,6 +35,29 @@ def color_tips():
|
|
30 |
|
31 |
|
32 |
if __name__ == '__main__':
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
# Set multiprocessing start method to 'spawn' for macOS compatibility.
|
34 |
# This must be done at the very beginning of the main block.
|
35 |
try:
|
@@ -62,11 +90,11 @@ if __name__ == '__main__':
|
|
62 |
'merge-contained-ele':True, 'merge-line-to-paragraph':False, 'remove-bar':True}
|
63 |
|
64 |
# set input image path
|
65 |
-
input_path_img = 'data/test1.png'
|
66 |
-
output_root = 'data'
|
67 |
|
68 |
resized_height = resize_height_by_longest_edge(input_path_img, resize_length=800)
|
69 |
-
color_tips()
|
70 |
|
71 |
is_ip = True
|
72 |
is_clf = False
|
@@ -99,4 +127,6 @@ if __name__ == '__main__':
|
|
99 |
compo_path = pjoin(output_root, 'ip', str(name) + '.json')
|
100 |
ocr_path = pjoin(output_root, 'ocr', str(name) + '.json')
|
101 |
merge.merge(input_path_img, compo_path, ocr_path, pjoin(output_root, 'merge'),
|
102 |
-
is_remove_bar=key_params['remove-bar'], is_paragraph=key_params['merge-line-to-paragraph'], show=
|
|
|
|
|
|
|
|
1 |
import cv2
|
2 |
import os
|
3 |
import numpy as np
|
4 |
import multiprocessing
|
5 |
+
import argparse
|
6 |
+
from os.path import join as pjoin
|
7 |
|
8 |
+
def get_args(argv=None):
    """
    Parse the command-line options of the single-image UIED script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, so existing
            ``get_args()`` callers behave exactly as before.

    Returns:
        The parsed ``argparse.Namespace``; ``run_id`` is a required string.
    """
    parser = argparse.ArgumentParser(description="Processes a single image for UI element detection.")
    parser.add_argument('--run_id', type=str, required=True, help='A unique identifier for the processing run.')
    return parser.parse_args(argv)
|
12 |
|
13 |
def resize_height_by_longest_edge(img_path, resize_length=800):
|
14 |
org = cv2.imread(img_path)
|
|
|
35 |
|
36 |
|
37 |
if __name__ == '__main__':
|
38 |
+
args = get_args()
|
39 |
+
|
40 |
+
# --- Dynamic Path Construction ---
|
41 |
+
# Construct paths based on the provided run_id
|
42 |
+
base_dir = os.path.dirname(os.path.abspath(__file__))
|
43 |
+
run_id = args.run_id
|
44 |
+
|
45 |
+
# The temporary directory for this specific run
|
46 |
+
tmp_dir = os.path.join(base_dir, '..', 'data', 'tmp', run_id)
|
47 |
+
|
48 |
+
# Input image path
|
49 |
+
input_path_img = os.path.join(tmp_dir, f"{run_id}.png")
|
50 |
+
|
51 |
+
# Output directory for this script's results
|
52 |
+
output_root = tmp_dir # All results (ip, ocr, etc.) will go into the run's tmp subdir.
|
53 |
+
|
54 |
+
if not os.path.exists(input_path_img):
|
55 |
+
print(f"Error: Input image not found at {input_path_img}")
|
56 |
+
exit(1)
|
57 |
+
|
58 |
+
print(f"--- Starting UIED processing for run_id: {run_id} ---")
|
59 |
+
print(f"Input image: {input_path_img}")
|
60 |
+
print(f"Output root: {output_root}")
|
61 |
# Set multiprocessing start method to 'spawn' for macOS compatibility.
|
62 |
# This must be done at the very beginning of the main block.
|
63 |
try:
|
|
|
90 |
'merge-contained-ele':True, 'merge-line-to-paragraph':False, 'remove-bar':True}
|
91 |
|
92 |
# set input image path
|
93 |
+
# input_path_img = 'data/test1.png'
|
94 |
+
# output_root = 'data'
|
95 |
|
96 |
resized_height = resize_height_by_longest_edge(input_path_img, resize_length=800)
|
97 |
+
# color_tips() # This shows a window, which is not suitable for a script.
|
98 |
|
99 |
is_ip = True
|
100 |
is_clf = False
|
|
|
127 |
compo_path = pjoin(output_root, 'ip', str(name) + '.json')
|
128 |
ocr_path = pjoin(output_root, 'ocr', str(name) + '.json')
|
129 |
merge.merge(input_path_img, compo_path, ocr_path, pjoin(output_root, 'merge'),
|
130 |
+
is_remove_bar=key_params['remove-bar'], is_paragraph=key_params['merge-line-to-paragraph'], show=False)
|
131 |
+
|
132 |
+
print(f"--- UIED processing complete for run_id: {run_id} ---")
|
screencoder/block_parsor.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import os
|
2 |
import cv2
|
3 |
import json
|
|
|
4 |
from utils import Doubao, encode_image, image_mask
|
5 |
|
6 |
DEFAULT_IMAGE_PATH = "data/input/test1.png"
|
@@ -20,47 +21,34 @@ BBOX_TAG_END = "</bbox>"
|
|
20 |
# PROMPT_navigation = "框出网页中的navigation的位置,请你只返回对应的bounding box。"
|
21 |
# PROMPT_main_content = "框出网页中的main content的位置,请你只返回对应的bounding box。"
|
22 |
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
# print("Raw bbox input:", bbox_input) # Debug print
|
28 |
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
return bboxes
|
33 |
-
h, w = image.shape[:2]
|
34 |
-
|
35 |
try:
|
36 |
components = bbox_input.strip().split('\n')
|
37 |
-
# print("Split components:", components) # Debug print
|
38 |
-
|
39 |
for component in components:
|
40 |
component = component.strip()
|
41 |
if not component:
|
42 |
continue
|
43 |
-
|
44 |
if ':' in component:
|
45 |
name, bbox_str = component.split(':', 1)
|
46 |
else:
|
47 |
bbox_str = component
|
48 |
-
if 'sidebar' in component.lower():
|
49 |
-
|
50 |
-
elif '
|
51 |
-
|
52 |
-
|
53 |
-
name = 'navigation'
|
54 |
-
elif 'main content' in component.lower():
|
55 |
-
name = 'main content'
|
56 |
-
else:
|
57 |
-
name = 'unknown'
|
58 |
|
59 |
name = name.strip().lower()
|
60 |
bbox_str = bbox_str.strip()
|
61 |
|
62 |
-
# print(f"Processing component: {name}, bbox_str: {bbox_str}") # Debug print
|
63 |
-
|
64 |
if BBOX_TAG_START in bbox_str and BBOX_TAG_END in bbox_str:
|
65 |
start_idx = bbox_str.find(BBOX_TAG_START) + len(BBOX_TAG_START)
|
66 |
end_idx = bbox_str.find(BBOX_TAG_END)
|
@@ -69,82 +57,44 @@ def parse_bboxes(bbox_input: str, image_path: str) -> dict[str, tuple[int, int,
|
|
69 |
try:
|
70 |
norm_coords = list(map(int, coords_str.split()))
|
71 |
if len(norm_coords) == 4:
|
72 |
-
|
73 |
-
y_min = int(norm_coords[1])
|
74 |
-
x_max = int(norm_coords[2])
|
75 |
-
y_max = int(norm_coords[3])
|
76 |
-
bboxes[name] = (x_min, y_min, x_max, y_max)
|
77 |
print(f"Successfully parsed {name}: {bboxes[name]}")
|
78 |
-
else:
|
79 |
-
print(f"Invalid number of coordinates for {name}: {norm_coords}")
|
80 |
except ValueError as e:
|
81 |
print(f"Failed to parse coordinates for {name}: {e}")
|
82 |
-
else:
|
83 |
-
print(f"No bbox tags found in: {bbox_str}")
|
84 |
-
|
85 |
except Exception as e:
|
86 |
print(f"Coordinate parsing failed: {str(e)}")
|
87 |
-
|
88 |
-
traceback.print_exc()
|
89 |
-
|
90 |
print("Final parsed bboxes:", bboxes)
|
91 |
return bboxes
|
92 |
|
93 |
-
def draw_bboxes(image_path: str, bboxes: dict[str, tuple[int, int, int, int]]) -> str:
|
94 |
-
"""
|
95 |
image = cv2.imread(image_path)
|
96 |
-
if image is None:
|
97 |
-
print(f"Error: Failed to read image {image_path}")
|
98 |
-
return ""
|
99 |
|
100 |
h, w = image.shape[:2]
|
101 |
-
colors = {
|
102 |
-
'sidebar': (0, 0, 255), # Red
|
103 |
-
'header': (0, 255, 0), # Green
|
104 |
-
'navigation': (255, 0, 0), # Blue
|
105 |
-
'main content': (255, 255, 0), # Cyan
|
106 |
-
'unknown': (0, 0, 0), # Black
|
107 |
-
}
|
108 |
|
|
|
109 |
for component, norm_bbox in bboxes.items():
|
110 |
-
# Convert normalized coordinates to pixel coordinates for drawing
|
111 |
x_min = int(norm_bbox[0] * w / 1000)
|
112 |
y_min = int(norm_bbox[1] * h / 1000)
|
113 |
x_max = int(norm_bbox[2] * w / 1000)
|
114 |
y_max = int(norm_bbox[3] * h / 1000)
|
115 |
|
116 |
color = colors.get(component.lower(), (0, 0, 255))
|
117 |
-
cv2.rectangle(
|
118 |
-
|
119 |
-
# Add label
|
120 |
-
cv2.putText(image, component, (x_min, y_min - 10),
|
121 |
-
cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)
|
122 |
-
|
123 |
-
# Output directory
|
124 |
-
output_dir = "data/tmp"
|
125 |
-
os.makedirs(output_dir, exist_ok=True)
|
126 |
|
127 |
-
|
128 |
-
original_filename = os.path.basename(image_path)
|
129 |
-
output_path = os.path.join(output_dir, os.path.splitext(original_filename)[0] + "_with_bboxes.png")
|
130 |
-
|
131 |
-
if cv2.imwrite(output_path, image):
|
132 |
print(f"Successfully saved annotated image: {output_path}")
|
133 |
return output_path
|
134 |
-
print("Error: Failed to save image")
|
135 |
return ""
|
136 |
|
137 |
-
def save_bboxes_to_json(bboxes: dict[str, tuple[int, int, int, int]],
|
138 |
-
"""
|
139 |
-
#
|
140 |
-
output_dir = "data/tmp"
|
141 |
-
os.makedirs(output_dir, exist_ok=True)
|
142 |
-
|
143 |
-
original_filename = os.path.basename(image_path)
|
144 |
-
json_path = os.path.join(output_dir, os.path.splitext(original_filename)[0] + "_bboxes.json")
|
145 |
-
|
146 |
bboxes_dict = {k: list(v) for k, v in bboxes.items()}
|
147 |
-
|
148 |
try:
|
149 |
with open(json_path, 'w', encoding='utf-8') as f:
|
150 |
json.dump(bboxes_dict, f, indent=4, ensure_ascii=False)
|
@@ -154,8 +104,38 @@ def save_bboxes_to_json(bboxes: dict[str, tuple[int, int, int, int]], image_path
|
|
154 |
print(f"Error saving JSON file: {str(e)}")
|
155 |
return ""
|
156 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
# sequential version of bbox parsing: Using recursive detection with mask
|
158 |
-
def sequential_component_detection(image_path: str, api_path: str) -> dict[str, tuple[int, int, int, int]]:
|
159 |
"""
|
160 |
Sequential processing flow: detect each component in turn, mask the image after each detection
|
161 |
"""
|
@@ -189,7 +169,7 @@ def sequential_component_detection(image_path: str, api_path: str) -> dict[str,
|
|
189 |
|
190 |
masked_image = image_mask(current_image_path, norm_bbox)
|
191 |
|
192 |
-
temp_image_path = f"
|
193 |
masked_image.save(temp_image_path)
|
194 |
current_image_path = temp_image_path
|
195 |
|
@@ -238,34 +218,41 @@ def main_content_processing(bboxes: dict[str, tuple[int, int, int, int]], image_
|
|
238 |
int(bbox[3] * h / 1000))
|
239 |
|
240 |
|
241 |
-
|
242 |
-
|
243 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
244 |
|
245 |
-
print("
|
246 |
-
|
247 |
-
print(f"API path: {api_path}")
|
248 |
client = Doubao(api_path)
|
249 |
bbox_content = client.ask(PROMPT_MERGE, encode_image(image_path))
|
250 |
-
|
251 |
-
bboxes = parse_bboxes(bbox_content, image_path)
|
252 |
-
|
253 |
-
# print("=== Starting Sequential Component Detection ===")
|
254 |
-
# print(f"Input image: {image_path}")
|
255 |
-
# print(f"API path: {api_path}")
|
256 |
-
# bboxes = sequential_component_detection(image_path, api_path)
|
257 |
|
258 |
if bboxes:
|
259 |
-
print(
|
260 |
-
|
261 |
-
print(
|
262 |
-
|
263 |
-
json_path = save_bboxes_to_json(bboxes, image_path)
|
264 |
-
draw_bboxes(image_path, bboxes)
|
265 |
|
266 |
-
print(f"\n
|
267 |
-
|
268 |
-
|
269 |
else:
|
270 |
-
print("\nNo valid bounding box coordinates found")
|
271 |
-
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
import cv2
|
3 |
import json
|
4 |
+
import argparse
|
5 |
from utils import Doubao, encode_image, image_mask
|
6 |
|
7 |
DEFAULT_IMAGE_PATH = "data/input/test1.png"
|
|
|
21 |
# PROMPT_navigation = "框出网页中的navigation的位置,请你只返回对应的bounding box。"
|
22 |
# PROMPT_main_content = "框出网页中的main content的位置,请你只返回对应的bounding box。"
|
23 |
|
24 |
+
def get_args(argv=None):
    """
    Parse the command-line options of the bounding-box parsing script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, so existing
            ``get_args()`` callers behave exactly as before.

    Returns:
        The parsed ``argparse.Namespace``; ``run_id`` is a required string.
    """
    parser = argparse.ArgumentParser(description="Parses bounding boxes from an image using a vision model.")
    parser.add_argument('--run_id', type=str, required=True, help='A unique identifier for the processing run.')
    return parser.parse_args(argv)
|
|
|
28 |
|
29 |
+
def parse_bboxes(bbox_input: str) -> dict[str, tuple[int, int, int, int]]:
|
30 |
+
"""Parse bounding box string to a dictionary of normalized (0-1000) coordinate tuples."""
|
31 |
+
bboxes = {}
|
|
|
|
|
|
|
32 |
try:
|
33 |
components = bbox_input.strip().split('\n')
|
|
|
|
|
34 |
for component in components:
|
35 |
component = component.strip()
|
36 |
if not component:
|
37 |
continue
|
38 |
+
|
39 |
if ':' in component:
|
40 |
name, bbox_str = component.split(':', 1)
|
41 |
else:
|
42 |
bbox_str = component
|
43 |
+
if 'sidebar' in component.lower(): name = 'sidebar'
|
44 |
+
elif 'header' in component.lower(): name = 'header'
|
45 |
+
elif 'navigation' in component.lower(): name = 'navigation'
|
46 |
+
elif 'main content' in component.lower(): name = 'main content'
|
47 |
+
else: name = 'unknown'
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
name = name.strip().lower()
|
50 |
bbox_str = bbox_str.strip()
|
51 |
|
|
|
|
|
52 |
if BBOX_TAG_START in bbox_str and BBOX_TAG_END in bbox_str:
|
53 |
start_idx = bbox_str.find(BBOX_TAG_START) + len(BBOX_TAG_START)
|
54 |
end_idx = bbox_str.find(BBOX_TAG_END)
|
|
|
57 |
try:
|
58 |
norm_coords = list(map(int, coords_str.split()))
|
59 |
if len(norm_coords) == 4:
|
60 |
+
bboxes[name] = tuple(norm_coords) # Directly store normalized coordinates
|
|
|
|
|
|
|
|
|
61 |
print(f"Successfully parsed {name}: {bboxes[name]}")
|
|
|
|
|
62 |
except ValueError as e:
|
63 |
print(f"Failed to parse coordinates for {name}: {e}")
|
|
|
|
|
|
|
64 |
except Exception as e:
|
65 |
print(f"Coordinate parsing failed: {str(e)}")
|
66 |
+
|
|
|
|
|
67 |
print("Final parsed bboxes:", bboxes)
|
68 |
return bboxes
|
69 |
|
70 |
+
def draw_bboxes(image_path: str, bboxes: dict[str, tuple[int, int, int, int]], output_path: str) -> str:
    """
    Draw labelled rectangles for normalized (0-1000) bboxes and save the result.

    Args:
        image_path: Image to annotate; read with ``cv2.imread``.
        bboxes: Mapping of component name to ``(x_min, y_min, x_max, y_max)``
            expressed on a 0-1000 scale relative to image width/height.
        output_path: Where the annotated image is written.

    Returns:
        ``output_path`` on success, or ``""`` when the image cannot be read
        or ``cv2.imwrite`` reports failure.
    """
    image = cv2.imread(image_path)
    if image is None:
        return ""

    h, w = image.shape[:2]
    # Per-component colours, passed straight to OpenCV (which uses BGR order).
    colors = {
        'sidebar': (0, 0, 255),
        'header': (0, 255, 0),
        'navigation': (255, 0, 0),
        'main content': (255, 255, 0),
        'unknown': (0, 0, 0),
    }

    for component, norm_bbox in bboxes.items():
        # Scale the 0-1000 normalized coordinates to pixels.
        x_min = int(norm_bbox[0] * w / 1000)
        y_min = int(norm_bbox[1] * h / 1000)
        x_max = int(norm_bbox[2] * w / 1000)
        y_max = int(norm_bbox[3] * h / 1000)

        color = colors.get(component.lower(), (0, 0, 255))
        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), color, 3)

        # putText anchors the text at its bottom-left corner, so a box that
        # touches the top edge would push the label above the image. Clamp
        # the baseline so the label stays visible.
        label_y = max(y_min - 10, 25)
        cv2.putText(image, component, (x_min, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)

    if cv2.imwrite(output_path, image):
        print(f"Successfully saved annotated image: {output_path}")
        return output_path
    return ""
|
93 |
|
94 |
+
def save_bboxes_to_json(bboxes: dict[str, tuple[int, int, int, int]], json_path: str) -> str:
|
95 |
+
"""Saves the normalized bboxes to a JSON file."""
|
96 |
+
# This is the unified format: a dictionary of lists.
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
bboxes_dict = {k: list(v) for k, v in bboxes.items()}
|
|
|
98 |
try:
|
99 |
with open(json_path, 'w', encoding='utf-8') as f:
|
100 |
json.dump(bboxes_dict, f, indent=4, ensure_ascii=False)
|
|
|
104 |
print(f"Error saving JSON file: {str(e)}")
|
105 |
return ""
|
106 |
|
107 |
+
def resolve_containment(bboxes: dict[str, tuple[int, int, int, int]]) -> dict[str, tuple[int, int, int, int]]:
    """
    Resolves containment issues among bounding boxes.

    If a box is found to be fully contained within another, the contained box
    is removed and its container is kept. This is based on the assumption
    that major layout components should not contain each other.

    Containment is inclusive, so two identical boxes contain each other; in
    that case the one that appears first in ``bboxes`` is kept.

    Args:
        bboxes: Mapping of component name to ``(x_min, y_min, x_max, y_max)``.

    Returns:
        A new dict, in the original order, without the contained boxes.
        The input mapping is not modified.
    """

    def contains(box_a, box_b):
        """Checks if box_a completely contains box_b (edges inclusive)."""
        xa1, ya1, xa2, ya2 = box_a
        xb1, yb1, xb2, yb2 = box_b
        return xa1 <= xb1 and ya1 <= yb1 and xa2 >= xb2 and ya2 >= yb2

    removed = set()

    for name1, box1 in bboxes.items():
        for name2, box2 in bboxes.items():
            # Boxes that were already dropped neither remove others nor need
            # to be examined again.
            if name1 == name2 or name1 in removed or name2 in removed:
                continue

            # Only test one direction: the reverse pairing is visited when the
            # roles are swapped, which guarantees that the *contained* box is
            # the one removed regardless of dictionary order.
            if contains(box1, box2):
                print(f"Containment found: '{name1}' contains '{name2}'. Removing '{name2}'.")
                removed.add(name2)

    return {name: bbox for name, bbox in bboxes.items() if name not in removed}
|
136 |
+
|
137 |
# sequential version of bbox parsing: Using recursive detection with mask
|
138 |
+
def sequential_component_detection(image_path: str, api_path: str, temp_dir: str) -> dict[str, tuple[int, int, int, int]]:
|
139 |
"""
|
140 |
Sequential processing flow: detect each component in turn, mask the image after each detection
|
141 |
"""
|
|
|
169 |
|
170 |
masked_image = image_mask(current_image_path, norm_bbox)
|
171 |
|
172 |
+
temp_image_path = os.path.join(temp_dir, f"temp_{component_name}_masked.png")
|
173 |
masked_image.save(temp_image_path)
|
174 |
current_image_path = temp_image_path
|
175 |
|
|
|
218 |
int(bbox[3] * h / 1000))
|
219 |
|
220 |
|
221 |
+
def main():
    """
    Entry point: detect layout bounding boxes for one run and persist them.

    Reads ``--run_id`` from the command line, locates
    ``data/tmp/<run_id>/<run_id>.png`` and ``doubao_api.txt`` relative to this
    file, asks the Doubao client for bounding boxes with ``PROMPT_MERGE``,
    removes boxes contained in other boxes, then writes
    ``<run_id>_bboxes.json`` and ``<run_id>_with_bboxes.png`` into the run's
    tmp directory. When nothing could be parsed an empty JSON file is still
    written so later pipeline stages find the file.

    Exits with status 1 if the input image or the API key file is missing.
    """
    # Local import keeps this block self-contained. sys.exit is used because
    # the bare exit() helper is injected by the site module for interactive
    # use and is not guaranteed to exist (e.g. under ``python -S``).
    import sys

    args = get_args()
    run_id = args.run_id

    # --- Dynamic Path Construction ---
    base_dir = os.path.dirname(os.path.abspath(__file__))
    tmp_dir = os.path.join(base_dir, 'data', 'tmp', run_id)

    image_path = os.path.join(tmp_dir, f"{run_id}.png")
    api_path = os.path.join(base_dir, "doubao_api.txt")
    json_output_path = os.path.join(tmp_dir, f"{run_id}_bboxes.json")
    annotated_image_output_path = os.path.join(tmp_dir, f"{run_id}_with_bboxes.png")

    # Report exactly which required input is absent to make failures debuggable.
    missing = [path for path in (image_path, api_path) if not os.path.exists(path)]
    if missing:
        print(f"Error: Input image or API key file not found: {', '.join(missing)}")
        sys.exit(1)

    print(f"--- Starting BBox Parsing for run_id: {run_id} ---")

    client = Doubao(api_path)
    bbox_content = client.ask(PROMPT_MERGE, encode_image(image_path))
    bboxes = parse_bboxes(bbox_content)

    if bboxes:
        print("\n--- Resolving containment issues ---")
        bboxes = resolve_containment(bboxes)
        print("--- Containment resolved ---")

        print(f"\n--- Detection Complete for run_id: {run_id} ---")
        save_bboxes_to_json(bboxes, json_output_path)
        draw_bboxes(image_path, bboxes, annotated_image_output_path)
    else:
        print(f"\nNo valid bounding box coordinates found for run_id: {run_id}")
        # Still create an empty json file so the pipeline doesn't break
        save_bboxes_to_json({}, json_output_path)
|
256 |
+
|
257 |
+
if __name__ == "__main__":
|
258 |
+
main()
|
screencoder/html_generator.py
CHANGED
@@ -3,109 +3,65 @@ from PIL import Image
|
|
3 |
import bs4
|
4 |
from threading import Thread
|
5 |
import time
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
|
|
|
|
|
|
|
|
|
|
18 |
<div>
|
19 |
your code here
|
20 |
</div>,
|
21 |
only return the code within the <div> and </div> tags""",
|
22 |
-
|
23 |
-
"header": f"""This is a screenshot of a container. Please fill in a complete HTML and tail-wind CSS code to accurately reproduce the given container. Please note that the relative position, layout, text information, and color of all blocks in the boundary box need to be basically consistent with the original screenshot based on the user's additional conditions. The following is the code for filling in:
|
24 |
<div>
|
25 |
your code here
|
26 |
</div>,
|
27 |
only return the code within the <div> and </div> tags""",
|
28 |
-
|
29 |
-
"navigation": f"""This is a screenshot of a container. Please fill in a complete HTML and tail-wind CSS code to accurately reproduce the given container. Please note that the relative position, layout, text information, and color of all blocks in the boundary box need to be basically consistent with the original screenshot based on the user's additional conditions. Please use the same icons as in the original screenshot. The following is the code for filling in:
|
30 |
<div>
|
31 |
your code here
|
32 |
</div>,
|
33 |
only return the code within the <div> and </div> tags""",
|
34 |
-
|
35 |
-
"main content": f"""This is a screenshot of a container. Please fill in a complete HTML and tail-wind CSS code to accurately reproduce the given container. Please note that all images displayed in the screenshot must be replaced with pure gray-400 image blocks of the same size as the corresponding images in the original screenshot, and the text information in the images does not need to be recognized. The relative position, layout, text information, and color of all blocks in the boundary box need to be basically consistent with the original screenshot based on the user's additional conditions. The following is the code for filling in:
|
36 |
<div>
|
37 |
your code here
|
38 |
</div>,
|
39 |
only return the code within the <div> and </div> tags""",
|
40 |
-
}
|
41 |
-
|
42 |
-
# PROMPT_sidebar = f"""这是一个container的截图。请填写一段完整的HTML和tail-wind CSS代码以准确再现给定的容器。请注意所有组块的排版、图标样式、大小、文字信息需要在用户额外条件的基础上与原始截图基本保持一致。以下是供填写的代码:
|
43 |
-
|
44 |
-
# <div>
|
45 |
-
# your code here
|
46 |
-
# </div>
|
47 |
-
|
48 |
-
# 只需返回<div>和</div>标签内的代码"""
|
49 |
-
|
50 |
-
# PROMPT_header = f"""这是一个container的截图。请填写一段完整的HTML和tail-wind CSS代码以准确再现给定的容器。请注意所有组块在boundary box中的相对位置、排版、文字信息、颜色需要在用户额外条件的基础上与原始截图基本保持一致。以下是供填写的代码:
|
51 |
-
|
52 |
-
# <div>
|
53 |
-
# your code here
|
54 |
-
# </div>
|
55 |
-
|
56 |
-
# 只需返回<div>和</div>标签内的代码"""
|
57 |
-
|
58 |
-
# PROMPT_navigation = f"""这是一个container的截图。请填写一段完整的HTML和tail-wind CSS代码以准确再现给定的容器。请注意所有组块的在boundary box中的相对位置、文字排版、颜色需要在用户额外条件的基础上与原始截图基本保持一致。请你直接使用原始截图中一致的图标。以下是供填写的代码:
|
59 |
-
|
60 |
-
# <div>
|
61 |
-
# your code here
|
62 |
-
# </div>
|
63 |
-
|
64 |
-
# 只需返回<div>和</div>标签内的代码"""
|
65 |
-
|
66 |
-
# PROMPT_main_content = f"""这是一个container的截图。请填写一段完整的HTML和tail-wind CSS代码以准确再现给定的容器。截图中显示的图像务必全部用与原始截图中对应图像同样大小的纯灰色图像块替换,不需要识别图像中的文字信息。请注意所有组块在boundary box中的相对位置、排版、文字信息、颜色需要在用户额外条件的基础上与原始截图基本保持一致。以下是供填写的代码:
|
67 |
-
|
68 |
-
# <div>
|
69 |
-
# your code here
|
70 |
-
# </div>
|
71 |
-
|
72 |
-
# 只需返回<div>和</div>标签内的代码"""
|
73 |
|
74 |
-
|
75 |
-
|
76 |
-
"""generate code for all the leaf nodes in the bounding box tree, return a dictionary: {'id': 'code'}"""
|
77 |
img = Image.open(img_path)
|
78 |
code_dict = {}
|
79 |
-
|
|
|
80 |
def _generate_code(node):
|
81 |
-
if node
|
82 |
bbox = node["bbox"]
|
83 |
-
# bbox is already in pixel coordinates [x1, y1, x2, y2]
|
84 |
cropped_img = img.crop(bbox)
|
85 |
|
86 |
-
|
87 |
-
if
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
elif node["type"] == "main content":
|
95 |
-
prompt = PROMPT_DICT["main content"]
|
96 |
-
else:
|
97 |
-
print(f"Unknown component type: {node['type']}")
|
98 |
-
return
|
99 |
else:
|
100 |
-
print("Node type not found")
|
101 |
-
return
|
102 |
-
|
103 |
-
try:
|
104 |
-
code = bot.ask(prompt, encode_image(cropped_img))
|
105 |
-
code_dict[node["id"]] = code
|
106 |
-
except Exception as e:
|
107 |
-
print(f"Error generating code for {node.get('type', 'unknown')}: {str(e)}")
|
108 |
-
code_dict[node["id"]] = f"<!-- Error: {str(e)} -->"
|
109 |
else:
|
110 |
for child in node["children"]:
|
111 |
_generate_code(child)
|
@@ -113,66 +69,8 @@ def generate_code(bbox_tree, img_path, bot):
|
|
113 |
_generate_code(bbox_tree)
|
114 |
return code_dict
|
115 |
|
116 |
-
|
117 |
-
|
118 |
-
"""generate code for all the leaf nodes in the bounding box tree, return a dictionary: {'id': 'code'}"""
|
119 |
-
code_dict = {}
|
120 |
-
t_list = []
|
121 |
-
|
122 |
-
def _generate_code_with_retry(node, max_retries=3, retry_delay=2):
|
123 |
-
"""Generate code with retry mechanism for rate limit errors"""
|
124 |
-
try:
|
125 |
-
# Create a new image instance for each thread
|
126 |
-
with Image.open(img_path) as img:
|
127 |
-
bbox = node["bbox"]
|
128 |
-
cropped_img = img.crop(bbox)
|
129 |
-
|
130 |
-
for attempt in range(max_retries):
|
131 |
-
try:
|
132 |
-
code = bot.ask(prompt, encode_image(cropped_img))
|
133 |
-
code_dict[node["id"]] = code
|
134 |
-
return
|
135 |
-
except Exception as e:
|
136 |
-
if "rate_limit" in str(e).lower() and attempt < max_retries - 1:
|
137 |
-
print(f"Rate limit hit, retrying in {retry_delay} seconds... (Attempt {attempt + 1}/{max_retries})")
|
138 |
-
time.sleep(retry_delay)
|
139 |
-
retry_delay *= 2 # Exponential backoff
|
140 |
-
else:
|
141 |
-
print(f"Error generating code for node {node['id']}: {str(e)}")
|
142 |
-
code_dict[node["id"]] = f"<!-- Error: {str(e)} -->"
|
143 |
-
return
|
144 |
-
except Exception as e:
|
145 |
-
print(f"Error processing image for node {node['id']}: {str(e)}")
|
146 |
-
code_dict[node["id"]] = f"<!-- Error: {str(e)} -->"
|
147 |
-
|
148 |
-
def _generate_code(node):
|
149 |
-
if node["children"] == []:
|
150 |
-
t = Thread(target=lambda: _generate_code_with_retry(node))
|
151 |
-
t.start()
|
152 |
-
t_list.append(t)
|
153 |
-
else:
|
154 |
-
for child in node["children"]:
|
155 |
-
_generate_code(child)
|
156 |
-
|
157 |
-
_generate_code(bbox_tree)
|
158 |
-
|
159 |
-
# Wait for all threads to complete
|
160 |
-
for t in t_list:
|
161 |
-
t.join()
|
162 |
-
|
163 |
-
return code_dict
|
164 |
-
|
165 |
-
# Generate HTML from the bounding box tree
|
166 |
-
def generate_html(bbox_tree, output_file="output.html", img_path="data/test1.png"):
|
167 |
-
"""
|
168 |
-
Generates an HTML file with nested containers based on the bounding box tree.
|
169 |
-
|
170 |
-
:param bbox_tree: Dictionary representing the bounding box tree.
|
171 |
-
:param output_file: The name of the output HTML file.
|
172 |
-
"""
|
173 |
-
# HTML and CSS templates
|
174 |
-
# the container class is used to create grid and position the boxes
|
175 |
-
# include the tailwind css in the head tag
|
176 |
html_template_start = """
|
177 |
<!DOCTYPE html>
|
178 |
<html lang="en">
|
@@ -215,137 +113,110 @@ def generate_html(bbox_tree, output_file="output.html", img_path="data/test1.png
|
|
215 |
</html>
|
216 |
"""
|
217 |
|
218 |
-
|
219 |
-
def process_bbox(node, parent_width, parent_height, parent_left, parent_top, img):
|
220 |
bbox = node['bbox']
|
221 |
children = node.get('children', [])
|
222 |
-
|
223 |
-
|
224 |
-
# Calculate relative positions and sizes
|
225 |
left = (bbox[0] - parent_left) / parent_width * 100
|
226 |
top = (bbox[1] - parent_top) / parent_height * 100
|
227 |
width = (bbox[2] - bbox[0]) / parent_width * 100
|
228 |
height = (bbox[3] - bbox[1]) / parent_height * 100
|
229 |
|
230 |
-
|
231 |
-
html = f'''
|
232 |
-
<div id="{id}" class="box" style="left: {left}%; top: {top}%; width: {width}%; height: {height}%;">
|
233 |
-
'''
|
234 |
-
|
235 |
if children:
|
236 |
-
|
237 |
-
html += '''
|
238 |
-
<div class="container">
|
239 |
-
'''
|
240 |
-
# Get the current box's width and height in pixels for child calculations
|
241 |
current_width = bbox[2] - bbox[0]
|
242 |
current_height = bbox[3] - bbox[1]
|
243 |
for child in children:
|
244 |
-
html += process_bbox(child, current_width, current_height, bbox[0], bbox[1]
|
245 |
-
html += ''
|
246 |
-
|
247 |
-
'''
|
248 |
-
|
249 |
-
# Close the box div
|
250 |
-
html += '''
|
251 |
-
</div>
|
252 |
-
'''
|
253 |
return html
|
254 |
|
255 |
root_bbox = bbox_tree['bbox']
|
256 |
root_children = bbox_tree.get('children', [])
|
257 |
-
root_width = root_bbox[2]
|
258 |
-
root_height = root_bbox[3]
|
259 |
-
root_x = root_bbox[0]
|
260 |
-
root_y = root_bbox[1]
|
261 |
|
262 |
html_content = html_template_start
|
263 |
for child in root_children:
|
264 |
-
html_content += process_bbox(child, root_width, root_height,
|
265 |
html_content += html_template_end
|
266 |
|
267 |
-
soup = bs4.BeautifulSoup(html_content, 'html.parser')
|
268 |
-
html_content = soup.prettify()
|
269 |
-
|
270 |
with open(output_file, 'w') as f:
|
271 |
-
f.write(html_content)
|
272 |
|
273 |
-
# Substitute the code in the html file
|
274 |
def code_substitution(html_file, code_dict):
|
275 |
-
"""
|
276 |
with open(html_file, "r") as f:
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
code = code.replace("```html", "").replace("```", "")
|
281 |
-
div = soup.find(id=id)
|
282 |
-
# replace the inner html of the div
|
283 |
if div:
|
284 |
-
div.append(bs4.BeautifulSoup(code, 'html.parser'))
|
285 |
with open(html_file, "w") as f:
|
286 |
f.write(soup.prettify())
|
287 |
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
293 |
|
294 |
-
|
295 |
-
|
296 |
|
297 |
-
img_path = "data/input/test1.png"
|
298 |
with Image.open(img_path) as img:
|
299 |
width, height = img.size
|
300 |
|
301 |
-
|
302 |
-
root = {
|
303 |
-
"bbox": [0, 0, width, height], # Use actual image dimensions
|
304 |
-
"children": []
|
305 |
-
}
|
306 |
|
307 |
-
#
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
# Add each region as a child with its type
|
316 |
-
for region in boxes_data.get("regions", []):
|
317 |
-
# Convert normalized coordinates to pixel coordinates
|
318 |
-
x = region["x"] * width
|
319 |
-
y = region["y"] * height
|
320 |
-
w = region["w"] * width
|
321 |
-
h = region["h"] * height
|
322 |
-
|
323 |
-
child = {
|
324 |
-
"bbox": [x, y, x + w, y + h], # Convert to [x1, y1, x2, y2] format
|
325 |
-
"children": [],
|
326 |
-
"type": region_type_mapping.get(region["id"], "unknown")
|
327 |
-
}
|
328 |
-
root["children"].append(child)
|
329 |
|
330 |
-
# Assign IDs to all nodes
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
return id
|
336 |
|
337 |
-
|
|
|
|
|
|
|
|
|
|
|
338 |
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
|
343 |
-
|
344 |
-
|
345 |
-
# bot = Qwen_2_5_VL("qwen_api.txt", model="qwen2.5-vl-72b-instruct")
|
346 |
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
# Substitute the generated code into the HTML
|
351 |
-
code_substitution('data/output/test1_layout.html', code_dict)
|
|
|
3 |
import bs4
|
4 |
from threading import Thread
|
5 |
import time
|
6 |
+
import argparse
|
7 |
+
import json
|
8 |
+
import os
|
9 |
+
|
10 |
+
# This dictionary holds the user's instructions for the current run.
|
11 |
+
user_instruction = {"sidebar": "", "header": "", "navigation": "", "main content": ""}
|
12 |
+
|
13 |
+
def get_args():
    """Parse the command line of the HTML layout generator.

    Returns:
        argparse.Namespace with ``run_id`` (required string) and
        ``instructions`` (optional JSON string, ``None`` when omitted).
    """
    cli = argparse.ArgumentParser(
        description="Generates an HTML layout from bounding box data.",
    )
    cli.add_argument(
        '--run_id',
        type=str,
        required=True,
        help='A unique identifier for the processing run.',
    )
    cli.add_argument(
        '--instructions',
        type=str,
        help='A JSON string of instructions for different components.',
    )
    return cli.parse_args()
|
18 |
+
|
19 |
+
def get_prompt_dict(instructions):
    """Build the prompt for every component type with the user's text embedded.

    Args:
        instructions: mapping that must contain the keys ``"sidebar"``,
            ``"header"``, ``"navigation"`` and ``"main content"``; each value
            is inserted after ``User instruction:`` in the matching prompt.

    Returns:
        dict mapping each of the four component names to its prompt string.

    Raises:
        KeyError: if ``instructions`` lacks one of the four component keys.
    """
    opening = (
        "This is a screenshot of a container. Please fill in a complete HTML "
        "and tail-wind CSS code to accurately reproduce the given container. "
    )
    # NOTE(review): continuation lines of the template are written without
    # leading indentation - confirm against the deployed prompt text.
    closing = (
        " The following is the code for filling in:\n"
        "<div>\n"
        "your code here\n"
        "</div>,\n"
        "only return the code within the <div> and </div> tags"
    )
    positional = (
        "the relative position, layout, text information, and color of all "
        "blocks in the boundary box need to be basically consistent with the "
        "original screenshot based on the user's additional conditions."
    )
    # Component-specific guidance; dict order fixes the order of the result.
    guidance = {
        "sidebar": (
            "Please note that the layout, icon style, size, and text "
            "information of all blocks need to be basically consistent with "
            "the original screenshot based on the user's additional conditions."
        ),
        "header": "Please note that " + positional,
        "navigation": (
            "Please note that " + positional
            + " Please use the same icons as in the original screenshot."
        ),
        "main content": (
            "Please note that all images displayed in the screenshot must be "
            "replaced with pure gray-400 image blocks of the same size as the "
            "corresponding images in the original screenshot, and the text "
            "information in the images does not need to be recognized. The "
            + positional[len("the "):]
        ),
    }
    return {
        component: f"{opening}{text} User instruction: {instructions[component]}.{closing}"
        for component, text in guidance.items()
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
+
def generate_code(bbox_tree, img_path, bot, instructions):
    """Generates code for each leaf node in the bounding box tree.

    Args:
        bbox_tree: root node dict; nodes carry ``"bbox"``, ``"id"``, an
            optional ``"type"`` and an optional ``"children"`` list.
        img_path: path of the screenshot the bounding boxes refer to.
        bot: client object whose ``ask(prompt, encoded_image)`` returns the
            generated code for one cropped region.
        instructions: per-component user instructions, forwarded to
            ``get_prompt_dict``.

    Returns:
        dict mapping a leaf node's ``"id"`` to the code returned by ``bot``.
        Leaves whose type has no prompt, or whose request raised, are absent.
    """
    code_dict = {}
    prompt_dict = get_prompt_dict(instructions)

    def _generate_code(node, img):
        children = node.get("children")
        if children:
            for child in children:
                _generate_code(child, img)
            return

        # Leaf node: crop its region out of the screenshot.
        bbox = node["bbox"]
        cropped_img = img.crop(bbox)

        node_type = node.get("type")
        if node_type and node_type in prompt_dict:
            prompt = prompt_dict[node_type]
            try:
                code = bot.ask(prompt, encode_image(cropped_img))
                code_dict[node["id"]] = code
            except Exception as e:
                # Best effort: one failed region must not abort the others.
                print(f"Error generating code for {node_type}: {e}")
        else:
            print(f"Node type '{node_type}' not found or invalid.")

    # Open the screenshot in a context manager so its file handle is released
    # once every region has been processed (it was previously never closed).
    with Image.open(img_path) as img:
        _generate_code(bbox_tree, img)
    return code_dict
|
71 |
|
72 |
+
def generate_html(bbox_tree, output_file):
|
73 |
+
"""Generates an HTML file with nested containers based on the bounding box tree."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
html_template_start = """
|
75 |
<!DOCTYPE html>
|
76 |
<html lang="en">
|
|
|
113 |
</html>
|
114 |
"""
|
115 |
|
116 |
+
def process_bbox(node, parent_width, parent_height, parent_left, parent_top):
|
|
|
117 |
bbox = node['bbox']
|
118 |
children = node.get('children', [])
|
119 |
+
node_id = node['id']
|
120 |
+
|
|
|
121 |
left = (bbox[0] - parent_left) / parent_width * 100
|
122 |
top = (bbox[1] - parent_top) / parent_height * 100
|
123 |
width = (bbox[2] - bbox[0]) / parent_width * 100
|
124 |
height = (bbox[3] - bbox[1]) / parent_height * 100
|
125 |
|
126 |
+
html = f'<div id="{node_id}" class="box" style="left: {left}%; top: {top}%; width: {width}%; height: {height}%;">'
|
|
|
|
|
|
|
|
|
127 |
if children:
|
128 |
+
html += '<div class="container">'
|
|
|
|
|
|
|
|
|
129 |
current_width = bbox[2] - bbox[0]
|
130 |
current_height = bbox[3] - bbox[1]
|
131 |
for child in children:
|
132 |
+
html += process_bbox(child, current_width, current_height, bbox[0], bbox[1])
|
133 |
+
html += '</div>'
|
134 |
+
html += '</div>'
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
return html
|
136 |
|
137 |
root_bbox = bbox_tree['bbox']
|
138 |
root_children = bbox_tree.get('children', [])
|
139 |
+
root_width = root_bbox[2] - root_bbox[0]
|
140 |
+
root_height = root_bbox[3] - root_bbox[1]
|
|
|
|
|
141 |
|
142 |
html_content = html_template_start
|
143 |
for child in root_children:
|
144 |
+
html_content += process_bbox(child, root_width, root_height, root_bbox[0], root_bbox[1])
|
145 |
html_content += html_template_end
|
146 |
|
|
|
|
|
|
|
147 |
with open(output_file, 'w') as f:
|
148 |
+
f.write(bs4.BeautifulSoup(html_content, 'html.parser').prettify())
|
149 |
|
|
|
150 |
def code_substitution(html_file, code_dict):
    """Insert each generated snippet into the layout element with the same id.

    The file at ``html_file`` is parsed, every ``code_dict`` entry whose key
    matches an element id is appended to that element (with Markdown code
    fences removed), and the prettified document is written back in place.
    Entries without a matching element are ignored.
    """
    with open(html_file, "r") as f:
        soup = bs4.BeautifulSoup(f.read(), 'html.parser')

    for node_id, code in code_dict.items():
        target = soup.find(id=node_id)
        if not target:
            continue
        # Strip the ```html ... ``` fences the model may wrap its answer in.
        unfenced = code.replace("```html", "").replace("```", "")
        snippet = bs4.BeautifulSoup(unfenced, 'html.parser')
        target.append(snippet)

    with open(html_file, "w") as f:
        f.write(soup.prettify())
|
160 |
|
161 |
+
def main():
    """Generate the HTML layout for one run and fill it with model-written code.

    Parses the command line, merges any ``--instructions`` JSON object into
    the module-level ``user_instruction`` mapping, loads the run's bbox JSON
    and screenshot, builds a one-level bounding-box tree in pixel
    coordinates, writes the skeleton HTML, then requests code for every
    region and substitutes it into the skeleton.

    Raises:
        SystemExit: with status 1 when the bbox JSON, the screenshot, or the
            API key file is missing.
    """
    args = get_args()
    if args.instructions:
        try:
            parsed = json.loads(args.instructions)
        except json.JSONDecodeError:
            print("Error: Could not decode instructions JSON.")
        else:
            if isinstance(parsed, dict):
                user_instruction.update(parsed)
            else:
                # Valid JSON that is not an object (list, string, number)
                # would make dict.update() raise; treat it like malformed
                # input and continue with the default instructions.
                print("Error: Instructions JSON must be an object.")

    # --- Dynamic Path Construction ---
    base_dir = os.path.dirname(os.path.abspath(__file__))
    tmp_dir = os.path.join(base_dir, 'data', 'tmp', args.run_id)
    output_dir = os.path.join(base_dir, 'data', 'output', args.run_id)
    os.makedirs(output_dir, exist_ok=True)

    input_json_path = os.path.join(tmp_dir, f"{args.run_id}_bboxes.json")
    img_path = os.path.join(tmp_dir, f"{args.run_id}.png")
    output_html_path = os.path.join(output_dir, f"{args.run_id}_layout.html")

    if not os.path.exists(input_json_path) or not os.path.exists(img_path):
        print("Error: Input bbox JSON or image file not found.")
        # Raise SystemExit directly: the bare exit() helper comes from the
        # site module and is not guaranteed to exist in every interpreter.
        raise SystemExit(1)

    print(f"--- Starting HTML Generation for run_id: {args.run_id} ---")

    with open(input_json_path, 'r') as f:
        boxes_data = json.load(f)

    with Image.open(img_path) as img:
        width, height = img.size

    root = {"bbox": [0, 0, width, height], "children": [], "id": 0}

    # Convert the stored bboxes to pixel coordinates (stored values are
    # divided by 1000 and scaled by the image size) and give every region a
    # unique id, starting at 1, for later code substitution.
    for node_id, (name, norm_bbox) in enumerate(boxes_data.items(), start=1):
        x1 = int(norm_bbox[0] * width / 1000)
        y1 = int(norm_bbox[1] * height / 1000)
        x2 = int(norm_bbox[2] * width / 1000)
        y2 = int(norm_bbox[3] * height / 1000)
        root["children"].append(
            {"bbox": [x1, y1, x2, y2], "type": name, "children": [], "id": node_id}
        )

    generate_html(root, output_html_path)

    api_path = os.path.join(base_dir, "doubao_api.txt")
    if not os.path.exists(api_path):
        print(f"Error: API key not found at {api_path}")
        raise SystemExit(1)

    bot = Doubao(api_path, model="doubao-1.5-thinking-vision-pro-250428")
    code_dict = generate_code(root, img_path, bot, user_instruction)
    code_substitution(output_html_path, code_dict)

    print(f"HTML layout with generated content saved to {os.path.basename(output_html_path)}")
    print(f"--- HTML Generation Complete for run_id: {args.run_id} ---")


if __name__ == "__main__":
    main()
|
|
|
|
|
|
screencoder/image_box_detection.py
CHANGED
@@ -115,11 +115,33 @@ def draw_bboxes_on_image(img, region_bboxes, placeholder_bboxes):
|
|
115 |
return boxed
|
116 |
|
117 |
|
118 |
-
def main(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
# Read original screenshot
|
120 |
-
img = cv2.imread(str(
|
121 |
if img is None:
|
122 |
-
sys.exit(f"Error: Cannot read image {
|
123 |
if img.std() < 5:
|
124 |
print("Warning: The screenshot is almost pure color, it may not be the original screenshot with real thumbnails.")
|
125 |
|
@@ -127,10 +149,11 @@ def main(args):
|
|
127 |
|
128 |
# Parse HTML → Get bboxes
|
129 |
region_bboxes, placeholder_bboxes, layout_width, layout_height = asyncio.run(
|
130 |
-
extract_bboxes_from_html(
|
131 |
)
|
132 |
if not placeholder_bboxes:
|
133 |
-
|
|
|
134 |
|
135 |
# Calculate separate scale factors for X and Y to handle aspect ratio differences
|
136 |
scale_x = W / layout_width if layout_width > 0 else 1
|
@@ -163,10 +186,9 @@ def main(args):
|
|
163 |
overlay = draw_bboxes_on_image(img, scaled_regions, scaled_placeholders)
|
164 |
|
165 |
# Save debug image
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
print(f"Success: BBox overlay saved to {out_png}")
|
170 |
|
171 |
|
172 |
# Convert absolute pixel coordinates to proportions for the final JSON output
|
@@ -195,24 +217,19 @@ def main(args):
|
|
195 |
output_json = json.dumps(output_data, indent=2, ensure_ascii=False)
|
196 |
print(output_json)
|
197 |
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
203 |
|
204 |
# ---------- CLI ----------
|
205 |
if __name__ == "__main__":
|
206 |
-
|
207 |
-
description="Draw BBoxes parsed from HTML on the original screenshot"
|
208 |
-
)
|
209 |
-
parser.add_argument("--html", required=False, type=Path, default=Path("data/output/test1_layout.html"),
|
210 |
-
help="Generated HTML file (with gray placeholder)")
|
211 |
-
parser.add_argument("--screenshot", required=False, type=Path, default=Path("data/input/test1.png"),
|
212 |
-
help="Original UI screenshot (with real thumbnails)")
|
213 |
-
parser.add_argument("--out", default=Path("data/tmp"), type=Path,
|
214 |
-
help="Output directory (save debug_gray_bboxes_test1.png)")
|
215 |
-
parser.add_argument("--json", type=Path, default=Path("data/tmp/test1_bboxes.json"),
|
216 |
-
help="If provided, write BBox list to JSON file")
|
217 |
-
args = parser.parse_args()
|
218 |
-
main(args)
|
|
|
115 |
return boxed
|
116 |
|
117 |
|
118 |
+
def main():
|
119 |
+
args = get_args()
|
120 |
+
run_id = args.run_id
|
121 |
+
|
122 |
+
# --- Dynamic Path Construction ---
|
123 |
+
base_dir = Path(__file__).parent.resolve()
|
124 |
+
# Go up one level to the project root to find the data directory
|
125 |
+
project_root = base_dir.parent
|
126 |
+
tmp_dir = project_root / 'screencoder' / 'data' / 'tmp' / run_id
|
127 |
+
output_dir = project_root / 'screencoder' / 'data' / 'output' / run_id
|
128 |
+
|
129 |
+
html_path = output_dir / f"{run_id}_layout.html"
|
130 |
+
screenshot_path = tmp_dir / f"{run_id}.png"
|
131 |
+
output_json_path = tmp_dir / f"{run_id}_bboxes.json"
|
132 |
+
debug_image_path = tmp_dir / f"debug_gray_bboxes_{run_id}.png"
|
133 |
+
|
134 |
+
if not html_path.exists():
|
135 |
+
sys.exit(f"Error: HTML file not found at {html_path}")
|
136 |
+
if not screenshot_path.exists():
|
137 |
+
sys.exit(f"Error: Screenshot not found at {screenshot_path}")
|
138 |
+
|
139 |
+
print(f"--- Starting Image Box Detection for run_id: {run_id} ---")
|
140 |
+
|
141 |
# Read original screenshot
|
142 |
+
img = cv2.imread(str(screenshot_path))
|
143 |
if img is None:
|
144 |
+
sys.exit(f"Error: Cannot read image {screenshot_path}")
|
145 |
if img.std() < 5:
|
146 |
print("Warning: The screenshot is almost pure color, it may not be the original screenshot with real thumbnails.")
|
147 |
|
|
|
149 |
|
150 |
# Parse HTML → Get bboxes
|
151 |
region_bboxes, placeholder_bboxes, layout_width, layout_height = asyncio.run(
|
152 |
+
extract_bboxes_from_html(html_path)
|
153 |
)
|
154 |
if not placeholder_bboxes:
|
155 |
+
# This is not necessarily an error; some UIs might not have placeholders.
|
156 |
+
print("Info: No gray placeholder blocks found.")
|
157 |
|
158 |
# Calculate separate scale factors for X and Y to handle aspect ratio differences
|
159 |
scale_x = W / layout_width if layout_width > 0 else 1
|
|
|
186 |
overlay = draw_bboxes_on_image(img, scaled_regions, scaled_placeholders)
|
187 |
|
188 |
# Save debug image
|
189 |
+
debug_image_path.parent.mkdir(parents=True, exist_ok=True)
|
190 |
+
cv2.imwrite(str(debug_image_path), overlay)
|
191 |
+
print(f"Success: BBox overlay saved to {debug_image_path}")
|
|
|
192 |
|
193 |
|
194 |
# Convert absolute pixel coordinates to proportions for the final JSON output
|
|
|
217 |
output_json = json.dumps(output_data, indent=2, ensure_ascii=False)
|
218 |
print(output_json)
|
219 |
|
220 |
+
output_json_path.parent.mkdir(parents=True, exist_ok=True)
|
221 |
+
output_json_path.write_text(output_json)
|
222 |
+
print(f"Success: BBox list saved to {output_json_path}")
|
223 |
+
print(f"--- Image Box Detection Complete for run_id: {run_id} ---")
|
224 |
|
225 |
+
def get_args():
    """Parse the command line of the image box detection step.

    Returns:
        argparse.Namespace whose only option is the required ``run_id``.
    """
    cli = argparse.ArgumentParser(
        description="Extracts placeholder bounding boxes from an HTML file and maps them to a screenshot."
    )
    cli.add_argument(
        '--run_id',
        required=True,
        type=str,
        help="A unique identifier for the processing run.",
    )
    return cli.parse_args()


# ---------- CLI ----------
if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
screencoder/image_replacer.py
CHANGED
@@ -4,17 +4,45 @@ from pathlib import Path
|
|
4 |
from bs4 import BeautifulSoup
|
5 |
import cv2
|
6 |
import re
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
-
def main(args):
|
9 |
# --- Phase 1: Crop and Save All Images First ---
|
10 |
|
11 |
# 1. Load data
|
12 |
-
mapping_data = json.loads(
|
13 |
-
uied_data = json.loads(
|
14 |
-
original_image = cv2.imread(str(
|
15 |
|
16 |
if original_image is None:
|
17 |
-
raise ValueError(f"Could not load the original image from {
|
18 |
|
19 |
# Get image shapes to calculate a simple, global scaling factor
|
20 |
H_proc, W_proc, _ = uied_data['img_shape']
|
@@ -29,7 +57,7 @@ def main(args):
|
|
29 |
}
|
30 |
|
31 |
# 2. Create a directory for cropped images
|
32 |
-
crop_dir =
|
33 |
crop_dir.mkdir(exist_ok=True)
|
34 |
print(f"Saving cropped images to: {crop_dir.resolve()}")
|
35 |
|
@@ -67,7 +95,7 @@ def main(args):
|
|
67 |
# --- Phase 2: Use BeautifulSoup to Replace Placeholders by Order ---
|
68 |
|
69 |
print("\nStarting offline HTML processing with BeautifulSoup...")
|
70 |
-
html_content =
|
71 |
soup = BeautifulSoup(html_content, 'html.parser')
|
72 |
|
73 |
# 1. Find all placeholder elements by their class, in document order.
|
@@ -115,27 +143,16 @@ def main(args):
|
|
115 |
ph_element.replace_with(img_tag)
|
116 |
|
117 |
# Save the modified HTML
|
118 |
-
|
119 |
|
120 |
print(f"\nSuccessfully replaced {min(len(placeholder_elements), len(ordered_placeholder_ids))} placeholders.")
|
121 |
-
print(f"Final HTML generated at {
|
|
|
122 |
|
|
|
|
|
|
|
|
|
123 |
|
124 |
if __name__ == "__main__":
|
125 |
-
|
126 |
-
parser.add_argument("--mapping", type=Path, required=False, help="Path to the mapping JSON file from mapping.py.")
|
127 |
-
parser.add_argument("--uied", type=Path, required=False, help="Path to the UIED JSON file.")
|
128 |
-
parser.add_argument("--original-image", type=Path, required=False, help="Path to the original screenshot image.")
|
129 |
-
parser.add_argument("--gray-html", type=Path, required=False, help="Path to the input HTML file with gray placeholders.")
|
130 |
-
parser.add_argument("--output-html", type=Path, required=False, help="Path to save the final, modified HTML file.")
|
131 |
-
|
132 |
-
parser.set_defaults(
|
133 |
-
mapping=Path('data/tmp/mapping_full_test1.json'),
|
134 |
-
uied=Path('data/tmp/ip/test1.json'),
|
135 |
-
original_image=Path('data/input/test1.png'),
|
136 |
-
gray_html=Path('data/output/test1_layout.html'),
|
137 |
-
output_html=Path('data/output/test1_layout_final.html')
|
138 |
-
)
|
139 |
-
|
140 |
-
args = parser.parse_args()
|
141 |
-
main(args)
|
|
|
4 |
from bs4 import BeautifulSoup
|
5 |
import cv2
|
6 |
import re
|
7 |
+
import sys
|
8 |
+
|
9 |
+
def main():
|
10 |
+
args = get_args()
|
11 |
+
run_id = args.run_id
|
12 |
+
|
13 |
+
# --- Dynamic Path Construction ---
|
14 |
+
base_dir = Path(__file__).parent.resolve()
|
15 |
+
tmp_dir = base_dir / 'data' / 'tmp' / run_id
|
16 |
+
output_dir = base_dir / 'data' / 'output' / run_id
|
17 |
+
|
18 |
+
mapping_path = tmp_dir / f"mapping_full_{run_id}.json"
|
19 |
+
uied_path = tmp_dir / "ip" / f"{run_id}.json"
|
20 |
+
original_image_path = tmp_dir / f"{run_id}.png"
|
21 |
+
# This is the input HTML with placeholders
|
22 |
+
gray_html_path = output_dir / f"{run_id}_layout.html"
|
23 |
+
# This will be the final output of the entire pipeline
|
24 |
+
final_html_path = output_dir / f"{run_id}_layout_final.html"
|
25 |
+
|
26 |
+
# --- Input Validation ---
|
27 |
+
if not all([p.exists() for p in [mapping_path, uied_path, original_image_path, gray_html_path]]):
|
28 |
+
print("Error: One or more required input files are missing.", file=sys.stderr)
|
29 |
+
if not mapping_path.exists(): print(f"- Missing: {mapping_path}", file=sys.stderr)
|
30 |
+
if not uied_path.exists(): print(f"- Missing: {uied_path}", file=sys.stderr)
|
31 |
+
if not original_image_path.exists(): print(f"- Missing: {original_image_path}", file=sys.stderr)
|
32 |
+
if not gray_html_path.exists(): print(f"- Missing: {gray_html_path}", file=sys.stderr)
|
33 |
+
sys.exit(1)
|
34 |
+
|
35 |
+
print(f"--- Starting Image Replacement for run_id: {run_id} ---")
|
36 |
|
|
|
37 |
# --- Phase 1: Crop and Save All Images First ---
|
38 |
|
39 |
# 1. Load data
|
40 |
+
mapping_data = json.loads(mapping_path.read_text())
|
41 |
+
uied_data = json.loads(uied_path.read_text())
|
42 |
+
original_image = cv2.imread(str(original_image_path))
|
43 |
|
44 |
if original_image is None:
|
45 |
+
raise ValueError(f"Could not load the original image from {original_image_path}")
|
46 |
|
47 |
# Get image shapes to calculate a simple, global scaling factor
|
48 |
H_proc, W_proc, _ = uied_data['img_shape']
|
|
|
57 |
}
|
58 |
|
59 |
# 2. Create a directory for cropped images
|
60 |
+
crop_dir = final_html_path.parent / f"cropped_images_{run_id}"
|
61 |
crop_dir.mkdir(exist_ok=True)
|
62 |
print(f"Saving cropped images to: {crop_dir.resolve()}")
|
63 |
|
|
|
95 |
# --- Phase 2: Use BeautifulSoup to Replace Placeholders by Order ---
|
96 |
|
97 |
print("\nStarting offline HTML processing with BeautifulSoup...")
|
98 |
+
html_content = gray_html_path.read_text()
|
99 |
soup = BeautifulSoup(html_content, 'html.parser')
|
100 |
|
101 |
# 1. Find all placeholder elements by their class, in document order.
|
|
|
143 |
ph_element.replace_with(img_tag)
|
144 |
|
145 |
# Save the modified HTML
|
146 |
+
final_html_path.write_text(str(soup))
|
147 |
|
148 |
print(f"\nSuccessfully replaced {min(len(placeholder_elements), len(ordered_placeholder_ids))} placeholders.")
|
149 |
+
print(f"Final HTML generated at {final_html_path.resolve()}")
|
150 |
+
print(f"--- Image Replacement Complete for run_id: {run_id} ---")
|
151 |
|
152 |
+
def get_args(argv=None):
    """Parse the command-line options for the image replacement step.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) keeps the original
            behaviour of reading ``sys.argv[1:]``.

    Returns:
        argparse.Namespace: namespace with a single string attribute
        ``run_id``.

    Raises:
        SystemExit: raised by argparse when ``--run_id`` is missing or the
            arguments are otherwise invalid.
    """
    parser = argparse.ArgumentParser(description="Replace placeholder divs in an HTML file with cropped images based on UIED mappings.")
    parser.add_argument("--run_id", type=str, required=True, help="A unique identifier for the processing run.")
    # Passing None makes argparse fall back to sys.argv[1:], so existing
    # callers that invoke get_args() with no arguments are unaffected.
    return parser.parse_args(argv)
|
156 |
|
157 |
if __name__ == "__main__":
|
158 |
+
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
screencoder/main.py
CHANGED
@@ -2,16 +2,15 @@ import subprocess
|
|
2 |
import sys
|
3 |
import os
|
4 |
import json
|
|
|
|
|
|
|
5 |
|
6 |
-
|
7 |
-
|
8 |
-
# In a real scenario, this should be a more robust mechanism
|
9 |
-
def inject_prompt_to_generator(prompt_text):
|
10 |
if not prompt_text:
|
11 |
return
|
12 |
|
13 |
-
# In this example, we assume the prompt is a simple string for the "main content"
|
14 |
-
# A more complex implementation would parse a structured prompt
|
15 |
user_instruction = {
|
16 |
"sidebar": "Make all icons look better; fill in relevant English text; beautify the layout.",
|
17 |
"header": "Make the Google logo look better; change the avatar color to be more appealing.",
|
@@ -19,123 +18,115 @@ def inject_prompt_to_generator(prompt_text):
|
|
19 |
"main content": prompt_text
|
20 |
}
|
21 |
|
22 |
-
|
23 |
-
|
24 |
-
lines = f.readlines()
|
25 |
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
with
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
|
46 |
-
|
47 |
-
print(f"Executing: python {script_path}")
|
48 |
-
print(f"{'='*20}")
|
49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
try:
|
51 |
-
|
52 |
-
|
53 |
-
check=True,
|
54 |
-
capture_output=True,
|
55 |
-
text=True
|
56 |
-
)
|
57 |
-
print("Success!")
|
58 |
-
print("Output:")
|
59 |
print(result.stdout)
|
60 |
if result.stderr:
|
61 |
-
print("
|
62 |
print(result.stderr)
|
63 |
-
except FileNotFoundError:
|
64 |
-
print(f"ERROR: Script not found at '{script_path}'")
|
65 |
-
sys.exit(1)
|
66 |
except subprocess.CalledProcessError as e:
|
67 |
-
print(f"
|
68 |
-
print("Stdout:")
|
69 |
print(e.stdout)
|
70 |
-
print("Stderr:")
|
71 |
print(e.stderr)
|
72 |
-
|
73 |
-
except Exception as e:
|
74 |
-
print(f"An unexpected error occurred while running '{script_path}': {e}")
|
75 |
-
sys.exit(1)
|
76 |
|
77 |
-
|
78 |
-
def generate_html_for_demo(image_path, prompt, output_dir="screencoder/data/output"):
|
79 |
"""
|
80 |
-
A
|
81 |
-
It
|
|
|
|
|
|
|
82 |
"""
|
83 |
-
|
84 |
-
|
85 |
-
# Setup paths
|
86 |
-
project_root = os.path.dirname(__file__)
|
87 |
-
# The block_parsor script expects a specific input file name, so we must place our image there.
|
88 |
-
# IMPORTANT: This assumes a single-user-at-a-time workflow.
|
89 |
-
# For multi-user, you'd need isolated temp directories.
|
90 |
-
target_input_path = os.path.join(project_root, "data/input/test1.png")
|
91 |
|
92 |
-
|
93 |
-
os.
|
|
|
|
|
|
|
94 |
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
return f.read()
|
119 |
-
else:
|
120 |
-
return "<html><body><h1>Error: Final HTML not generated.</h1></body></html>"
|
121 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
122 |
|
123 |
def main():
|
124 |
-
"""Main function to run the entire Screencoder workflow."""
|
125 |
-
print("Starting the Screencoder full workflow...")
|
126 |
-
|
127 |
-
#
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
run_script("UIED/run_single.py")
|
136 |
-
run_script("mapping.py")
|
137 |
-
run_script("image_replacer.py")
|
138 |
-
|
139 |
print("\nScreencoder workflow completed successfully!")
|
140 |
|
141 |
if __name__ == "__main__":
|
|
|
2 |
import sys
|
3 |
import os
|
4 |
import json
|
5 |
+
import uuid
|
6 |
+
import shutil
|
7 |
+
from PIL import Image
|
8 |
|
9 |
+
# This function is now more robust, injecting the prompt into a temporary copy of the generator.
|
10 |
+
def inject_prompt_to_generator(prompt_text, temp_generator_path):
|
|
|
|
|
11 |
if not prompt_text:
|
12 |
return
|
13 |
|
|
|
|
|
14 |
user_instruction = {
|
15 |
"sidebar": "Make all icons look better; fill in relevant English text; beautify the layout.",
|
16 |
"header": "Make the Google logo look better; change the avatar color to be more appealing.",
|
|
|
18 |
"main content": prompt_text
|
19 |
}
|
20 |
|
21 |
+
with open(temp_generator_path, 'r', encoding='utf-8') as f:
|
22 |
+
content = f.read()
|
|
|
23 |
|
24 |
+
start_marker = "user_instruction = {"
|
25 |
+
end_marker = "}"
|
26 |
+
start_index = content.find(start_marker)
|
27 |
+
end_index = content.find(end_marker, start_index)
|
28 |
+
|
29 |
+
if start_index != -1 and end_index != -1:
|
30 |
+
dict_str = f"user_instruction = {json.dumps(user_instruction, indent=4)}"
|
31 |
+
content = content[:start_index] + dict_str + content[end_index+1:]
|
32 |
+
|
33 |
+
with open(temp_generator_path, 'w', encoding='utf-8') as f:
|
34 |
+
f.write(content)
|
35 |
+
|
36 |
+
def run_script_with_run_id(script_name, run_id, instructions=None):
    """Execute one pipeline script as a subprocess for a specific run.

    The script is looked up relative to this module's directory; if it is not
    found there, the ``UIED/`` subdirectory is tried instead. The child is
    started as ``<interpreter> <script> --run_id <run_id>``. For
    ``html_generator.py`` only, a truthy ``instructions`` value is
    JSON-encoded and appended as ``--instructions <json>``.

    Args:
        script_name: Script path relative to this module's directory
            (e.g. ``"mapping.py"`` or ``"UIED/run_single.py"``).
        run_id: Identifier forwarded to the script via ``--run_id``.
        instructions: Optional JSON-serialisable object forwarded to
            ``html_generator.py``; ignored for every other script.

    Raises:
        subprocess.CalledProcessError: if the script exits with a non-zero
            status. Its captured stdout/stderr are printed before re-raising.
    """
    screencoder_dir = os.path.dirname(os.path.abspath(__file__))
    script_path = os.path.join(screencoder_dir, script_name)
    if not os.path.exists(script_path):
        # Handle scripts inside subdirectories like UIED/
        script_path = os.path.join(screencoder_dir, "UIED", script_name)

    # Run the child with the interpreter that is executing this process so it
    # sees the same virtualenv and installed packages. A bare "python" is
    # resolved through PATH and may be absent or point at another install.
    # sys.executable can be empty/None in embedded setups, hence the fallback.
    interpreter = sys.executable or "python"
    command = [interpreter, script_path, "--run_id", run_id]

    # Add instructions to the command if provided
    if instructions and script_name == "html_generator.py":
        instructions_json = json.dumps(instructions)
        command.extend(["--instructions", instructions_json])

    print(f"\n--- Running script: {script_name} ---")
    try:
        # Pass the current environment variables to the subprocess
        result = subprocess.run(command, check=True, capture_output=True, text=True, env=os.environ)
        print(result.stdout)
        if result.stderr:
            print("Error:")
            print(result.stderr)
    except subprocess.CalledProcessError as e:
        print(f"Error executing {script_name}:")
        print(e.stdout)
        print(e.stderr)
        raise  # Re-raise the exception to stop the workflow if a script fails
|
|
|
|
|
|
|
64 |
|
65 |
+
def generate_html_for_demo(image_path, instructions):
    """Run the full Screencoder pipeline on one image and return the HTML.

    Each call generates a fresh UUID ``run_id`` and works in its own
    ``data/tmp/<run_id>`` and ``data/output/<run_id>`` directories (created
    under this module's directory). The input image is re-saved as
    ``<run_id>.png`` in the tmp directory, then the pipeline scripts are run
    in order via ``run_script_with_run_id``.

    Args:
        image_path: Path of the screenshot to process (anything accepted by
            ``PIL.Image.open``).
        instructions: Forwarded to ``html_generator.py``; see
            ``run_script_with_run_id``.

    Returns:
        str: the contents of ``<run_id>_layout_final.html`` on success.
        On failure a plain-text message is returned instead
        (``"Error: Final HTML file not found ..."`` or
        ``"An error occurred: ..."``); pipeline errors are not raised.
    """
    run_id = str(uuid.uuid4())
    print(f"--- Starting Screencoder workflow for run_id: {run_id} ---")

    base_dir = os.path.dirname(os.path.abspath(__file__))
    tmp_dir = os.path.join(base_dir, 'data', 'tmp', run_id)
    output_dir = os.path.join(base_dir, 'data', 'output', run_id)
    os.makedirs(tmp_dir, exist_ok=True)
    os.makedirs(output_dir, exist_ok=True)

    try:
        # 1. Copy user-uploaded image to the temp input directory.
        # The context manager closes the source file handle once the PNG copy
        # has been written, instead of leaving it open until garbage collection.
        new_image_path = os.path.join(tmp_dir, f"{run_id}.png")
        with Image.open(image_path) as img:
            img.save(new_image_path, "PNG")

        # 2. Run the processing scripts in sequence
        run_script_with_run_id("UIED/run_single.py", run_id)
        run_script_with_run_id("block_parsor.py", run_id)
        run_script_with_run_id("html_generator.py", run_id, instructions)
        run_script_with_run_id("image_box_detection.py", run_id)
        run_script_with_run_id("mapping.py", run_id)
        run_script_with_run_id("image_replacer.py", run_id)

        # 3. Read the final generated HTML
        final_html_path = os.path.join(output_dir, f"{run_id}_layout_final.html")
        if not os.path.exists(final_html_path):
            return f"Error: Final HTML file not found for run_id: {run_id}"

        with open(final_html_path, 'r', encoding='utf-8') as f:
            html_content = f.read()
        print(f"Successfully generated HTML for run_id: {run_id}")
        return html_content

    except Exception as e:
        # Top-level boundary for the demo: report the failure as text rather
        # than propagating it to the UI layer.
        print(f"An error occurred during the workflow for run_id {run_id}: {e}")
        return f"An error occurred: {e}"
    finally:
        # 4. The per-run directories are deliberately NOT removed here (the
        # rmtree calls were disabled upstream). NOTE(review): presumably the
        # returned HTML references cropped images stored under output_dir --
        # confirm before re-enabling deletion. The log states what actually
        # happened instead of claiming a cleanup that never ran.
        print(f"Temporary files for run_id {run_id} were kept in {tmp_dir} and {output_dir}")
|
117 |
|
118 |
def main():
    """Run the legacy Screencoder workflow with the fixed run id ``test1``.

    Unlike the demo entry point, this path does not create a dynamic run id;
    every pipeline script is invoked with the hardcoded ``"test1"`` value.
    A failing script propagates its ``subprocess.CalledProcessError``.
    """
    print("Starting the Screencoder full workflow (legacy)...")

    legacy_run_id = "test1"  # Hardcoded for legacy main
    # Pipeline stages, in the order they must execute.
    pipeline = (
        "UIED/run_single.py",
        "block_parsor.py",
        "html_generator.py",
        "image_box_detection.py",
        "mapping.py",
        "image_replacer.py",
    )
    for stage in pipeline:
        run_script_with_run_id(stage, legacy_run_id)

    print("\nScreencoder workflow completed successfully!")
|
131 |
|
132 |
if __name__ == "__main__":
|
screencoder/mapping.py
CHANGED
@@ -227,21 +227,41 @@ def generate_debug_overlay(img_path, all_uied_boxes, region_results, uied_shape,
|
|
227 |
cv2.imwrite(str(out_png), canvas)
|
228 |
|
229 |
|
230 |
-
def main(
|
231 |
-
|
232 |
-
|
233 |
-
|
|
|
|
|
|
|
234 |
|
235 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
236 |
if orig_img is None:
|
237 |
-
sys.exit(f"Error: Could not read debug source image at {
|
238 |
H_orig, W_orig, _ = orig_img.shape
|
239 |
|
240 |
# 2. Load proportional data and convert to absolute pixel coordinates
|
241 |
-
pixel_regions, pixel_placeholders = load_regions_and_placeholders(
|
242 |
|
243 |
# 3. Load UIED data
|
244 |
-
all_uied_boxes, uied_shape = load_uied_boxes(
|
245 |
|
246 |
if not pixel_placeholders or not all_uied_boxes:
|
247 |
print("Error: Could not proceed without placeholder and UIED data.")
|
@@ -294,21 +314,19 @@ def main(args):
|
|
294 |
# 6. Report and save results
|
295 |
print(f"Successfully created {total_mappings_count} one-to-one mappings out of {total_placeholders_count} placeholders.")
|
296 |
|
297 |
-
|
298 |
-
print(f"Mapping data written to {
|
299 |
|
300 |
-
if
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
|
|
|
|
|
|
|
|
306 |
|
307 |
if __name__ == "__main__":
|
308 |
-
|
309 |
-
ap.add_argument("--gray", type=Path, default=Path("data/tmp/test1_bboxes.json"), help="Path to the JSON file with gray placeholder boxes.")
|
310 |
-
ap.add_argument("--uied", type=Path, default=Path("data/tmp/ip/test1.json"), help="Path to the JSON file with UIED detected boxes.")
|
311 |
-
ap.add_argument("--out", default=Path("data/tmp/mapping_full_test1.json"), type=Path, help="Output path for the mapping JSON file.")
|
312 |
-
ap.add_argument("--debug", type=Path, default=Path("data/tmp/overlay_test_test1.png"), help="Output path for the debug overlay PNG.")
|
313 |
-
ap.add_argument("--debug-src", type=Path, default=Path("data/input/test1.png"), help="Path to the original screenshot for the debug overlay background.")
|
314 |
-
main(ap.parse_args())
|
|
|
227 |
cv2.imwrite(str(out_png), canvas)
|
228 |
|
229 |
|
230 |
+
def main():
|
231 |
+
args = get_args()
|
232 |
+
run_id = args.run_id
|
233 |
+
|
234 |
+
# --- Dynamic Path Construction ---
|
235 |
+
base_dir = Path(__file__).parent.resolve()
|
236 |
+
tmp_dir = base_dir / 'data' / 'tmp' / run_id
|
237 |
|
238 |
+
gray_json_path = tmp_dir / f"{run_id}_bboxes.json"
|
239 |
+
uied_json_path = tmp_dir / "ip" / f"{run_id}.json"
|
240 |
+
mapping_output_path = tmp_dir / f"mapping_full_{run_id}.json"
|
241 |
+
debug_src_path = tmp_dir / f"{run_id}.png"
|
242 |
+
debug_overlay_path = tmp_dir / f"overlay_test_{run_id}.png"
|
243 |
+
|
244 |
+
# --- Input Validation ---
|
245 |
+
if not gray_json_path.exists():
|
246 |
+
sys.exit(f"Error: Placeholder JSON not found at {gray_json_path}")
|
247 |
+
if not uied_json_path.exists():
|
248 |
+
sys.exit(f"Error: UIED JSON not found at {uied_json_path}")
|
249 |
+
if not debug_src_path.exists():
|
250 |
+
sys.exit(f"Error: Source image for coordinate conversion not found at {debug_src_path}")
|
251 |
+
|
252 |
+
print(f"--- Starting Mapping for run_id: {run_id} ---")
|
253 |
+
|
254 |
+
# 1. Load the original screenshot to get its absolute dimensions
|
255 |
+
orig_img = cv2.imread(str(debug_src_path))
|
256 |
if orig_img is None:
|
257 |
+
sys.exit(f"Error: Could not read debug source image at {debug_src_path}.")
|
258 |
H_orig, W_orig, _ = orig_img.shape
|
259 |
|
260 |
# 2. Load proportional data and convert to absolute pixel coordinates
|
261 |
+
pixel_regions, pixel_placeholders = load_regions_and_placeholders(gray_json_path, W_orig, H_orig)
|
262 |
|
263 |
# 3. Load UIED data
|
264 |
+
all_uied_boxes, uied_shape = load_uied_boxes(uied_json_path)
|
265 |
|
266 |
if not pixel_placeholders or not all_uied_boxes:
|
267 |
print("Error: Could not proceed without placeholder and UIED data.")
|
|
|
314 |
# 6. Report and save results
|
315 |
print(f"Successfully created {total_mappings_count} one-to-one mappings out of {total_placeholders_count} placeholders.")
|
316 |
|
317 |
+
mapping_output_path.write_text(json.dumps(final_results, indent=2, ensure_ascii=False))
|
318 |
+
print(f"Mapping data written to {mapping_output_path}")
|
319 |
|
320 |
+
# Always generate the debug image if the source exists
|
321 |
+
generate_debug_overlay(debug_src_path, all_uied_boxes, final_results, uied_shape, debug_overlay_path)
|
322 |
+
print(f"Debug image written to {debug_overlay_path}")
|
323 |
+
|
324 |
+
print(f"--- Mapping Complete for run_id: {run_id} ---")
|
325 |
+
|
326 |
+
def get_args(argv=None):
    """Parse the command-line options for the mapping step.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) keeps the original
            behaviour of reading ``sys.argv[1:]``.

    Returns:
        argparse.Namespace: namespace with a single string attribute
        ``run_id``.

    Raises:
        SystemExit: raised by argparse when ``--run_id`` is missing or the
            arguments are otherwise invalid.
    """
    ap = argparse.ArgumentParser(description="Map UIED components to placeholder boxes.")
    ap.add_argument('--run_id', required=True, type=str, help="A unique identifier for the processing run.")
    # Passing None makes argparse fall back to sys.argv[1:], so existing
    # callers that invoke get_args() with no arguments are unaffected.
    return ap.parse_args(argv)
|
330 |
|
331 |
if __name__ == "__main__":
|
332 |
+
main()
|
|
|
|
|
|
|
|
|
|
|
|