Jimmyzheng-10 committed on
Commit
0246ff9
·
1 Parent(s): 1bcdbad
app.py CHANGED
@@ -5,20 +5,35 @@ import cv2
5
  import numpy as np
6
  from screencoder.main import generate_html_for_demo
7
 
8
- # Default Demo Examples
9
- SAMPLE_IMAGES_DIR = "screencoder/data/input"
10
- examples_data = []
11
- if os.path.exists(SAMPLE_IMAGES_DIR):
12
- sample_files = [f for f in sorted(os.listdir(SAMPLE_IMAGES_DIR)) if f.endswith(('.png', '.jpg', '.jpeg')) and not f.startswith('.')]
13
-
14
- for filename in sample_files:
15
- path = os.path.join(SAMPLE_IMAGES_DIR, filename)
16
- prompt = f"Generate a modern UI based on the '{filename}' example, focusing on a clean and intuitive layout."
17
- examples_data.append([path, prompt, path])
18
- else:
19
- print(f"Warning: Sample images directory not found at {SAMPLE_IMAGES_DIR}. Examples will be empty.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- def process_image_and_prompt(image_np, image_path_from_state, prompt):
22
  final_image_path = ""
23
  is_temp_file = False
24
 
@@ -35,8 +50,15 @@ def process_image_and_prompt(image_np, image_path_from_state, prompt):
35
  else:
36
  return "<html><body><h1 style='font-family: sans-serif; text-align: center; margin-top: 40px;'>Please provide an image.</h1></body></html>", ""
37
 
38
- print(f"With prompt: '{prompt}'")
39
- html_content = generate_html_for_demo(final_image_path, prompt)
 
 
 
 
 
 
 
40
 
41
  if is_temp_file:
42
  os.unlink(final_image_path)
@@ -44,7 +66,7 @@ def process_image_and_prompt(image_np, image_path_from_state, prompt):
44
  return html_content, html_content
45
 
46
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"), fill_height=True) as demo:
47
- active_image_path_state = gr.State(value=examples_data[0][2] if examples_data else None)
48
 
49
  gr.Markdown("# ScreenCoder: Screenshot to Code")
50
 
@@ -60,14 +82,13 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"), fi
60
 
61
  upload_button = gr.UploadButton("Click to Upload or Drag-and-Drop", file_types=["image"], variant="primary")
62
 
63
- gr.Markdown("### Step 2: Write a Prompt (Optional)")
64
- prompt_input = gr.Textbox(
65
- label="Instructions",
66
- placeholder="e.g., 'Make this a dark theme and change the text.'",
67
- lines=3,
68
- value=examples_data[0][1] if examples_data else "Based on the layout, please fill in appropriate English text and beautify the image blocks."
69
- )
70
-
71
  generate_btn = gr.Button("Generate HTML", variant="primary", scale=2)
72
 
73
  with gr.Column(scale=2):
@@ -80,24 +101,26 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"), fi
80
  if examples_data:
81
  gr.Examples(
82
  examples=examples_data,
83
- inputs=[active_image],
 
 
84
  label="Click an example to try it out",
 
85
  )
86
 
87
  def handle_upload(uploaded_image_np):
88
- """On upload, update image, clear state, and set a generic prompt for user input."""
89
- default_prompt = "Based on the layout, please fill in appropriate English text and beautify the image blocks."
90
- return uploaded_image_np, None, default_prompt
91
 
92
  upload_button.upload(
93
  fn=handle_upload,
94
  inputs=upload_button,
95
- outputs=[active_image, active_image_path_state, prompt_input]
96
  )
97
 
98
  generate_btn.click(
99
  fn=process_image_and_prompt,
100
- inputs=[active_image, active_image_path_state, prompt_input],
101
  outputs=[html_preview, html_code_output],
102
  show_progress="full"
103
  )
 
5
  import numpy as np
6
  from screencoder.main import generate_html_for_demo
7
 
8
+ # Manually defined examples
9
+ examples_data = [
10
+ [
11
+ "screencoder/data/input/test1.png",
12
+ "",
13
+ "",
14
+ "",
15
+ "",
16
+ "screencoder/data/input/test1.png"
17
+ ],
18
+ [
19
+ "screencoder/data/input/test2.png",
20
+ "",
21
+ "",
22
+ "",
23
+ "",
24
+ "screencoder/data/input/test2.png"
25
+ ],
26
+ [
27
+ "screencoder/data/input/test3.png",
28
+ "",
29
+ "",
30
+ "",
31
+ "",
32
+ "screencoder/data/input/test3.png"
33
+ ],
34
+ ]
35
 
36
+ def process_image_and_prompt(image_np, image_path_from_state, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt):
37
  final_image_path = ""
38
  is_temp_file = False
39
 
 
50
  else:
51
  return "<html><body><h1 style='font-family: sans-serif; text-align: center; margin-top: 40px;'>Please provide an image.</h1></body></html>", ""
52
 
53
+ instructions = {
54
+ "sidebar": sidebar_prompt,
55
+ "header": header_prompt,
56
+ "navigation": navigation_prompt,
57
+ "main content": main_content_prompt
58
+ }
59
+
60
+ print(f"With instructions: {instructions}")
61
+ html_content = generate_html_for_demo(final_image_path, instructions)
62
 
63
  if is_temp_file:
64
  os.unlink(final_image_path)
 
66
  return html_content, html_content
67
 
68
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"), fill_height=True) as demo:
69
+ active_image_path_state = gr.State(value=examples_data[0][5] if examples_data else None)
70
 
71
  gr.Markdown("# ScreenCoder: Screenshot to Code")
72
 
 
82
 
83
  upload_button = gr.UploadButton("Click to Upload or Drag-and-Drop", file_types=["image"], variant="primary")
84
 
85
+ gr.Markdown("### Step 2: Write Prompts (Optional)")
86
+ with gr.Accordion("Component-specific Prompts", open=True):
87
+ sidebar_prompt = gr.Textbox(label="Sidebar Prompt", placeholder="Instructions for the sidebar...", value="")
88
+ header_prompt = gr.Textbox(label="Header Prompt", placeholder="Instructions for the header...", value="")
89
+ navigation_prompt = gr.Textbox(label="Navigation Prompt", placeholder="Instructions for the navigation...", value="")
90
+ main_content_prompt = gr.Textbox(label="Main Content Prompt", placeholder="Instructions for the main content...", value="")
91
+
 
92
  generate_btn = gr.Button("Generate HTML", variant="primary", scale=2)
93
 
94
  with gr.Column(scale=2):
 
101
  if examples_data:
102
  gr.Examples(
103
  examples=examples_data,
104
+ fn=lambda *args: args, # Simply return all inputs
105
+ inputs=[active_image, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt, active_image_path_state],
106
+ outputs=[active_image, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt, active_image_path_state],
107
  label="Click an example to try it out",
108
+ cache_examples=False,
109
  )
110
 
111
  def handle_upload(uploaded_image_np):
112
+ """On upload, update image, clear state, and set empty prompts."""
113
+ return uploaded_image_np, None, "", "", "", ""
 
114
 
115
  upload_button.upload(
116
  fn=handle_upload,
117
  inputs=upload_button,
118
+ outputs=[active_image, active_image_path_state, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt]
119
  )
120
 
121
  generate_btn.click(
122
  fn=process_image_and_prompt,
123
+ inputs=[active_image, active_image_path_state, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt],
124
  outputs=[html_preview, html_code_output],
125
  show_progress="full"
126
  )
screencoder/UIED/detect_compo/ip_region_proposal.py CHANGED
@@ -13,6 +13,38 @@ from config.CONFIG_UIED import Config
13
  C = Config()
14
 
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  def nesting_inspection(org, grey, compos, ffl_block):
17
  '''
18
  Inspect all big compos through block division by flood-fill
@@ -87,6 +119,10 @@ def compo_detection(input_img_path, output_root, uied_params,
87
 
88
  # *** Step 7 *** save detection result
89
  Compo.compos_update(uicompos, org.shape)
 
 
 
 
90
  file.save_corners_json(pjoin(ip_root, name + '.json'), uicompos)
91
  print("[Compo Detection Completed in %.3f s] Input: %s Output: %s" % (time.perf_counter() - start, input_img_path, pjoin(ip_root, name + '.json')))
92
  return uicompos
 
13
  C = Config()
14
 
15
 
16
def resolve_uicompo_containment(uicompos):
    """
    Resolve containment among UI components by dropping contained ones.

    A component is removed when its bounding box lies fully inside (or is
    equal to) the bounding box of another component. When several components
    share an identical bounding box, only the earliest one in ``uicompos`` is
    kept; previously every one of them was discarded because each "contained"
    the others.

    :param uicompos: sequence of components; each must expose a ``bbox``
        attribute with ``col_min``, ``row_min``, ``col_max`` and ``row_max``.
    :return: a new list with the surviving components in their original order.
    """

    def contains(bbox_a, bbox_b):
        """Return True if bbox_a completely contains (or equals) bbox_b."""
        return (bbox_a.col_min <= bbox_b.col_min
                and bbox_a.row_min <= bbox_b.row_min
                and bbox_a.col_max >= bbox_b.col_max
                and bbox_a.row_max >= bbox_b.row_max)

    compos_to_remove = set()
    for i, outer in enumerate(uicompos):
        for j, inner in enumerate(uicompos):
            if i == j or j in compos_to_remove:
                continue
            if not contains(outer.bbox, inner.bbox):
                continue
            # Mutual containment means the boxes are identical. Keep the
            # earlier component instead of removing both duplicates.
            if j < i and contains(inner.bbox, outer.bbox):
                continue
            compos_to_remove.add(j)

    # Filter out the contained components, preserving input order.
    final_compos = [compo for idx, compo in enumerate(uicompos) if idx not in compos_to_remove]

    if len(final_compos) < len(uicompos):
        print(f"Containment resolved: Removed {len(uicompos) - len(final_compos)} contained components.")

    return final_compos
46
+
47
+
48
  def nesting_inspection(org, grey, compos, ffl_block):
49
  '''
50
  Inspect all big compos through block division by flood-fill
 
119
 
120
  # *** Step 7 *** save detection result
121
  Compo.compos_update(uicompos, org.shape)
122
+
123
+ # *** Step 8 *** Resolve containment before saving
124
+ uicompos = resolve_uicompo_containment(uicompos)
125
+
126
  file.save_corners_json(pjoin(ip_root, name + '.json'), uicompos)
127
  print("[Compo Detection Completed in %.3f s] Input: %s Output: %s" % (time.perf_counter() - start, input_img_path, pjoin(ip_root, name + '.json')))
128
  return uicompos
screencoder/UIED/detect_compo/lib_ip/ip_detection.py CHANGED
@@ -361,13 +361,13 @@ def is_block(clip, thread=0.15):
361
  # top border - scan top down
362
  blank_count = 0
363
  for i in range(1, 5):
364
- if sum(clip[side + i]) / 255 > thread * clip.shape[1]:
365
  blank_count += 1
366
  if blank_count > 2: return False
367
  # left border - scan left to right
368
  blank_count = 0
369
  for i in range(1, 5):
370
- if sum(clip[:, side + i]) / 255 > thread * clip.shape[0]:
371
  blank_count += 1
372
  if blank_count > 2: return False
373
 
@@ -375,13 +375,13 @@ def is_block(clip, thread=0.15):
375
  # bottom border - scan bottom up
376
  blank_count = 0
377
  for i in range(-1, -5, -1):
378
- if sum(clip[side + i]) / 255 > thread * clip.shape[1]:
379
  blank_count += 1
380
  if blank_count > 2: return False
381
  # right border - scan right to left
382
  blank_count = 0
383
  for i in range(-1, -5, -1):
384
- if sum(clip[:, side + i]) / 255 > thread * clip.shape[0]:
385
  blank_count += 1
386
  if blank_count > 2: return False
387
  return True
 
361
  # top border - scan top down
362
  blank_count = 0
363
  for i in range(1, 5):
364
+ if sum(clip[side + i].astype(np.int64)) / 255 > thread * clip.shape[1]:
365
  blank_count += 1
366
  if blank_count > 2: return False
367
  # left border - scan left to right
368
  blank_count = 0
369
  for i in range(1, 5):
370
+ if sum(clip[:, side + i].astype(np.int64)) / 255 > thread * clip.shape[0]:
371
  blank_count += 1
372
  if blank_count > 2: return False
373
 
 
375
  # bottom border - scan bottom up
376
  blank_count = 0
377
  for i in range(-1, -5, -1):
378
+ if sum(clip[side + i].astype(np.int64)) / 255 > thread * clip.shape[1]:
379
  blank_count += 1
380
  if blank_count > 2: return False
381
  # right border - scan right to left
382
  blank_count = 0
383
  for i in range(-1, -5, -1):
384
+ if sum(clip[:, side + i].astype(np.int64)) / 255 > thread * clip.shape[0]:
385
  blank_count += 1
386
  if blank_count > 2: return False
387
  return True
screencoder/UIED/run_single.py CHANGED
@@ -1,9 +1,14 @@
1
- from os.path import join as pjoin
2
  import cv2
3
  import os
4
  import numpy as np
5
  import multiprocessing
 
 
6
 
 
 
 
 
7
 
8
  def resize_height_by_longest_edge(img_path, resize_length=800):
9
  org = cv2.imread(img_path)
@@ -30,6 +35,29 @@ def color_tips():
30
 
31
 
32
  if __name__ == '__main__':
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  # Set multiprocessing start method to 'spawn' for macOS compatibility.
34
  # This must be done at the very beginning of the main block.
35
  try:
@@ -62,11 +90,11 @@ if __name__ == '__main__':
62
  'merge-contained-ele':True, 'merge-line-to-paragraph':False, 'remove-bar':True}
63
 
64
  # set input image path
65
- input_path_img = 'data/test1.png'
66
- output_root = 'data'
67
 
68
  resized_height = resize_height_by_longest_edge(input_path_img, resize_length=800)
69
- color_tips()
70
 
71
  is_ip = True
72
  is_clf = False
@@ -99,4 +127,6 @@ if __name__ == '__main__':
99
  compo_path = pjoin(output_root, 'ip', str(name) + '.json')
100
  ocr_path = pjoin(output_root, 'ocr', str(name) + '.json')
101
  merge.merge(input_path_img, compo_path, ocr_path, pjoin(output_root, 'merge'),
102
- is_remove_bar=key_params['remove-bar'], is_paragraph=key_params['merge-line-to-paragraph'], show=True)
 
 
 
 
1
  import cv2
2
  import os
3
  import numpy as np
4
  import multiprocessing
5
+ import argparse
6
+ from os.path import join as pjoin
7
 
8
def get_args(argv=None):
    """Parse the command-line options for a single-image UIED run.

    Args:
        argv: Optional list of argument strings to parse. When ``None`` (the
            default) argparse falls back to ``sys.argv[1:]``, so existing
            ``get_args()`` callers behave exactly as before.

    Returns:
        argparse.Namespace whose ``run_id`` attribute holds the required
        ``--run_id`` value.
    """
    parser = argparse.ArgumentParser(description="Processes a single image for UI element detection.")
    parser.add_argument('--run_id', type=str, required=True, help='A unique identifier for the processing run.')
    return parser.parse_args(argv)
12
 
13
  def resize_height_by_longest_edge(img_path, resize_length=800):
14
  org = cv2.imread(img_path)
 
35
 
36
 
37
  if __name__ == '__main__':
38
+ args = get_args()
39
+
40
+ # --- Dynamic Path Construction ---
41
+ # Construct paths based on the provided run_id
42
+ base_dir = os.path.dirname(os.path.abspath(__file__))
43
+ run_id = args.run_id
44
+
45
+ # The temporary directory for this specific run
46
+ tmp_dir = os.path.join(base_dir, '..', 'data', 'tmp', run_id)
47
+
48
+ # Input image path
49
+ input_path_img = os.path.join(tmp_dir, f"{run_id}.png")
50
+
51
+ # Output directory for this script's results
52
+ output_root = tmp_dir # All results (ip, ocr, etc.) will go into the run's tmp subdir.
53
+
54
+ if not os.path.exists(input_path_img):
55
+ print(f"Error: Input image not found at {input_path_img}")
56
+ exit(1)
57
+
58
+ print(f"--- Starting UIED processing for run_id: {run_id} ---")
59
+ print(f"Input image: {input_path_img}")
60
+ print(f"Output root: {output_root}")
61
  # Set multiprocessing start method to 'spawn' for macOS compatibility.
62
  # This must be done at the very beginning of the main block.
63
  try:
 
90
  'merge-contained-ele':True, 'merge-line-to-paragraph':False, 'remove-bar':True}
91
 
92
  # set input image path
93
+ # input_path_img = 'data/test1.png'
94
+ # output_root = 'data'
95
 
96
  resized_height = resize_height_by_longest_edge(input_path_img, resize_length=800)
97
+ # color_tips() # This shows a window, which is not suitable for a script.
98
 
99
  is_ip = True
100
  is_clf = False
 
127
  compo_path = pjoin(output_root, 'ip', str(name) + '.json')
128
  ocr_path = pjoin(output_root, 'ocr', str(name) + '.json')
129
  merge.merge(input_path_img, compo_path, ocr_path, pjoin(output_root, 'merge'),
130
+ is_remove_bar=key_params['remove-bar'], is_paragraph=key_params['merge-line-to-paragraph'], show=False)
131
+
132
+ print(f"--- UIED processing complete for run_id: {run_id} ---")
screencoder/block_parsor.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  import cv2
3
  import json
 
4
  from utils import Doubao, encode_image, image_mask
5
 
6
  DEFAULT_IMAGE_PATH = "data/input/test1.png"
@@ -20,47 +21,34 @@ BBOX_TAG_END = "</bbox>"
20
  # PROMPT_navigation = "框出网页中的navigation的位置,请你只返回对应的bounding box。"
21
  # PROMPT_main_content = "框出网页中的main content的位置,请你只返回对应的bounding box。"
22
 
23
- # simple version of bbox parsing
24
- def parse_bboxes(bbox_input: str, image_path: str) -> dict[str, tuple[int, int, int, int]]:
25
- """Parse bounding box string to dictionary of named coordinate tuples"""
26
- bboxes = {}
27
- # print("Raw bbox input:", bbox_input) # Debug print
28
 
29
- image = cv2.imread(image_path)
30
- if image is None:
31
- print(f"Error: Failed to read image {image_path}")
32
- return bboxes
33
- h, w = image.shape[:2]
34
-
35
  try:
36
  components = bbox_input.strip().split('\n')
37
- # print("Split components:", components) # Debug print
38
-
39
  for component in components:
40
  component = component.strip()
41
  if not component:
42
  continue
43
-
44
  if ':' in component:
45
  name, bbox_str = component.split(':', 1)
46
  else:
47
  bbox_str = component
48
- if 'sidebar' in component.lower():
49
- name = 'sidebar'
50
- elif 'header' in component.lower():
51
- name = 'header'
52
- elif 'navigation' in component.lower():
53
- name = 'navigation'
54
- elif 'main content' in component.lower():
55
- name = 'main content'
56
- else:
57
- name = 'unknown'
58
 
59
  name = name.strip().lower()
60
  bbox_str = bbox_str.strip()
61
 
62
- # print(f"Processing component: {name}, bbox_str: {bbox_str}") # Debug print
63
-
64
  if BBOX_TAG_START in bbox_str and BBOX_TAG_END in bbox_str:
65
  start_idx = bbox_str.find(BBOX_TAG_START) + len(BBOX_TAG_START)
66
  end_idx = bbox_str.find(BBOX_TAG_END)
@@ -69,82 +57,44 @@ def parse_bboxes(bbox_input: str, image_path: str) -> dict[str, tuple[int, int,
69
  try:
70
  norm_coords = list(map(int, coords_str.split()))
71
  if len(norm_coords) == 4:
72
- x_min = int(norm_coords[0])
73
- y_min = int(norm_coords[1])
74
- x_max = int(norm_coords[2])
75
- y_max = int(norm_coords[3])
76
- bboxes[name] = (x_min, y_min, x_max, y_max)
77
  print(f"Successfully parsed {name}: {bboxes[name]}")
78
- else:
79
- print(f"Invalid number of coordinates for {name}: {norm_coords}")
80
  except ValueError as e:
81
  print(f"Failed to parse coordinates for {name}: {e}")
82
- else:
83
- print(f"No bbox tags found in: {bbox_str}")
84
-
85
  except Exception as e:
86
  print(f"Coordinate parsing failed: {str(e)}")
87
- import traceback
88
- traceback.print_exc()
89
-
90
  print("Final parsed bboxes:", bboxes)
91
  return bboxes
92
 
93
- def draw_bboxes(image_path: str, bboxes: dict[str, tuple[int, int, int, int]]) -> str:
94
- """Draw bounding boxes on image and save with different colors for each component"""
95
  image = cv2.imread(image_path)
96
- if image is None:
97
- print(f"Error: Failed to read image {image_path}")
98
- return ""
99
 
100
  h, w = image.shape[:2]
101
- colors = {
102
- 'sidebar': (0, 0, 255), # Red
103
- 'header': (0, 255, 0), # Green
104
- 'navigation': (255, 0, 0), # Blue
105
- 'main content': (255, 255, 0), # Cyan
106
- 'unknown': (0, 0, 0), # Black
107
- }
108
 
 
109
  for component, norm_bbox in bboxes.items():
110
- # Convert normalized coordinates to pixel coordinates for drawing
111
  x_min = int(norm_bbox[0] * w / 1000)
112
  y_min = int(norm_bbox[1] * h / 1000)
113
  x_max = int(norm_bbox[2] * w / 1000)
114
  y_max = int(norm_bbox[3] * h / 1000)
115
 
116
  color = colors.get(component.lower(), (0, 0, 255))
117
- cv2.rectangle(image, (x_min, y_min), (x_max, y_max), color, 3)
118
-
119
- # Add label
120
- cv2.putText(image, component, (x_min, y_min - 10),
121
- cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)
122
-
123
- # Output directory
124
- output_dir = "data/tmp"
125
- os.makedirs(output_dir, exist_ok=True)
126
 
127
- # Get the original filename without path
128
- original_filename = os.path.basename(image_path)
129
- output_path = os.path.join(output_dir, os.path.splitext(original_filename)[0] + "_with_bboxes.png")
130
-
131
- if cv2.imwrite(output_path, image):
132
  print(f"Successfully saved annotated image: {output_path}")
133
  return output_path
134
- print("Error: Failed to save image")
135
  return ""
136
 
137
- def save_bboxes_to_json(bboxes: dict[str, tuple[int, int, int, int]], image_path: str) -> str:
138
- """Save bounding boxes information to a JSON file"""
139
- # Output directory
140
- output_dir = "data/tmp"
141
- os.makedirs(output_dir, exist_ok=True)
142
-
143
- original_filename = os.path.basename(image_path)
144
- json_path = os.path.join(output_dir, os.path.splitext(original_filename)[0] + "_bboxes.json")
145
-
146
  bboxes_dict = {k: list(v) for k, v in bboxes.items()}
147
-
148
  try:
149
  with open(json_path, 'w', encoding='utf-8') as f:
150
  json.dump(bboxes_dict, f, indent=4, ensure_ascii=False)
@@ -154,8 +104,38 @@ def save_bboxes_to_json(bboxes: dict[str, tuple[int, int, int, int]], image_path
154
  print(f"Error saving JSON file: {str(e)}")
155
  return ""
156
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  # sequential version of bbox parsing: Using recursive detection with mask
158
- def sequential_component_detection(image_path: str, api_path: str) -> dict[str, tuple[int, int, int, int]]:
159
  """
160
  Sequential processing flow: detect each component in turn, mask the image after each detection
161
  """
@@ -189,7 +169,7 @@ def sequential_component_detection(image_path: str, api_path: str) -> dict[str,
189
 
190
  masked_image = image_mask(current_image_path, norm_bbox)
191
 
192
- temp_image_path = f"data/temp_{component_name}_masked.png"
193
  masked_image.save(temp_image_path)
194
  current_image_path = temp_image_path
195
 
@@ -238,34 +218,41 @@ def main_content_processing(bboxes: dict[str, tuple[int, int, int, int]], image_
238
  int(bbox[3] * h / 1000))
239
 
240
 
241
- if __name__ == "__main__":
242
- image_path = DEFAULT_IMAGE_PATH
243
- api_path = DEFAULT_API_PATH
 
 
 
 
 
 
 
 
 
 
 
 
 
244
 
245
- print("=== Starting Simple Component Detection ===")
246
- print(f"Input image: {image_path}")
247
- print(f"API path: {api_path}")
248
  client = Doubao(api_path)
249
  bbox_content = client.ask(PROMPT_MERGE, encode_image(image_path))
250
- print(f"Model response: {bbox_content}\n")
251
- bboxes = parse_bboxes(bbox_content, image_path)
252
-
253
- # print("=== Starting Sequential Component Detection ===")
254
- # print(f"Input image: {image_path}")
255
- # print(f"API path: {api_path}")
256
- # bboxes = sequential_component_detection(image_path, api_path)
257
 
258
  if bboxes:
259
- print(f"\n=== Detection Complete ===")
260
- print(f"Found bounding boxes for components: {list(bboxes.keys())}")
261
- print(f"Total components detected: {len(bboxes)}")
262
-
263
- json_path = save_bboxes_to_json(bboxes, image_path)
264
- draw_bboxes(image_path, bboxes)
265
 
266
- print(f"\n=== Results ===")
267
- for component, bbox in bboxes.items():
268
- print(f"{component}: {bbox}")
269
  else:
270
- print("\nNo valid bounding box coordinates found")
271
- exit(1)
 
 
 
 
 
1
  import os
2
  import cv2
3
  import json
4
+ import argparse
5
  from utils import Doubao, encode_image, image_mask
6
 
7
  DEFAULT_IMAGE_PATH = "data/input/test1.png"
 
21
  # PROMPT_navigation = "框出网页中的navigation的位置,请你只返回对应的bounding box。"
22
  # PROMPT_main_content = "框出网页中的main content的位置,请你只返回对应的bounding box。"
23
 
24
def get_args(argv=None):
    """Parse the command-line options for a bounding-box parsing run.

    Args:
        argv: Optional list of argument strings to parse. When ``None`` (the
            default) argparse falls back to ``sys.argv[1:]``, so existing
            ``get_args()`` callers behave exactly as before.

    Returns:
        argparse.Namespace whose ``run_id`` attribute holds the required
        ``--run_id`` value.
    """
    parser = argparse.ArgumentParser(description="Parses bounding boxes from an image using a vision model.")
    parser.add_argument('--run_id', type=str, required=True, help='A unique identifier for the processing run.')
    return parser.parse_args(argv)
 
28
 
29
+ def parse_bboxes(bbox_input: str) -> dict[str, tuple[int, int, int, int]]:
30
+ """Parse bounding box string to a dictionary of normalized (0-1000) coordinate tuples."""
31
+ bboxes = {}
 
 
 
32
  try:
33
  components = bbox_input.strip().split('\n')
 
 
34
  for component in components:
35
  component = component.strip()
36
  if not component:
37
  continue
38
+
39
  if ':' in component:
40
  name, bbox_str = component.split(':', 1)
41
  else:
42
  bbox_str = component
43
+ if 'sidebar' in component.lower(): name = 'sidebar'
44
+ elif 'header' in component.lower(): name = 'header'
45
+ elif 'navigation' in component.lower(): name = 'navigation'
46
+ elif 'main content' in component.lower(): name = 'main content'
47
+ else: name = 'unknown'
 
 
 
 
 
48
 
49
  name = name.strip().lower()
50
  bbox_str = bbox_str.strip()
51
 
 
 
52
  if BBOX_TAG_START in bbox_str and BBOX_TAG_END in bbox_str:
53
  start_idx = bbox_str.find(BBOX_TAG_START) + len(BBOX_TAG_START)
54
  end_idx = bbox_str.find(BBOX_TAG_END)
 
57
  try:
58
  norm_coords = list(map(int, coords_str.split()))
59
  if len(norm_coords) == 4:
60
+ bboxes[name] = tuple(norm_coords) # Directly store normalized coordinates
 
 
 
 
61
  print(f"Successfully parsed {name}: {bboxes[name]}")
 
 
62
  except ValueError as e:
63
  print(f"Failed to parse coordinates for {name}: {e}")
 
 
 
64
  except Exception as e:
65
  print(f"Coordinate parsing failed: {str(e)}")
66
+
 
 
67
  print("Final parsed bboxes:", bboxes)
68
  return bboxes
69
 
70
def draw_bboxes(image_path: str, bboxes: dict[str, tuple[int, int, int, int]], output_path: str) -> str:
    """Draw normalized (0-1000) bboxes on an image for visualization.

    Args:
        image_path: Path of the image to annotate.
        bboxes: Mapping of component name to ``(x_min, y_min, x_max, y_max)``
            expressed on a 0-1000 scale relative to the image size.
        output_path: Where the annotated copy is written.

    Returns:
        ``output_path`` when the annotated image was written, otherwise an
        empty string (image unreadable or write failed).
    """
    image = cv2.imread(image_path)
    if image is None:
        return ""

    h, w = image.shape[:2]
    colors = {
        'sidebar': (0, 0, 255),
        'header': (0, 255, 0),
        'navigation': (255, 0, 0),
        'main content': (255, 255, 0),
        'unknown': (0, 0, 0),
    }

    # Annotate a copy so the array returned by imread stays untouched.
    output_image = image.copy()
    for component, norm_bbox in bboxes.items():
        # Scale the 0-1000 normalized coordinates to pixel coordinates.
        x_min = int(norm_bbox[0] * w / 1000)
        y_min = int(norm_bbox[1] * h / 1000)
        x_max = int(norm_bbox[2] * w / 1000)
        y_max = int(norm_bbox[3] * h / 1000)

        color = colors.get(component.lower(), (0, 0, 255))
        cv2.rectangle(output_image, (x_min, y_min), (x_max, y_max), color, 3)

        # The label normally sits just above the box. For boxes touching the
        # top edge that position is outside the image, so draw the label just
        # inside the box instead.
        label_y = y_min - 10 if y_min >= 30 else y_min + 30
        cv2.putText(output_image, component, (x_min, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)

    if cv2.imwrite(output_path, output_image):
        print(f"Successfully saved annotated image: {output_path}")
        return output_path
    return ""
93
 
94
+ def save_bboxes_to_json(bboxes: dict[str, tuple[int, int, int, int]], json_path: str) -> str:
95
+ """Saves the normalized bboxes to a JSON file."""
96
+ # This is the unified format: a dictionary of lists.
 
 
 
 
 
 
97
  bboxes_dict = {k: list(v) for k, v in bboxes.items()}
 
98
  try:
99
  with open(json_path, 'w', encoding='utf-8') as f:
100
  json.dump(bboxes_dict, f, indent=4, ensure_ascii=False)
 
104
  print(f"Error saving JSON file: {str(e)}")
105
  return ""
106
 
107
def resolve_containment(bboxes: dict[str, tuple[int, int, int, int]]) -> dict[str, tuple[int, int, int, int]]:
    """
    Resolve containment among bounding boxes by removing contained boxes.

    If a box is fully contained within another, the *contained* box is
    removed and the container is kept. When two boxes are identical, the one
    that appears first in ``bboxes`` is kept. This is based on the assumption
    that major layout components should not contain each other.

    Previously the check was symmetric but always removed the second box of
    the pair, so a container listed after the box it encloses was dropped.

    Args:
        bboxes: Mapping of component name to ``(x_min, y_min, x_max, y_max)``.

    Returns:
        A new dict holding only the boxes that are not contained in another,
        in their original order. The input mapping is not modified.
    """

    def contains(box_a, box_b):
        """Return True if box_a completely contains (or equals) box_b."""
        xa1, ya1, xa2, ya2 = box_a
        xb1, yb1, xb2, yb2 = box_b
        return xa1 <= xb1 and ya1 <= yb1 and xa2 >= xb2 and ya2 >= yb2

    names = list(bboxes)
    removed = set()

    for outer_name in names:
        for inner_name in names:
            if outer_name == inner_name or outer_name in removed or inner_name in removed:
                continue

            # Only drop the box that is actually inside the other one.
            if contains(bboxes[outer_name], bboxes[inner_name]):
                print(f"Containment found: '{outer_name}' contains '{inner_name}'. Removing '{inner_name}'.")
                removed.add(inner_name)

    return {name: bbox for name, bbox in bboxes.items() if name not in removed}
136
+
137
  # sequential version of bbox parsing: Using recursive detection with mask
138
+ def sequential_component_detection(image_path: str, api_path: str, temp_dir: str) -> dict[str, tuple[int, int, int, int]]:
139
  """
140
  Sequential processing flow: detect each component in turn, mask the image after each detection
141
  """
 
169
 
170
  masked_image = image_mask(current_image_path, norm_bbox)
171
 
172
+ temp_image_path = os.path.join(temp_dir, f"temp_{component_name}_masked.png")
173
  masked_image.save(temp_image_path)
174
  current_image_path = temp_image_path
175
 
 
218
  int(bbox[3] * h / 1000))
219
 
220
 
221
def main():
    """Run bounding-box parsing for the run identified by ``--run_id``.

    Reads ``data/tmp/<run_id>/<run_id>.png`` (relative to this file), asks
    the Doubao client for component bounding boxes, removes contained boxes,
    then writes ``<run_id>_bboxes.json`` and ``<run_id>_with_bboxes.png`` into
    the same run directory. Exits with status 1 when the input image or the
    API key file is missing.
    """
    # Local import: ``exit()`` is a helper injected by the ``site`` module and
    # is not guaranteed to exist (e.g. ``python -S``); ``sys.exit`` always is.
    import sys

    args = get_args()
    run_id = args.run_id

    # --- Dynamic Path Construction ---
    base_dir = os.path.dirname(os.path.abspath(__file__))
    tmp_dir = os.path.join(base_dir, 'data', 'tmp', run_id)

    image_path = os.path.join(tmp_dir, f"{run_id}.png")
    api_path = os.path.join(base_dir, "doubao_api.txt")
    json_output_path = os.path.join(tmp_dir, f"{run_id}_bboxes.json")
    annotated_image_output_path = os.path.join(tmp_dir, f"{run_id}_with_bboxes.png")

    if not os.path.exists(image_path) or not os.path.exists(api_path):
        print("Error: Input image or API key file not found.")
        sys.exit(1)

    print(f"--- Starting BBox Parsing for run_id: {run_id} ---")

    client = Doubao(api_path)
    bbox_content = client.ask(PROMPT_MERGE, encode_image(image_path))
    bboxes = parse_bboxes(bbox_content)

    if bboxes:
        print("\n--- Resolving containment issues ---")
        bboxes = resolve_containment(bboxes)
        print("--- Containment resolved ---")

        print(f"\n--- Detection Complete for run_id: {run_id} ---")
        save_bboxes_to_json(bboxes, json_output_path)
        draw_bboxes(image_path, bboxes, annotated_image_output_path)
    else:
        print(f"\nNo valid bounding box coordinates found for run_id: {run_id}")
        # Still create an empty json file so the pipeline doesn't break
        save_bboxes_to_json({}, json_output_path)
256
+
257
+ if __name__ == "__main__":
258
+ main()
screencoder/html_generator.py CHANGED
@@ -3,109 +3,65 @@ from PIL import Image
3
  import bs4
4
  from threading import Thread
5
  import time
6
-
7
- # This dictionary can now be dynamically updated by an external script.
8
- user_instruction = {
9
- "sidebar": "Make all icons look better; fill in relevant English text; beautify the layout.",
10
- "header": "Make the Google logo look better; change the avatar color to be more appealing.",
11
- "navigation": "Please beautify the layout.",
12
- "main content": "Based on the layout, please fill in appropriate English text and beautify the image blocks."
13
- }
14
-
15
- # Prompt for each component
16
- PROMPT_DICT = {
17
- "sidebar": f"""This is a screenshot of a container. Please fill in a complete HTML and tail-wind CSS code to accurately reproduce the given container. Please note that the layout, icon style, size, and text information of all blocks need to be basically consistent with the original screenshot based on the user's additional conditions. The following is the code for filling in:
 
 
 
 
 
18
  <div>
19
  your code here
20
  </div>,
21
  only return the code within the <div> and </div> tags""",
22
-
23
- "header": f"""This is a screenshot of a container. Please fill in a complete HTML and tail-wind CSS code to accurately reproduce the given container. Please note that the relative position, layout, text information, and color of all blocks in the boundary box need to be basically consistent with the original screenshot based on the user's additional conditions. The following is the code for filling in:
24
  <div>
25
  your code here
26
  </div>,
27
  only return the code within the <div> and </div> tags""",
28
-
29
- "navigation": f"""This is a screenshot of a container. Please fill in a complete HTML and tail-wind CSS code to accurately reproduce the given container. Please note that the relative position, layout, text information, and color of all blocks in the boundary box need to be basically consistent with the original screenshot based on the user's additional conditions. Please use the same icons as in the original screenshot. The following is the code for filling in:
30
  <div>
31
  your code here
32
  </div>,
33
  only return the code within the <div> and </div> tags""",
34
-
35
- "main content": f"""This is a screenshot of a container. Please fill in a complete HTML and tail-wind CSS code to accurately reproduce the given container. Please note that all images displayed in the screenshot must be replaced with pure gray-400 image blocks of the same size as the corresponding images in the original screenshot, and the text information in the images does not need to be recognized. The relative position, layout, text information, and color of all blocks in the boundary box need to be basically consistent with the original screenshot based on the user's additional conditions. The following is the code for filling in:
36
  <div>
37
  your code here
38
  </div>,
39
  only return the code within the <div> and </div> tags""",
40
- }
41
-
42
- # PROMPT_sidebar = f"""这是一个container的截图。请填写一段完整的HTML和tail-wind CSS代码以准确再现给定的容器。请注意所有组块的排版、图标样式、大小、文字信息需要在用户额外条件的基础上与原始截图基本保持一致。以下是供填写的代码:
43
-
44
- # <div>
45
- # your code here
46
- # </div>
47
-
48
- # 只需返回<div>和</div>标签内的代码"""
49
-
50
- # PROMPT_header = f"""这是一个container的截图。请填写一段完整的HTML和tail-wind CSS代码以准确再现给定的容器。请注意所有组块在boundary box中的相对位置、排版、文字信息、颜色需要在用户额外条件的基础上与原始截图基本保持一致。以下是供填写的代码:
51
-
52
- # <div>
53
- # your code here
54
- # </div>
55
-
56
- # 只需返回<div>和</div>标签内的代码"""
57
-
58
- # PROMPT_navigation = f"""这是一个container的截图。请填写一段完整的HTML和tail-wind CSS代码以准确再现给定的容器。请注意所有组块的在boundary box中的相对位置、文字排版、颜色需要在用户额外条件的基础上与原始截图基本保持一致。请你直接使用原始截图中一致的图标。以下是供填写的代码:
59
-
60
- # <div>
61
- # your code here
62
- # </div>
63
-
64
- # 只需返回<div>和</div>标签内的代码"""
65
-
66
- # PROMPT_main_content = f"""这是一个container的截图。请填写一段完整的HTML和tail-wind CSS代码以准确再现给定的容器。截图中显示的图像务必全部用与原始截图中对应图像同样大小的纯灰色图像块替换,不需要识别图像中的文字信息。请注意所有组块在boundary box中的相对位置、排版、文字信息、颜色需要在用户额外条件的基础上与原始截图基本保持一致。以下是供填写的代码:
67
-
68
- # <div>
69
- # your code here
70
- # </div>
71
-
72
- # 只需返回<div>和</div>标签内的代码"""
73
 
74
- # Generate code for each component
75
- def generate_code(bbox_tree, img_path, bot):
76
- """generate code for all the leaf nodes in the bounding box tree, return a dictionary: {'id': 'code'}"""
77
  img = Image.open(img_path)
78
  code_dict = {}
79
-
 
80
  def _generate_code(node):
81
- if node["children"] == []:
82
  bbox = node["bbox"]
83
- # bbox is already in pixel coordinates [x1, y1, x2, y2]
84
  cropped_img = img.crop(bbox)
85
 
86
- # Select prompt based on node type
87
- if "type" in node:
88
- if node["type"] == "sidebar":
89
- prompt = PROMPT_DICT["sidebar"]
90
- elif node["type"] == "header":
91
- prompt = PROMPT_DICT["header"]
92
- elif node["type"] == "navigation":
93
- prompt = PROMPT_DICT["navigation"]
94
- elif node["type"] == "main content":
95
- prompt = PROMPT_DICT["main content"]
96
- else:
97
- print(f"Unknown component type: {node['type']}")
98
- return
99
  else:
100
- print("Node type not found")
101
- return
102
-
103
- try:
104
- code = bot.ask(prompt, encode_image(cropped_img))
105
- code_dict[node["id"]] = code
106
- except Exception as e:
107
- print(f"Error generating code for {node.get('type', 'unknown')}: {str(e)}")
108
- code_dict[node["id"]] = f"<!-- Error: {str(e)} -->"
109
  else:
110
  for child in node["children"]:
111
  _generate_code(child)
@@ -113,66 +69,8 @@ def generate_code(bbox_tree, img_path, bot):
113
  _generate_code(bbox_tree)
114
  return code_dict
115
 
116
- # Generate code for each component in parallel
117
- # def generate_code_parallel(bbox_tree, img_path, prompt, bot):
118
- """generate code for all the leaf nodes in the bounding box tree, return a dictionary: {'id': 'code'}"""
119
- code_dict = {}
120
- t_list = []
121
-
122
- def _generate_code_with_retry(node, max_retries=3, retry_delay=2):
123
- """Generate code with retry mechanism for rate limit errors"""
124
- try:
125
- # Create a new image instance for each thread
126
- with Image.open(img_path) as img:
127
- bbox = node["bbox"]
128
- cropped_img = img.crop(bbox)
129
-
130
- for attempt in range(max_retries):
131
- try:
132
- code = bot.ask(prompt, encode_image(cropped_img))
133
- code_dict[node["id"]] = code
134
- return
135
- except Exception as e:
136
- if "rate_limit" in str(e).lower() and attempt < max_retries - 1:
137
- print(f"Rate limit hit, retrying in {retry_delay} seconds... (Attempt {attempt + 1}/{max_retries})")
138
- time.sleep(retry_delay)
139
- retry_delay *= 2 # Exponential backoff
140
- else:
141
- print(f"Error generating code for node {node['id']}: {str(e)}")
142
- code_dict[node["id"]] = f"<!-- Error: {str(e)} -->"
143
- return
144
- except Exception as e:
145
- print(f"Error processing image for node {node['id']}: {str(e)}")
146
- code_dict[node["id"]] = f"<!-- Error: {str(e)} -->"
147
-
148
- def _generate_code(node):
149
- if node["children"] == []:
150
- t = Thread(target=lambda: _generate_code_with_retry(node))
151
- t.start()
152
- t_list.append(t)
153
- else:
154
- for child in node["children"]:
155
- _generate_code(child)
156
-
157
- _generate_code(bbox_tree)
158
-
159
- # Wait for all threads to complete
160
- for t in t_list:
161
- t.join()
162
-
163
- return code_dict
164
-
165
- # Generate HTML from the bounding box tree
166
- def generate_html(bbox_tree, output_file="output.html", img_path="data/test1.png"):
167
- """
168
- Generates an HTML file with nested containers based on the bounding box tree.
169
-
170
- :param bbox_tree: Dictionary representing the bounding box tree.
171
- :param output_file: The name of the output HTML file.
172
- """
173
- # HTML and CSS templates
174
- # the container class is used to create grid and position the boxes
175
- # include the tailwind css in the head tag
176
  html_template_start = """
177
  <!DOCTYPE html>
178
  <html lang="en">
@@ -215,137 +113,110 @@ def generate_html(bbox_tree, output_file="output.html", img_path="data/test1.png
215
  </html>
216
  """
217
 
218
- # Function to recursively generate HTML
219
- def process_bbox(node, parent_width, parent_height, parent_left, parent_top, img):
220
  bbox = node['bbox']
221
  children = node.get('children', [])
222
- id = node['id']
223
-
224
- # Calculate relative positions and sizes
225
  left = (bbox[0] - parent_left) / parent_width * 100
226
  top = (bbox[1] - parent_top) / parent_height * 100
227
  width = (bbox[2] - bbox[0]) / parent_width * 100
228
  height = (bbox[3] - bbox[1]) / parent_height * 100
229
 
230
- # Start the box div
231
- html = f'''
232
- <div id="{id}" class="box" style="left: {left}%; top: {top}%; width: {width}%; height: {height}%;">
233
- '''
234
-
235
  if children:
236
- # If there are children, add a nested container
237
- html += '''
238
- <div class="container">
239
- '''
240
- # Get the current box's width and height in pixels for child calculations
241
  current_width = bbox[2] - bbox[0]
242
  current_height = bbox[3] - bbox[1]
243
  for child in children:
244
- html += process_bbox(child, current_width, current_height, bbox[0], bbox[1], img)
245
- html += '''
246
- </div>
247
- '''
248
-
249
- # Close the box div
250
- html += '''
251
- </div>
252
- '''
253
  return html
254
 
255
  root_bbox = bbox_tree['bbox']
256
  root_children = bbox_tree.get('children', [])
257
- root_width = root_bbox[2]
258
- root_height = root_bbox[3]
259
- root_x = root_bbox[0]
260
- root_y = root_bbox[1]
261
 
262
  html_content = html_template_start
263
  for child in root_children:
264
- html_content += process_bbox(child, root_width, root_height, root_x, root_y, img)
265
  html_content += html_template_end
266
 
267
- soup = bs4.BeautifulSoup(html_content, 'html.parser')
268
- html_content = soup.prettify()
269
-
270
  with open(output_file, 'w') as f:
271
- f.write(html_content)
272
 
273
- # Substitute the code in the html file
274
  def code_substitution(html_file, code_dict):
275
- """substitute the code in the html file"""
276
  with open(html_file, "r") as f:
277
- html = f.read()
278
- soup = bs4.BeautifulSoup(html, 'html.parser')
279
- for id, code in code_dict.items():
280
- code = code.replace("```html", "").replace("```", "")
281
- div = soup.find(id=id)
282
- # replace the inner html of the div
283
  if div:
284
- div.append(bs4.BeautifulSoup(code, 'html.parser'))
285
  with open(html_file, "w") as f:
286
  f.write(soup.prettify())
287
 
288
- # Main
289
- if __name__ == "__main__":
290
- import json
291
- import time
292
- from PIL import Image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
293
 
294
- # Load bboxes from block_parsing.py output
295
- boxes_data = json.load(open("data/tmp/test1_bboxes.json"))
296
 
297
- img_path = "data/input/test1.png"
298
  with Image.open(img_path) as img:
299
  width, height = img.size
300
 
301
- # Create root node with actual image dimensions
302
- root = {
303
- "bbox": [0, 0, width, height], # Use actual image dimensions
304
- "children": []
305
- }
306
 
307
- # Map region IDs to component types
308
- region_type_mapping = {
309
- "1": "sidebar",
310
- "2": "header",
311
- "3": "navigation",
312
- "4": "main content"
313
- }
314
-
315
- # Add each region as a child with its type
316
- for region in boxes_data.get("regions", []):
317
- # Convert normalized coordinates to pixel coordinates
318
- x = region["x"] * width
319
- y = region["y"] * height
320
- w = region["w"] * width
321
- h = region["h"] * height
322
-
323
- child = {
324
- "bbox": [x, y, x + w, y + h], # Convert to [x1, y1, x2, y2] format
325
- "children": [],
326
- "type": region_type_mapping.get(region["id"], "unknown")
327
- }
328
- root["children"].append(child)
329
 
330
- # Assign IDs to all nodes
331
- def assign_id(node, id):
332
- node["id"] = id
333
- for child in node.get("children", []):
334
- id = assign_id(child, id+1)
335
- return id
336
 
337
- assign_id(root, 0)
 
 
 
 
 
338
 
339
- # print(root)
340
- # Generate initial HTML layout
341
- generate_html(root, 'data/output/test1_layout.html')
342
 
343
- # Initialize the bot
344
- bot = Doubao("doubao_api.txt", model = "doubao-1.5-thinking-vision-pro-250428")
345
- # bot = Qwen_2_5_VL("qwen_api.txt", model="qwen2.5-vl-72b-instruct")
346
 
347
- # Generate code for each component
348
- code_dict = generate_code(root, img_path, bot)
349
-
350
- # Substitute the generated code into the HTML
351
- code_substitution('data/output/test1_layout.html', code_dict)
 
3
  import bs4
4
  from threading import Thread
5
  import time
6
+ import argparse
7
+ import json
8
+ import os
9
+
10
+ # This dictionary holds the user's instructions for the current run.
11
+ user_instruction = {"sidebar": "", "header": "", "navigation": "", "main content": ""}
12
+
13
def get_args():
    """Parse the command-line options of the HTML generation step.

    Returns:
        argparse.Namespace: ``run_id`` (required string) and ``instructions``
        (optional JSON string; ``None`` when the flag is omitted).
    """
    cli = argparse.ArgumentParser(
        description="Generates an HTML layout from bounding box data."
    )
    cli.add_argument(
        "--run_id",
        required=True,
        type=str,
        help="A unique identifier for the processing run.",
    )
    cli.add_argument(
        "--instructions",
        type=str,
        help="A JSON string of instructions for different components.",
    )
    namespace = cli.parse_args()
    return namespace
18
+
19
def get_prompt_dict(instructions):
    """Build the per-component prompts with the user's instructions embedded.

    Args:
        instructions: Mapping from component name ("sidebar", "header",
            "navigation", "main content") to the user's free-text instruction.
            A missing key, a ``None`` value, or ``instructions=None`` is
            treated as an empty instruction instead of raising ``KeyError`` or
            leaking the text "None" into the prompt.

    Returns:
        dict: One prompt string per component, keyed by component name in the
        order sidebar, header, navigation, main content.
    """
    instructions = instructions or {}

    # Text shared by every prompt, kept in one place so the four prompts
    # cannot drift apart.
    intro = (
        "This is a screenshot of a container. Please fill in a complete HTML "
        "and tail-wind CSS code to accurately reproduce the given container. "
        "Please note that "
    )
    box_consistency = (
        "relative position, layout, text information, and color of all blocks "
        "in the boundary box need to be basically consistent with the original "
        "screenshot based on the user's additional conditions."
    )
    closing = (
        " The following is the code for filling in:\n"
        "<div>\n"
        "your code here\n"
        "</div>,\n"
        "only return the code within the <div> and </div> tags"
    )

    # Component-specific guidance that follows "Please note that ".
    guidance = {
        "sidebar": (
            "the layout, icon style, size, and text information of all blocks "
            "need to be basically consistent with the original screenshot "
            "based on the user's additional conditions."
        ),
        "header": "the " + box_consistency,
        "navigation": (
            "the " + box_consistency
            + " Please use the same icons as in the original screenshot."
        ),
        "main content": (
            "all images displayed in the screenshot must be replaced with pure "
            "gray-400 image blocks of the same size as the corresponding "
            "images in the original screenshot, and the text information in "
            "the images does not need to be recognized. The " + box_consistency
        ),
    }

    def _instruction_for(component):
        value = instructions.get(component)
        return "" if value is None else value

    return {
        component: (
            f"{intro}{text} User instruction: "
            f"{_instruction_for(component)}.{closing}"
        )
        for component, text in guidance.items()
    }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
def generate_code(bbox_tree, img_path, bot, instructions):
    """Generate code for each leaf node in the bounding box tree.

    Args:
        bbox_tree: Root node dict. Nodes carry "bbox", optionally "children",
            and leaves are expected to carry "type" and "id".
        img_path: Path of the screenshot the bounding boxes refer to.
        bot: Client exposing ``ask(prompt, encoded_image)``.
        instructions: Per-component user instructions, forwarded to
            ``get_prompt_dict``.

    Returns:
        dict: Maps the "id" of every leaf whose generation succeeded to the
        code returned by ``bot.ask``. Leaves with an unknown or missing type,
        and leaves whose request raised, are reported on stdout and left out.
    """
    code_dict = {}
    prompt_dict = get_prompt_dict(instructions)

    def _generate_code(node, img):
        children = node.get("children")
        if children:
            for child in children:
                _generate_code(child, img)
            return

        # Leaf node: check the type first so nothing is cropped for nodes
        # that have no prompt.
        node_type = node.get("type")
        if not node_type or node_type not in prompt_dict:
            print(f"Node type '{node_type}' not found or invalid.")
            return

        cropped_img = img.crop(node["bbox"])
        try:
            code_dict[node["id"]] = bot.ask(prompt_dict[node_type], encode_image(cropped_img))
        except Exception as e:
            # Best effort: one failing component must not abort the others.
            print(f"Error generating code for {node_type}: {e}")

    # The context manager closes the image file once every leaf is processed;
    # the previous bare Image.open() left the handle open.
    with Image.open(img_path) as img:
        _generate_code(bbox_tree, img)
    return code_dict
71
 
72
+ def generate_html(bbox_tree, output_file):
73
+ """Generates an HTML file with nested containers based on the bounding box tree."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  html_template_start = """
75
  <!DOCTYPE html>
76
  <html lang="en">
 
113
  </html>
114
  """
115
 
116
+ def process_bbox(node, parent_width, parent_height, parent_left, parent_top):
 
117
  bbox = node['bbox']
118
  children = node.get('children', [])
119
+ node_id = node['id']
120
+
 
121
  left = (bbox[0] - parent_left) / parent_width * 100
122
  top = (bbox[1] - parent_top) / parent_height * 100
123
  width = (bbox[2] - bbox[0]) / parent_width * 100
124
  height = (bbox[3] - bbox[1]) / parent_height * 100
125
 
126
+ html = f'<div id="{node_id}" class="box" style="left: {left}%; top: {top}%; width: {width}%; height: {height}%;">'
 
 
 
 
127
  if children:
128
+ html += '<div class="container">'
 
 
 
 
129
  current_width = bbox[2] - bbox[0]
130
  current_height = bbox[3] - bbox[1]
131
  for child in children:
132
+ html += process_bbox(child, current_width, current_height, bbox[0], bbox[1])
133
+ html += '</div>'
134
+ html += '</div>'
 
 
 
 
 
 
135
  return html
136
 
137
  root_bbox = bbox_tree['bbox']
138
  root_children = bbox_tree.get('children', [])
139
+ root_width = root_bbox[2] - root_bbox[0]
140
+ root_height = root_bbox[3] - root_bbox[1]
 
 
141
 
142
  html_content = html_template_start
143
  for child in root_children:
144
+ html_content += process_bbox(child, root_width, root_height, root_bbox[0], root_bbox[1])
145
  html_content += html_template_end
146
 
 
 
 
147
  with open(output_file, 'w') as f:
148
+ f.write(bs4.BeautifulSoup(html_content, 'html.parser').prettify())
149
 
 
150
def code_substitution(html_file, code_dict):
    """Insert each generated snippet into its matching element of the layout.

    Args:
        html_file: Path of the layout HTML file; it is rewritten in place.
        code_dict: Maps element ids to generated code. Markdown code-fence
            markers are stripped from the code before it is parsed. Ids with
            no matching element in the file are ignored.
    """
    with open(html_file, "r") as source:
        markup = source.read()
    soup = bs4.BeautifulSoup(markup, 'html.parser')

    for node_id in code_dict:
        target = soup.find(id=node_id)
        if not target:
            continue
        snippet = code_dict[node_id].replace("```html", "").replace("```", "")
        target.append(bs4.BeautifulSoup(snippet, 'html.parser'))

    with open(html_file, "w") as destination:
        destination.write(soup.prettify())
160
 
161
def main():
    """Generate the HTML layout for one run and fill it with generated code.

    Steps: merge the optional ``--instructions`` JSON into ``user_instruction``,
    load ``data/tmp/<run_id>/<run_id>_bboxes.json`` and the screenshot, convert
    the boxes from the 0-1000 normalized scale to pixels, write the empty
    layout to ``data/output/<run_id>/<run_id>_layout.html``, then ask the
    Doubao model for each component's code and substitute it into the layout.

    Raises:
        SystemExit: with status 1 when the bbox JSON, the screenshot, or the
            API key file is missing.
    """
    args = get_args()
    if args.instructions:
        try:
            parsed_instructions = json.loads(args.instructions)
        except json.JSONDecodeError:
            print("Error: Could not decode instructions JSON.")
        else:
            # Valid JSON that is not an object (list, number, string) would
            # make dict.update() raise; report it and keep the defaults.
            if isinstance(parsed_instructions, dict):
                user_instruction.update(parsed_instructions)
            else:
                print("Error: Instructions JSON must be an object mapping component names to text.")

    # --- Dynamic Path Construction ---
    base_dir = os.path.dirname(os.path.abspath(__file__))
    tmp_dir = os.path.join(base_dir, 'data', 'tmp', args.run_id)
    output_dir = os.path.join(base_dir, 'data', 'output', args.run_id)
    os.makedirs(output_dir, exist_ok=True)

    input_json_path = os.path.join(tmp_dir, f"{args.run_id}_bboxes.json")
    img_path = os.path.join(tmp_dir, f"{args.run_id}.png")
    output_html_path = os.path.join(output_dir, f"{args.run_id}_layout.html")

    if not os.path.exists(input_json_path) or not os.path.exists(img_path):
        print("Error: Input bbox JSON or image file not found.")
        # SystemExit is raised directly: the bare exit() helper comes from the
        # `site` module and is not available in every runtime.
        raise SystemExit(1)

    print(f"--- Starting HTML Generation for run_id: {args.run_id} ---")

    with open(input_json_path, 'r') as f:
        boxes_data = json.load(f)

    with Image.open(img_path) as img:
        width, height = img.size

    root = {"bbox": [0, 0, width, height], "children": [], "id": 0}

    # Convert normalized bboxes to pixel coordinates. Each child gets a unique
    # id (1, 2, ...) that code_substitution later uses to find its element.
    for node_id, (name, norm_bbox) in enumerate(boxes_data.items(), start=1):
        x1 = int(norm_bbox[0] * width / 1000)
        y1 = int(norm_bbox[1] * height / 1000)
        x2 = int(norm_bbox[2] * width / 1000)
        y2 = int(norm_bbox[3] * height / 1000)
        root["children"].append(
            {"bbox": [x1, y1, x2, y2], "type": name, "children": [], "id": node_id}
        )

    generate_html(root, output_html_path)

    api_path = os.path.join(base_dir, "doubao_api.txt")
    if not os.path.exists(api_path):
        print(f"Error: API key not found at {api_path}")
        raise SystemExit(1)

    bot = Doubao(api_path, model="doubao-1.5-thinking-vision-pro-250428")
    code_dict = generate_code(root, img_path, bot, user_instruction)
    code_substitution(output_html_path, code_dict)

    print(f"HTML layout with generated content saved to {os.path.basename(output_html_path)}")
    print(f"--- HTML Generation Complete for run_id: {args.run_id} ---")


if __name__ == "__main__":
    main()
 
 
 
screencoder/image_box_detection.py CHANGED
@@ -115,11 +115,33 @@ def draw_bboxes_on_image(img, region_bboxes, placeholder_bboxes):
115
  return boxed
116
 
117
 
118
- def main(args):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  # Read original screenshot
120
- img = cv2.imread(str(args.screenshot))
121
  if img is None:
122
- sys.exit(f"Error: Cannot read image {args.screenshot}")
123
  if img.std() < 5:
124
  print("Warning: The screenshot is almost pure color, it may not be the original screenshot with real thumbnails.")
125
 
@@ -127,10 +149,11 @@ def main(args):
127
 
128
  # Parse HTML → Get bboxes
129
  region_bboxes, placeholder_bboxes, layout_width, layout_height = asyncio.run(
130
- extract_bboxes_from_html(args.html)
131
  )
132
  if not placeholder_bboxes:
133
- sys.exit("Error: No gray placeholder blocks found!")
 
134
 
135
  # Calculate separate scale factors for X and Y to handle aspect ratio differences
136
  scale_x = W / layout_width if layout_width > 0 else 1
@@ -163,10 +186,9 @@ def main(args):
163
  overlay = draw_bboxes_on_image(img, scaled_regions, scaled_placeholders)
164
 
165
  # Save debug image
166
- out_png = args.out / "debug_gray_bboxes_test1.png"
167
- out_png.parent.mkdir(parents=True, exist_ok=True)
168
- cv2.imwrite(str(out_png), overlay)
169
- print(f"Success: BBox overlay saved to {out_png}")
170
 
171
 
172
  # Convert absolute pixel coordinates to proportions for the final JSON output
@@ -195,24 +217,19 @@ def main(args):
195
  output_json = json.dumps(output_data, indent=2, ensure_ascii=False)
196
  print(output_json)
197
 
198
- if args.json:
199
- args.json.parent.mkdir(parents=True, exist_ok=True)
200
- args.json.write_text(output_json)
201
- print(f"Success: BBox list saved to {args.json}")
202
 
 
 
 
 
 
 
 
203
 
204
  # ---------- CLI ----------
205
  if __name__ == "__main__":
206
- parser = argparse.ArgumentParser(
207
- description="Draw BBoxes parsed from HTML on the original screenshot"
208
- )
209
- parser.add_argument("--html", required=False, type=Path, default=Path("data/output/test1_layout.html"),
210
- help="Generated HTML file (with gray placeholder)")
211
- parser.add_argument("--screenshot", required=False, type=Path, default=Path("data/input/test1.png"),
212
- help="Original UI screenshot (with real thumbnails)")
213
- parser.add_argument("--out", default=Path("data/tmp"), type=Path,
214
- help="Output directory (save debug_gray_bboxes_test1.png)")
215
- parser.add_argument("--json", type=Path, default=Path("data/tmp/test1_bboxes.json"),
216
- help="If provided, write BBox list to JSON file")
217
- args = parser.parse_args()
218
- main(args)
 
115
  return boxed
116
 
117
 
118
+ def main():
119
+ args = get_args()
120
+ run_id = args.run_id
121
+
122
+ # --- Dynamic Path Construction ---
123
+ base_dir = Path(__file__).parent.resolve()
124
+ # Go up one level to the project root to find the data directory
125
+ project_root = base_dir.parent
126
+ tmp_dir = project_root / 'screencoder' / 'data' / 'tmp' / run_id
127
+ output_dir = project_root / 'screencoder' / 'data' / 'output' / run_id
128
+
129
+ html_path = output_dir / f"{run_id}_layout.html"
130
+ screenshot_path = tmp_dir / f"{run_id}.png"
131
+ output_json_path = tmp_dir / f"{run_id}_bboxes.json"
132
+ debug_image_path = tmp_dir / f"debug_gray_bboxes_{run_id}.png"
133
+
134
+ if not html_path.exists():
135
+ sys.exit(f"Error: HTML file not found at {html_path}")
136
+ if not screenshot_path.exists():
137
+ sys.exit(f"Error: Screenshot not found at {screenshot_path}")
138
+
139
+ print(f"--- Starting Image Box Detection for run_id: {run_id} ---")
140
+
141
  # Read original screenshot
142
+ img = cv2.imread(str(screenshot_path))
143
  if img is None:
144
+ sys.exit(f"Error: Cannot read image {screenshot_path}")
145
  if img.std() < 5:
146
  print("Warning: The screenshot is almost pure color, it may not be the original screenshot with real thumbnails.")
147
 
 
149
 
150
  # Parse HTML → Get bboxes
151
  region_bboxes, placeholder_bboxes, layout_width, layout_height = asyncio.run(
152
+ extract_bboxes_from_html(html_path)
153
  )
154
  if not placeholder_bboxes:
155
+ # This is not necessarily an error; some UIs might not have placeholders.
156
+ print("Info: No gray placeholder blocks found.")
157
 
158
  # Calculate separate scale factors for X and Y to handle aspect ratio differences
159
  scale_x = W / layout_width if layout_width > 0 else 1
 
186
  overlay = draw_bboxes_on_image(img, scaled_regions, scaled_placeholders)
187
 
188
  # Save debug image
189
+ debug_image_path.parent.mkdir(parents=True, exist_ok=True)
190
+ cv2.imwrite(str(debug_image_path), overlay)
191
+ print(f"Success: BBox overlay saved to {debug_image_path}")
 
192
 
193
 
194
  # Convert absolute pixel coordinates to proportions for the final JSON output
 
217
  output_json = json.dumps(output_data, indent=2, ensure_ascii=False)
218
  print(output_json)
219
 
220
+ output_json_path.parent.mkdir(parents=True, exist_ok=True)
221
+ output_json_path.write_text(output_json)
222
+ print(f"Success: BBox list saved to {output_json_path}")
223
+ print(f"--- Image Box Detection Complete for run_id: {run_id} ---")
224
 
225
def get_args():
    """Parse the command-line options of the image box detection step.

    Returns:
        argparse.Namespace: carries the required ``run_id`` string.
    """
    cli = argparse.ArgumentParser(
        description="Extracts placeholder bounding boxes from an HTML file and maps them to a screenshot."
    )
    cli.add_argument(
        "--run_id",
        type=str,
        required=True,
        help="A unique identifier for the processing run.",
    )
    namespace = cli.parse_args()
    return namespace
232
 
233
  # ---------- CLI ----------
234
  if __name__ == "__main__":
235
+ main()
 
 
 
 
 
 
 
 
 
 
 
 
screencoder/image_replacer.py CHANGED
@@ -4,17 +4,45 @@ from pathlib import Path
4
  from bs4 import BeautifulSoup
5
  import cv2
6
  import re
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
- def main(args):
9
  # --- Phase 1: Crop and Save All Images First ---
10
 
11
  # 1. Load data
12
- mapping_data = json.loads(args.mapping.read_text())
13
- uied_data = json.loads(args.uied.read_text())
14
- original_image = cv2.imread(str(args.original_image))
15
 
16
  if original_image is None:
17
- raise ValueError(f"Could not load the original image from {args.original_image}")
18
 
19
  # Get image shapes to calculate a simple, global scaling factor
20
  H_proc, W_proc, _ = uied_data['img_shape']
@@ -29,7 +57,7 @@ def main(args):
29
  }
30
 
31
  # 2. Create a directory for cropped images
32
- crop_dir = args.output_html.parent / "cropped_images"
33
  crop_dir.mkdir(exist_ok=True)
34
  print(f"Saving cropped images to: {crop_dir.resolve()}")
35
 
@@ -67,7 +95,7 @@ def main(args):
67
  # --- Phase 2: Use BeautifulSoup to Replace Placeholders by Order ---
68
 
69
  print("\nStarting offline HTML processing with BeautifulSoup...")
70
- html_content = args.gray_html.read_text()
71
  soup = BeautifulSoup(html_content, 'html.parser')
72
 
73
  # 1. Find all placeholder elements by their class, in document order.
@@ -115,27 +143,16 @@ def main(args):
115
  ph_element.replace_with(img_tag)
116
 
117
  # Save the modified HTML
118
- args.output_html.write_text(str(soup))
119
 
120
  print(f"\nSuccessfully replaced {min(len(placeholder_elements), len(ordered_placeholder_ids))} placeholders.")
121
- print(f"Final HTML generated at {args.output_html.resolve()}")
 
122
 
 
 
 
 
123
 
124
  if __name__ == "__main__":
125
- parser = argparse.ArgumentParser(description="Replace placeholder divs in an HTML file with cropped images based on UIED mappings.")
126
- parser.add_argument("--mapping", type=Path, required=False, help="Path to the mapping JSON file from mapping.py.")
127
- parser.add_argument("--uied", type=Path, required=False, help="Path to the UIED JSON file.")
128
- parser.add_argument("--original-image", type=Path, required=False, help="Path to the original screenshot image.")
129
- parser.add_argument("--gray-html", type=Path, required=False, help="Path to the input HTML file with gray placeholders.")
130
- parser.add_argument("--output-html", type=Path, required=False, help="Path to save the final, modified HTML file.")
131
-
132
- parser.set_defaults(
133
- mapping=Path('data/tmp/mapping_full_test1.json'),
134
- uied=Path('data/tmp/ip/test1.json'),
135
- original_image=Path('data/input/test1.png'),
136
- gray_html=Path('data/output/test1_layout.html'),
137
- output_html=Path('data/output/test1_layout_final.html')
138
- )
139
-
140
- args = parser.parse_args()
141
- main(args)
 
4
  from bs4 import BeautifulSoup
5
  import cv2
6
  import re
7
+ import sys
8
+
9
+ def main():
10
+ args = get_args()
11
+ run_id = args.run_id
12
+
13
+ # --- Dynamic Path Construction ---
14
+ base_dir = Path(__file__).parent.resolve()
15
+ tmp_dir = base_dir / 'data' / 'tmp' / run_id
16
+ output_dir = base_dir / 'data' / 'output' / run_id
17
+
18
+ mapping_path = tmp_dir / f"mapping_full_{run_id}.json"
19
+ uied_path = tmp_dir / "ip" / f"{run_id}.json"
20
+ original_image_path = tmp_dir / f"{run_id}.png"
21
+ # This is the input HTML with placeholders
22
+ gray_html_path = output_dir / f"{run_id}_layout.html"
23
+ # This will be the final output of the entire pipeline
24
+ final_html_path = output_dir / f"{run_id}_layout_final.html"
25
+
26
+ # --- Input Validation ---
27
+ if not all([p.exists() for p in [mapping_path, uied_path, original_image_path, gray_html_path]]):
28
+ print("Error: One or more required input files are missing.", file=sys.stderr)
29
+ if not mapping_path.exists(): print(f"- Missing: {mapping_path}", file=sys.stderr)
30
+ if not uied_path.exists(): print(f"- Missing: {uied_path}", file=sys.stderr)
31
+ if not original_image_path.exists(): print(f"- Missing: {original_image_path}", file=sys.stderr)
32
+ if not gray_html_path.exists(): print(f"- Missing: {gray_html_path}", file=sys.stderr)
33
+ sys.exit(1)
34
+
35
+ print(f"--- Starting Image Replacement for run_id: {run_id} ---")
36
 
 
37
  # --- Phase 1: Crop and Save All Images First ---
38
 
39
  # 1. Load data
40
+ mapping_data = json.loads(mapping_path.read_text())
41
+ uied_data = json.loads(uied_path.read_text())
42
+ original_image = cv2.imread(str(original_image_path))
43
 
44
  if original_image is None:
45
+ raise ValueError(f"Could not load the original image from {original_image_path}")
46
 
47
  # Get image shapes to calculate a simple, global scaling factor
48
  H_proc, W_proc, _ = uied_data['img_shape']
 
57
  }
58
 
59
  # 2. Create a directory for cropped images
60
+ crop_dir = final_html_path.parent / f"cropped_images_{run_id}"
61
  crop_dir.mkdir(exist_ok=True)
62
  print(f"Saving cropped images to: {crop_dir.resolve()}")
63
 
 
95
  # --- Phase 2: Use BeautifulSoup to Replace Placeholders by Order ---
96
 
97
  print("\nStarting offline HTML processing with BeautifulSoup...")
98
+ html_content = gray_html_path.read_text()
99
  soup = BeautifulSoup(html_content, 'html.parser')
100
 
101
  # 1. Find all placeholder elements by their class, in document order.
 
143
  ph_element.replace_with(img_tag)
144
 
145
  # Save the modified HTML
146
+ final_html_path.write_text(str(soup))
147
 
148
  print(f"\nSuccessfully replaced {min(len(placeholder_elements), len(ordered_placeholder_ids))} placeholders.")
149
+ print(f"Final HTML generated at {final_html_path.resolve()}")
150
+ print(f"--- Image Replacement Complete for run_id: {run_id} ---")
151
 
152
def get_args():
    """Parse the command-line options of the image replacement step.

    Returns:
        argparse.Namespace: carries the required ``run_id`` string.
    """
    cli = argparse.ArgumentParser(
        description="Replace placeholder divs in an HTML file with cropped images based on UIED mappings."
    )
    cli.add_argument(
        "--run_id",
        required=True,
        type=str,
        help="A unique identifier for the processing run.",
    )
    namespace = cli.parse_args()
    return namespace
156
 
157
  if __name__ == "__main__":
158
+ main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
screencoder/main.py CHANGED
@@ -2,16 +2,15 @@ import subprocess
2
  import sys
3
  import os
4
  import json
 
 
 
5
 
6
-
7
- # A simple placeholder for prompt injection
8
- # In a real scenario, this should be a more robust mechanism
9
- def inject_prompt_to_generator(prompt_text):
10
  if not prompt_text:
11
  return
12
 
13
- # In this example, we assume the prompt is a simple string for the "main content"
14
- # A more complex implementation would parse a structured prompt
15
  user_instruction = {
16
  "sidebar": "Make all icons look better; fill in relevant English text; beautify the layout.",
17
  "header": "Make the Google logo look better; change the avatar color to be more appealing.",
@@ -19,123 +18,115 @@ def inject_prompt_to_generator(prompt_text):
19
  "main content": prompt_text
20
  }
21
 
22
- generator_path = os.path.join(os.path.dirname(__file__), 'html_generator.py')
23
- with open(generator_path, 'r', encoding='utf-8') as f:
24
- lines = f.readlines()
25
 
26
- # Find the user_instruction dictionary and replace it
27
- new_lines = []
28
- in_dict = False
29
- for line in lines:
30
- if 'user_instruction = {' in line:
31
- in_dict = True
32
- new_lines.append(f"user_instruction = {json.dumps(user_instruction, indent=4)}\n")
33
- elif in_dict and '}' in line:
34
- in_dict = False
35
- continue # Skip the closing brace of the old dict
36
- elif not in_dict:
37
- new_lines.append(line)
38
-
39
- with open(generator_path, 'w', encoding='utf-8') as f:
40
- f.writelines(new_lines)
41
-
42
-
43
- def run_script(script_path):
44
- script_path = os.path.normpath(script_path)
45
 
46
- print(f"\n{'='*20}")
47
- print(f"Executing: python {script_path}")
48
- print(f"{'='*20}")
49
 
 
 
 
 
 
 
50
  try:
51
- result = subprocess.run(
52
- [sys.executable, script_path],
53
- check=True,
54
- capture_output=True,
55
- text=True
56
- )
57
- print("Success!")
58
- print("Output:")
59
  print(result.stdout)
60
  if result.stderr:
61
- print("Stderr:")
62
  print(result.stderr)
63
- except FileNotFoundError:
64
- print(f"ERROR: Script not found at '{script_path}'")
65
- sys.exit(1)
66
  except subprocess.CalledProcessError as e:
67
- print(f"ERROR: Script '{script_path}' failed with exit code {e.returncode}")
68
- print("Stdout:")
69
  print(e.stdout)
70
- print("Stderr:")
71
  print(e.stderr)
72
- sys.exit(1)
73
- except Exception as e:
74
- print(f"An unexpected error occurred while running '{script_path}': {e}")
75
- sys.exit(1)
76
 
77
-
78
- def generate_html_for_demo(image_path, prompt, output_dir="screencoder/data/output"):
79
  """
80
- A modified workflow for the Gradio demo.
81
- It takes an image path and a prompt, and returns the path to the final HTML file.
 
 
 
82
  """
83
- print("Starting the Screencoder demo workflow...")
84
-
85
- # Setup paths
86
- project_root = os.path.dirname(__file__)
87
- # The block_parsor script expects a specific input file name, so we must place our image there.
88
- # IMPORTANT: This assumes a single-user-at-a-time workflow.
89
- # For multi-user, you'd need isolated temp directories.
90
- target_input_path = os.path.join(project_root, "data/input/test1.png")
91
 
92
- # Ensure the input directory exists
93
- os.makedirs(os.path.dirname(target_input_path), exist_ok=True)
 
 
 
94
 
95
- # Copy the user-uploaded image to the location the script expects
96
- import shutil
97
- shutil.copy(image_path, target_input_path)
98
-
99
- # --- Part 1: Initial Generation with Placeholders ---
100
- print("\n--- Part 1: Initial Generation with Placeholders ---")
101
- inject_prompt_to_generator(prompt)
102
- run_script(os.path.join(project_root, "block_parsor.py"))
103
- run_script(os.path.join(project_root, "html_generator.py"))
104
-
105
- # --- Part 2: Final HTML Code Generation ---
106
- print("\n--- Part 2: Final HTML Code Generation ---")
107
- run_script(os.path.join(project_root, "image_box_detection.py"))
108
- run_script(os.path.join(project_root, "UIED/run_single.py"))
109
- run_script(os.path.join(project_root, "mapping.py"))
110
- run_script(os.path.join(project_root, "image_replacer.py"))
111
-
112
- final_html_path = os.path.join(output_dir, "test1_layout_final.html")
113
- print(f"\nScreencoder demo workflow completed! Final HTML at: {final_html_path}")
114
-
115
- # Check if the final file exists
116
- if os.path.exists(final_html_path):
117
- with open(final_html_path, 'r', encoding='utf-8') as f:
118
- return f.read()
119
- else:
120
- return "<html><body><h1>Error: Final HTML not generated.</h1></body></html>"
121
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
  def main():
124
- """Main function to run the entire Screencoder workflow."""
125
- print("Starting the Screencoder full workflow...")
126
-
127
- # --- Part 1: Initial Generation with Placeholders ---
128
- print("\n--- Part 1: Initial Generation with Placeholders ---")
129
- run_script("block_parsor.py")
130
- run_script("html_generator.py")
131
-
132
- # --- Part 2: Final HTML Code Generation ---
133
- print("\n--- Part 2: Final HTML Code Generation ---")
134
- run_script("image_box_detection.py")
135
- run_script("UIED/run_single.py")
136
- run_script("mapping.py")
137
- run_script("image_replacer.py")
138
-
139
  print("\nScreencoder workflow completed successfully!")
140
 
141
  if __name__ == "__main__":
 
2
  import sys
3
  import os
4
  import json
5
+ import uuid
6
+ import shutil
7
+ from PIL import Image
8
 
9
+ # This function is now more robust, injecting the prompt into a temporary copy of the generator.
10
+ def inject_prompt_to_generator(prompt_text, temp_generator_path):
 
 
11
  if not prompt_text:
12
  return
13
 
 
 
14
  user_instruction = {
15
  "sidebar": "Make all icons look better; fill in relevant English text; beautify the layout.",
16
  "header": "Make the Google logo look better; change the avatar color to be more appealing.",
 
18
  "main content": prompt_text
19
  }
20
 
21
+ with open(temp_generator_path, 'r', encoding='utf-8') as f:
22
+ content = f.read()
 
23
 
24
+ start_marker = "user_instruction = {"
25
+ end_marker = "}"
26
+ start_index = content.find(start_marker)
27
+ end_index = content.find(end_marker, start_index)
28
+
29
+ if start_index != -1 and end_index != -1:
30
+ dict_str = f"user_instruction = {json.dumps(user_instruction, indent=4)}"
31
+ content = content[:start_index] + dict_str + content[end_index+1:]
32
+
33
+ with open(temp_generator_path, 'w', encoding='utf-8') as f:
34
+ f.write(content)
35
+
36
def run_script_with_run_id(script_name, run_id, instructions=None):
    """Execute one pipeline script as a subprocess, passing it ``--run_id``.

    Args:
        script_name: Script path relative to this file's directory. If it is
            not found there, the same name is looked up under ``UIED/``.
        run_id: Identifier forwarded to the script via ``--run_id``.
        instructions: Optional JSON-serialisable object. It is forwarded as
            ``--instructions <json>`` only when it is truthy and
            ``script_name`` is exactly ``"html_generator.py"``.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status; its captured stdout/stderr are printed before re-raising.
    """
    screencoder_dir = os.path.dirname(os.path.abspath(__file__))
    script_path = os.path.join(screencoder_dir, script_name)
    if not os.path.exists(script_path):
        # Handle scripts inside subdirectories like UIED/
        script_path = os.path.join(screencoder_dir, "UIED", script_name)

    # Use the interpreter that is running this process rather than whatever
    # "python" happens to resolve to on PATH, so the child sees the same
    # virtualenv and installed packages (and works where only "python3" exists).
    command = [sys.executable, script_path, "--run_id", run_id]

    # Add instructions to the command if provided
    if instructions and script_name == "html_generator.py":
        instructions_json = json.dumps(instructions)
        command.extend(["--instructions", instructions_json])

    print(f"\n--- Running script: {script_name} ---")
    try:
        # Pass the current environment variables to the subprocess
        result = subprocess.run(command, check=True, capture_output=True, text=True, env=os.environ)
        print(result.stdout)
        if result.stderr:
            print("Error:")
            print(result.stderr)
    except subprocess.CalledProcessError as e:
        print(f"Error executing {script_name}:")
        print(e.stdout)
        print(e.stderr)
        raise  # Re-raise the exception to stop the workflow if a script fails
 
 
 
64
 
65
def generate_html_for_demo(image_path, instructions, *, cleanup=False):
    """Run the Screencoder pipeline for one screenshot and return the HTML.

    Every call gets a fresh UUID ``run_id``. The screenshot is re-encoded as
    ``data/tmp/<run_id>/<run_id>.png`` and the pipeline scripts are run in
    sequence through ``run_script_with_run_id``.

    Args:
        image_path: Path of the uploaded screenshot.
        instructions: Optional instructions passed along with the
            ``html_generator.py`` stage.
        cleanup: When true, delete the per-run ``tmp`` and ``output``
            directories before returning. Defaults to false, which keeps the
            existing behaviour of leaving all files on disk.

    Returns:
        str: The generated HTML on success, otherwise a short error message.
        Failures inside the workflow are reported through the return value
        instead of being raised.
    """
    run_id = str(uuid.uuid4())
    print(f"--- Starting Screencoder workflow for run_id: {run_id} ---")

    base_dir = os.path.dirname(os.path.abspath(__file__))
    tmp_dir = os.path.join(base_dir, 'data', 'tmp', run_id)
    output_dir = os.path.join(base_dir, 'data', 'output', run_id)
    os.makedirs(tmp_dir, exist_ok=True)
    os.makedirs(output_dir, exist_ok=True)

    try:
        # 1. Copy user-uploaded image to the temp input directory
        new_image_path = os.path.join(tmp_dir, f"{run_id}.png")
        # The context manager closes the source file handle once the PNG copy
        # has been written.
        with Image.open(image_path) as img:
            img.save(new_image_path, "PNG")

        # 2. Run the processing scripts in sequence
        run_script_with_run_id("UIED/run_single.py", run_id)
        run_script_with_run_id("block_parsor.py", run_id)
        run_script_with_run_id("html_generator.py", run_id, instructions)
        run_script_with_run_id("image_box_detection.py", run_id)
        run_script_with_run_id("mapping.py", run_id)
        run_script_with_run_id("image_replacer.py", run_id)

        # 3. Read the final generated HTML
        final_html_path = os.path.join(output_dir, f"{run_id}_layout_final.html")
        if not os.path.exists(final_html_path):
            return f"Error: Final HTML file not found for run_id: {run_id}"

        with open(final_html_path, 'r', encoding='utf-8') as f:
            html_content = f.read()
        print(f"Successfully generated HTML for run_id: {run_id}")
        return html_content

    except Exception as e:
        # Top-level boundary for the demo: report the failure as text.
        print(f"An error occurred during the workflow for run_id {run_id}: {e}")
        return f"An error occurred: {e}"
    finally:
        # 4. Cleanup: only remove the run directories when explicitly asked,
        # and only claim success when the removal actually happened.
        # NOTE(review): image_replacer.py saves cropped images next to the
        # final HTML; if the returned HTML links to them by path, removing
        # output_dir would break those links -- confirm before enabling.
        if cleanup:
            try:
                shutil.rmtree(tmp_dir)
                shutil.rmtree(output_dir)
                print(f"Cleaned up temporary files for run_id: {run_id}")
            except OSError as e:
                print(f"Error cleaning up temporary files for run_id {run_id}: {e}")
        else:
            print(f"Keeping intermediate files for run_id: {run_id}")
117
 
118
def main():
    """Run the legacy Screencoder workflow with the fixed run id ``test1``.

    The pipeline stages are executed strictly in the order listed below; an
    exception raised by any stage is not caught here, so later stages and the
    completion message are skipped.
    """
    print("Starting the Screencoder full workflow (legacy)...")

    # The legacy entry point does not create dynamic run ids.
    legacy_run_id = "test1"
    pipeline = (
        "UIED/run_single.py",
        "block_parsor.py",
        "html_generator.py",
        "image_box_detection.py",
        "mapping.py",
        "image_replacer.py",
    )
    for stage in pipeline:
        run_script_with_run_id(stage, legacy_run_id)

    print("\nScreencoder workflow completed successfully!")
131
 
132
  if __name__ == "__main__":
screencoder/mapping.py CHANGED
@@ -227,21 +227,41 @@ def generate_debug_overlay(img_path, all_uied_boxes, region_results, uied_shape,
227
  cv2.imwrite(str(out_png), canvas)
228
 
229
 
230
- def main(args):
231
- # 1. Load the original screenshot to get its absolute dimensions
232
- if not args.debug_src or not args.debug_src.exists():
233
- sys.exit("Error: A valid --debug-src image path must be provided for coordinate conversion.")
 
 
 
234
 
235
- orig_img = cv2.imread(str(args.debug_src))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
  if orig_img is None:
237
- sys.exit(f"Error: Could not read debug source image at {args.debug_src}.")
238
  H_orig, W_orig, _ = orig_img.shape
239
 
240
  # 2. Load proportional data and convert to absolute pixel coordinates
241
- pixel_regions, pixel_placeholders = load_regions_and_placeholders(args.gray, W_orig, H_orig)
242
 
243
  # 3. Load UIED data
244
- all_uied_boxes, uied_shape = load_uied_boxes(args.uied)
245
 
246
  if not pixel_placeholders or not all_uied_boxes:
247
  print("Error: Could not proceed without placeholder and UIED data.")
@@ -294,21 +314,19 @@ def main(args):
294
  # 6. Report and save results
295
  print(f"Successfully created {total_mappings_count} one-to-one mappings out of {total_placeholders_count} placeholders.")
296
 
297
- args.out.write_text(json.dumps(final_results, indent=2, ensure_ascii=False))
298
- print(f"Mapping data written to {args.out}")
299
 
300
- if args.debug:
301
- if not args.debug_src or not args.debug_src.exists():
302
- print("Error: A valid --debug-src image path must be provided when using --debug.")
303
- return
304
- generate_debug_overlay(args.debug_src, all_uied_boxes, final_results, uied_shape, args.debug)
305
- print(f"Debug image written to {args.debug}")
 
 
 
 
306
 
307
  if __name__ == "__main__":
308
- ap = argparse.ArgumentParser()
309
- ap.add_argument("--gray", type=Path, default=Path("data/tmp/test1_bboxes.json"), help="Path to the JSON file with gray placeholder boxes.")
310
- ap.add_argument("--uied", type=Path, default=Path("data/tmp/ip/test1.json"), help="Path to the JSON file with UIED detected boxes.")
311
- ap.add_argument("--out", default=Path("data/tmp/mapping_full_test1.json"), type=Path, help="Output path for the mapping JSON file.")
312
- ap.add_argument("--debug", type=Path, default=Path("data/tmp/overlay_test_test1.png"), help="Output path for the debug overlay PNG.")
313
- ap.add_argument("--debug-src", type=Path, default=Path("data/input/test1.png"), help="Path to the original screenshot for the debug overlay background.")
314
- main(ap.parse_args())
 
227
  cv2.imwrite(str(out_png), canvas)
228
 
229
 
230
+ def main():
231
+ args = get_args()
232
+ run_id = args.run_id
233
+
234
+ # --- Dynamic Path Construction ---
235
+ base_dir = Path(__file__).parent.resolve()
236
+ tmp_dir = base_dir / 'data' / 'tmp' / run_id
237
 
238
+ gray_json_path = tmp_dir / f"{run_id}_bboxes.json"
239
+ uied_json_path = tmp_dir / "ip" / f"{run_id}.json"
240
+ mapping_output_path = tmp_dir / f"mapping_full_{run_id}.json"
241
+ debug_src_path = tmp_dir / f"{run_id}.png"
242
+ debug_overlay_path = tmp_dir / f"overlay_test_{run_id}.png"
243
+
244
+ # --- Input Validation ---
245
+ if not gray_json_path.exists():
246
+ sys.exit(f"Error: Placeholder JSON not found at {gray_json_path}")
247
+ if not uied_json_path.exists():
248
+ sys.exit(f"Error: UIED JSON not found at {uied_json_path}")
249
+ if not debug_src_path.exists():
250
+ sys.exit(f"Error: Source image for coordinate conversion not found at {debug_src_path}")
251
+
252
+ print(f"--- Starting Mapping for run_id: {run_id} ---")
253
+
254
+ # 1. Load the original screenshot to get its absolute dimensions
255
+ orig_img = cv2.imread(str(debug_src_path))
256
  if orig_img is None:
257
+ sys.exit(f"Error: Could not read debug source image at {debug_src_path}.")
258
  H_orig, W_orig, _ = orig_img.shape
259
 
260
  # 2. Load proportional data and convert to absolute pixel coordinates
261
+ pixel_regions, pixel_placeholders = load_regions_and_placeholders(gray_json_path, W_orig, H_orig)
262
 
263
  # 3. Load UIED data
264
+ all_uied_boxes, uied_shape = load_uied_boxes(uied_json_path)
265
 
266
  if not pixel_placeholders or not all_uied_boxes:
267
  print("Error: Could not proceed without placeholder and UIED data.")
 
314
  # 6. Report and save results
315
  print(f"Successfully created {total_mappings_count} one-to-one mappings out of {total_placeholders_count} placeholders.")
316
 
317
+ mapping_output_path.write_text(json.dumps(final_results, indent=2, ensure_ascii=False))
318
+ print(f"Mapping data written to {mapping_output_path}")
319
 
320
+ # Always generate the debug image if the source exists
321
+ generate_debug_overlay(debug_src_path, all_uied_boxes, final_results, uied_shape, debug_overlay_path)
322
+ print(f"Debug image written to {debug_overlay_path}")
323
+
324
+ print(f"--- Mapping Complete for run_id: {run_id} ---")
325
+
326
def get_args():
    """Parse the command-line options of the mapping step.

    Returns:
        argparse.Namespace: carries ``run_id``, the mandatory identifier of
        the processing run.
    """
    cli = argparse.ArgumentParser(
        description="Map UIED components to placeholder boxes.",
    )
    cli.add_argument(
        '--run_id',
        type=str,
        required=True,
        help="A unique identifier for the processing run.",
    )
    parsed = cli.parse_args()
    return parsed
330
 
331
  if __name__ == "__main__":
332
+ main()