Felguk committed on
Commit
e19a5af
·
verified ·
1 Parent(s): a9dfe2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -0
app.py CHANGED
@@ -199,6 +199,46 @@ async def fetch_codepen_project(codepen_url):
199
  except Exception as e:
200
  return f"Error: {e}", "", ""
201
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
  # Create the Gradio interface
203
  with gr.Blocks() as demo:
204
  gr.HTML(copy_button_html) # Add the "Copy Code" script
@@ -326,5 +366,38 @@ with gr.Blocks() as demo:
326
  outputs=[html_output, css_output, js_output]
327
  )
328
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
329
  # Launch the interface
330
  demo.launch()
 
199
  except Exception as e:
200
  return f"Error: {e}", "", ""
201
 
202
+ # Web Data Extractor
203
async def extract_web_data(url):
    """Download ``url`` and pull summary data out of its HTML.

    Args:
        url: Address of the page to fetch.

    Returns:
        A 7-tuple ``(description, image_preview, colors, fonts,
        similar_code, videos, files)``:

        * description -- content of ``<meta name="description">``, or
          ``"No description available."`` when it is missing or empty.
        * image_preview -- absolute URL of the first ``<img>`` that has a
          non-empty ``src``, or ``None`` when there is none.
        * colors -- up to 5 distinct hex colour literals found in
          ``<style>`` elements, in order of first appearance.
        * fonts -- up to 5 distinct ``<link>`` hrefs that point at
          ``fonts.googleapis.com``, in order of first appearance.
        * similar_code -- the first 1000 characters of the parsed HTML.
        * videos -- ``src`` of every ``<iframe>`` that has one.
        * files -- hrefs of ``<a>`` links whose path ends in a known
          document/archive extension.

        On any failure the tuple is
        ``("Error: <exception>", None, [], [], "", [], [])``.
    """
    # Imported here so the function does not depend on how the module's
    # top-level import block is organised.
    import re
    from urllib.parse import urljoin

    file_suffixes = (".pdf", ".zip", ".doc", ".docx", ".xls", ".xlsx")
    # 3/4/6/8-digit hex literals. This is a heuristic: an id selector made
    # only of hex digits (e.g. "#face") still matches.
    hex_color = re.compile(
        r"#(?:[0-9a-fA-F]{8}|[0-9a-fA-F]{6}|[0-9a-fA-F]{3,4})(?![\w-])"
    )

    try:
        # requests is blocking; run it in a worker thread so the event loop
        # stays free.
        response = await asyncio.to_thread(requests.get, url, timeout=5)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        # Site description: tolerate a <meta> tag without a content attribute.
        meta = soup.find("meta", attrs={"name": "description"})
        description = (meta.get("content") if meta else None) or "No description available."

        # Image preview: first <img> that actually carries a src, resolved
        # against the page URL so relative paths become fetchable.
        # None (not a placeholder sentence) is returned when nothing is found,
        # because the value is wired to an image output, not a text box.
        image_preview = None
        img = soup.find("img", src=True)
        src = img["src"].strip() if img else ""
        if src:
            try:
                image_preview = urljoin(url, src)
            except ValueError:
                # Malformed src; skip the preview instead of failing everything.
                image_preview = None

        # Colours from embedded stylesheets; dict.fromkeys de-duplicates while
        # keeping first-seen order, so the 5 kept are stable between runs.
        found_colors = []
        for style in soup.find_all("style"):
            found_colors.extend(hex_color.findall(style.text))
        colors = list(dict.fromkeys(c.lower() for c in found_colors))[:5]

        # Fonts: Google Fonts stylesheet links.
        font_links = [
            link["href"]
            for link in soup.find_all("link", attrs={"href": True})
            if "fonts.googleapis.com" in link["href"]
        ]
        fonts = list(dict.fromkeys(font_links))[:5]

        # "Similar code": a short prefix of the parsed document.
        similar_code = str(soup)[:1000]

        # Videos: embedded iframes that declare a source.
        videos = [iframe["src"] for iframe in soup.find_all("iframe", src=True)]

        # Files: compare only the path part (query string and fragment
        # stripped), case-insensitively, so "Report.PDF?dl=1" is recognised.
        files = []
        for anchor in soup.find_all("a", attrs={"href": True}):
            href = anchor["href"]
            path = href.split("#", 1)[0].split("?", 1)[0]
            if path.lower().endswith(file_suffixes):
                files.append(href)

        return description, image_preview, colors, fonts, similar_code, videos, files
    except Exception as e:
        # UI boundary: report the failure in the description slot and give
        # every other output an empty value of the matching type.
        return f"Error: {e}", None, [], [], "", [], []
241
+
242
  # Create the Gradio interface
243
  with gr.Blocks() as demo:
244
  gr.HTML(copy_button_html) # Add the "Copy Code" script
 
366
  outputs=[html_output, css_output, js_output]
367
  )
368
 
369
+ # Tab 5: Web Data Extractor
370
with gr.Tab("Web Data Extractor"):
    # Heading and a one-line description of what this tab extracts.
    gr.Markdown("## Web Data Extractor")
    gr.Markdown("Enter a URL to extract additional web data like description, image preview, colors, fonts, similar code, videos, and files.")

    # Input row: the page address handed to extract_web_data.
    with gr.Row():
        web_data_url_input = gr.Textbox(
            label="Enter URL",
            placeholder="https://example.com",
        )

    # Output rows: one component per element of extract_web_data's result,
    # created in the same order as the tuple it returns.
    with gr.Row():
        description_output = gr.Textbox(
            label="Site Description",
            interactive=False,
        )
        image_preview_output = gr.Image(
            label="Image Preview",
            interactive=False,
        )

    with gr.Row():
        colors_output = gr.Textbox(
            label="Colors",
            interactive=False,
        )
        fonts_output = gr.Textbox(
            label="Fonts",
            interactive=False,
        )

    with gr.Row():
        # The elem_id is the same id the "Copy Code" button below passes to
        # copyCode().
        similar_code_output = gr.Textbox(
            label="Similar Code",
            interactive=True,
            elem_id="similar-code-output",
        )

    with gr.Row():
        videos_output = gr.Textbox(
            label="Videos",
            interactive=False,
        )
        files_output = gr.Textbox(
            label="Files",
            interactive=False,
        )

    with gr.Row():
        # Plain HTML button; copyCode is not defined in this block
        # (presumably it comes from the script injected via copy_button_html
        # -- confirm).
        gr.HTML("<button onclick='copyCode(\"similar-code-output\")'>Copy Code</button>")

    # Run the extractor on click and fan its 7-tuple out to the outputs.
    submit_web_data_button = gr.Button("Extract Web Data")
    submit_web_data_button.click(
        fn=extract_web_data,
        inputs=[web_data_url_input],
        outputs=[
            description_output,
            image_preview_output,
            colors_output,
            fonts_output,
            similar_code_output,
            videos_output,
            files_output,
        ],
    )
+ )
401
+
402
  # Launch the interface
403
  demo.launch()