import gradio as gr import requests from urllib.parse import urlparse, urljoin from bs4 import BeautifulSoup import asyncio # HTML and JavaScript for the "Copy Code" button copy_button_html = """ """ # Common functions def is_valid_url(url): """Checks if the string is a valid URL.""" try: result = urlparse(url) return all([result.scheme, result.netloc]) # Check for scheme and domain except: return False async def fetch_file_content(url): """Fetches the content of a file (CSS, JS, etc.) from a URL.""" try: response = await asyncio.to_thread(requests.get, url, timeout=5) response.raise_for_status() return response.text except: return "Failed to fetch content." # URL to Text Converter async def extract_additional_resources(url): """Extracts links to CSS, JS, and images from HTML code.""" try: response = await asyncio.to_thread(requests.get, url, timeout=5) response.raise_for_status() # Check if the content is HTML if 'text/html' in response.headers.get('Content-Type', ''): soup = BeautifulSoup(response.text, "html.parser") # Extract CSS links (limit to 5) css_links = [urljoin(url, link["href"]) for link in soup.find_all("link", rel="stylesheet") if "href" in link.attrs][:5] # Extract JS links (limit to 5) js_links = [urljoin(url, script["src"]) for script in soup.find_all("script") if "src" in script.attrs][:5] # Extract image links (limit to 5) img_links = [urljoin(url, img["src"]) for img in soup.find_all("img") if "src" in img.attrs][:5] # Fetch CSS and JS content asynchronously css_content = await asyncio.gather(*[fetch_file_content(link) for link in css_links]) js_content = await asyncio.gather(*[fetch_file_content(link) for link in js_links]) return css_links, js_links, img_links, css_content, js_content else: # If it's not HTML, treat it as a file return [], [], [], [response.text], [] except Exception as e: return [], [], [], [], [] async def convert_to_text(url): # Handle view-source: URLs if url.startswith("view-source:"): url = url[len("view-source:"):] if not is_valid_url(url): return "Error: Please enter a valid URL.", "", None, [], [], [], [], [] # Return error message and empty data try: # Set headers to mimic a browser request headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" } response = await asyncio.to_thread(requests.get, url, headers=headers, timeout=5) response.raise_for_status() # Check for HTTP errors (e.g., 404, 500) # Return results status = f"Request status: {response.status_code}" content_length = f"Content size: {len(response.text)} characters" results = f"{status}\n{content_length}" # Save text content to a file file_path = "downloaded_content.txt" with open(file_path, "w", encoding="utf-8") as file: file.write(response.text) # Extract additional resources css_links, js_links, img_links, css_content, js_content = await extract_additional_resources(url) return results, response.text, file_path, css_links, js_links, img_links, css_content, js_content except requests.exceptions.RequestException as e: return f"Error: {e}", "", None, [], [], [], [], [] # Return error message and empty data # Model to Text Converter async def fetch_model_info(model_url): """Fetches model description and installation instructions.""" try: if "huggingface.co" in model_url: # Fetch model card from Hugging Face response = await asyncio.to_thread(requests.get, model_url, timeout=5) response.raise_for_status() soup = BeautifulSoup(response.text, "html.parser") # Extract model description description = soup.find("div", {"class": "prose"}).get_text(strip=True) if soup.find("div", {"class": "prose"}) else "No description available." # Generate installation instructions model_name = model_url.split("/")[-1] install_instructions = f"To install this model, run:\n```bash\npip install transformers\n```\nThen load the model in Python:\n```python\nfrom transformers import AutoModel, AutoTokenizer\nmodel = AutoModel.from_pretrained('{model_name}')\ntokenizer = AutoTokenizer.from_pretrained('{model_name}')\n```" return description, install_instructions elif "github.com" in model_url: # Fetch README from GitHub readme_url = f"{model_url}/raw/main/README.md" response = await asyncio.to_thread(requests.get, readme_url, timeout=5) response.raise_for_status() # Extract description from README description = response.text if response.text else "No description available." # Generate installation instructions install_instructions = f"To install this model, clone the repository:\n```bash\ngit clone {model_url}.git\ncd {model_url.split('/')[-1]}\n```" return description, install_instructions else: return "Unsupported repository.", "" except Exception as e: return f"Error: {e}", "" async def fetch_model_file_content(model_url, file_path): """Fetches the content of a file from a model repository (Hugging Face or GitHub).""" try: # Construct the full URL to the file if "huggingface.co" in model_url: # Убираем /blob/main/ из URL, если он есть if "/blob/main/" in model_url: model_url = model_url.replace("/blob/main/", "/") # Hugging Face URL format: https://huggingface.co/{model}/raw/main/{file_path} full_url = f"{model_url}/raw/main/{file_path}" elif "github.com" in model_url: # GitHub URL format: https://github.com/{user}/{repo}/raw/main/{file_path} full_url = f"{model_url}/raw/main/{file_path}" else: return "Error: Unsupported repository." # Fetch the file content response = await asyncio.to_thread(requests.get, full_url, timeout=5) response.raise_for_status() return response.text except Exception as e: return f"Error: {e}" # Space to Text Converter async def fetch_space_file_content(space_url, file_path): """Fetches the content of a file from a Hugging Face Space.""" try: # Construct the full URL to the file if "huggingface.co/spaces" in space_url: # Hugging Face Spaces URL format: https://huggingface.co/spaces/{user}/{space}/raw/main/{file_path} full_url = f"{space_url}/raw/main/{file_path}" else: return "Error: Unsupported repository. Please provide a Hugging Face Space URL." # Fetch the file content response = await asyncio.to_thread(requests.get, full_url, timeout=5) response.raise_for_status() return response.text except Exception as e: return f"Error: {e}" # CodePen to Text Converter async def fetch_codepen_project(codepen_url): """Fetches the HTML, CSS, and JavaScript content from a CodePen project.""" try: # Extract the project ID from the URL if "codepen.io" not in codepen_url: return "Error: Please enter a valid CodePen URL.", "", "" # Fetch the CodePen project page response = await asyncio.to_thread(requests.get, codepen_url, timeout=5) response.raise_for_status() soup = BeautifulSoup(response.text, "html.parser") # Extract HTML, CSS, and JS content html_content = soup.find("textarea", {"id": "html-input"}).text if soup.find("textarea", {"id": "html-input"}) else "" css_content = soup.find("textarea", {"id": "css-input"}).text if soup.find("textarea", {"id": "css-input"}) else "" js_content = soup.find("textarea", {"id": "js-input"}).text if soup.find("textarea", {"id": "js-input"}) else "" return html_content, css_content, js_content except Exception as e: return f"Error: {e}", "", "" # Web Data Extractor async def extract_web_data(url): """Extracts additional web data like description, image preview, colors, fonts, similar code, videos, and files.""" try: response = await asyncio.to_thread(requests.get, url, timeout=5) response.raise_for_status() soup = BeautifulSoup(response.text, "html.parser") # Extract site description description = soup.find("meta", attrs={"name": "description"})["content"] if soup.find("meta", attrs={"name": "description"}) else "No description available." # Extract image preview (first image on the page) image_preview = soup.find("img")["src"] if soup.find("img") else "No image preview available." # Extract colors (from CSS or inline styles) colors = [] for style in soup.find_all("style"): colors.extend([color for color in style.text.split() if color.startswith("#")]) colors = list(set(colors))[:5] # Limit to 5 unique colors # Extract fonts (from CSS or Google Fonts) fonts = [] for link in soup.find_all("link", attrs={"href": True}): if "fonts.googleapis.com" in link["href"]: fonts.append(link["href"]) fonts = list(set(fonts))[:5] # Limit to 5 unique fonts # Extract similar code (shorter version of the HTML) similar_code = str(soup)[:1000] # Limit to first 1000 characters # Extract videos (embedded iframes) videos = [iframe["src"] for iframe in soup.find_all("iframe") if "src" in iframe.attrs] # Extract files (links to downloadable files) files = [a["href"] for a in soup.find_all("a", attrs={"href": True}) if a["href"].endswith((".pdf", ".zip", ".doc", ".docx", ".xls", ".xlsx"))] return description, image_preview, colors, fonts, similar_code, videos, files except Exception as e: return f"Error: {e}", "", [], [], "", [], [] # Create the Gradio interface with gr.Blocks() as demo: gr.HTML(copy_button_html) # Add the "Copy Code" script with gr.Tabs(): # Tab 1: URL to Text Converter with gr.Tab("URL to Text Converter"): gr.Markdown("## URL to Text Converter") gr.Markdown("Enter a URL to fetch its text content and download it as a .txt file.") with gr.Row(): url_input = gr.Textbox(label="Enter URL", placeholder="https://example.com or view-source:https://example.com") with gr.Row(): results_output = gr.Textbox(label="Request Results", interactive=False) text_output = gr.Textbox(label="Text Content", interactive=True, elem_id="output-text") with gr.Row(): gr.HTML("") # Add the "Copy Code" button file_output = gr.File(label="Download File", visible=False) # Hidden file download component submit_button = gr.Button("Fetch Content") submit_button.click( fn=convert_to_text, inputs=url_input, outputs=[ results_output, text_output, file_output, gr.Textbox(label="CSS Files"), gr.Textbox(label="JS Files"), gr.Textbox(label="Images"), gr.Textbox(label="CSS Content"), gr.Textbox(label="JS Content") ] ) # Add an Accordion to show/hide additional resources with gr.Accordion("Show/Hide Additional Resources", open=False): gr.Markdown("### CSS Files") css_output = gr.Textbox(label="CSS Files", interactive=False) gr.Markdown("### JS Files") js_output = gr.Textbox(label="JS Files", interactive=False) gr.Markdown("### Images") img_output = gr.Textbox(label="Images", interactive=False) gr.Markdown("### CSS Content") css_content_output = gr.Textbox(label="CSS Content", interactive=True) gr.Markdown("### JS Content") js_content_output = gr.Textbox(label="JS Content", interactive=True) # Tab 2: Model to Text Converter with gr.Tab("Model to Text Converter"): gr.Markdown("## Model to Text Converter") gr.Markdown("Enter a link to a model on Hugging Face or GitHub, and specify the file path.") with gr.Row(): model_url_input = gr.Textbox(label="Model URL", placeholder="https://huggingface.co/... or https://github.com/...") file_path_input = gr.Textbox(label="File Path", placeholder="e.g., config.json or README.md") with gr.Row(): model_description_output = gr.Textbox(label="Model Description", interactive=False) install_instructions_output = gr.Textbox(label="Installation Instructions", interactive=False) with gr.Row(): model_content_output = gr.Textbox(label="File Content", interactive=True, elem_id="model-content-output") with gr.Row(): gr.HTML("") # Add the "Copy Code" button submit_model_button = gr.Button("Fetch Model Info and File Content") submit_model_button.click( fn=fetch_model_info, inputs=[model_url_input], outputs=[model_description_output, install_instructions_output] ) submit_model_button.click( fn=fetch_model_file_content, inputs=[model_url_input, file_path_input], outputs=[model_content_output] ) # Tab 3: Space to Text Converter with gr.Tab("Space to Text Converter"): gr.Markdown("## Space to Text Converter") gr.Markdown("Enter a link to a Hugging Face Space and specify the file path to fetch its content.") with gr.Row(): space_url_input = gr.Textbox(label="Space URL", placeholder="https://huggingface.co/spaces/...") space_file_path_input = gr.Textbox(label="File Path", placeholder="e.g., app.py or README.md") with gr.Row(): space_content_output = gr.Textbox(label="File Content", interactive=True, elem_id="space-content-output") with gr.Row(): gr.HTML("") # Add the "Copy Code" button submit_space_button = gr.Button("Fetch File Content") submit_space_button.click( fn=fetch_space_file_content, inputs=[space_url_input, space_file_path_input], outputs=[space_content_output] ) # Tab 4: CodePen to Text Converter with gr.Tab("CodePen to Text Converter"): gr.Markdown("## CodePen to Text Converter") gr.Markdown("Enter a CodePen project URL to fetch its HTML, CSS, and JavaScript content.") with gr.Row(): codepen_url_input = gr.Textbox(label="CodePen URL", placeholder="https://codepen.io/.../pen/...") with gr.Row(): html_output = gr.Textbox(label="HTML Content", interactive=True, elem_id="html-output") css_output = gr.Textbox(label="CSS Content", interactive=True, elem_id="css-output") js_output = gr.Textbox(label="JavaScript Content", interactive=True, elem_id="js-output") with gr.Row(): gr.HTML("") gr.HTML("") gr.HTML("") submit_codepen_button = gr.Button("Fetch CodePen Content") submit_codepen_button.click( fn=fetch_codepen_project, inputs=[codepen_url_input], outputs=[html_output, css_output, js_output] ) # Tab 5: Web Data Extractor with gr.Tab("Web Data Extractor"): gr.Markdown("## Web Data Extractor") gr.Markdown("Enter a URL to extract additional web data like description, image preview, colors, fonts, similar code, videos, and files.") with gr.Row(): web_data_url_input = gr.Textbox(label="Enter URL", placeholder="https://example.com") with gr.Row(): description_output = gr.Textbox(label="Site Description", interactive=False) image_preview_output = gr.Image(label="Image Preview", interactive=False) with gr.Row(): colors_output = gr.Textbox(label="Colors", interactive=False) fonts_output = gr.Textbox(label="Fonts", interactive=False) with gr.Row(): similar_code_output = gr.Textbox(label="Similar Code", interactive=True, elem_id="similar-code-output") with gr.Row(): videos_output = gr.Textbox(label="Videos", interactive=False) files_output = gr.Textbox(label="Files", interactive=False) with gr.Row(): gr.HTML("") # Add the "Copy Code" button submit_web_data_button = gr.Button("Extract Web Data") submit_web_data_button.click( fn=extract_web_data, inputs=[web_data_url_input], outputs=[description_output, image_preview_output, colors_output, fonts_output, similar_code_output, videos_output, files_output] ) # Launch the interface demo.launch()