File size: 15,241 Bytes
80fb263
 
0eaed2f
 
4f25906
80fb263
7a18d26
 
 
 
 
 
 
 
 
 
 
 
 
 
aff9dae
80fb263
f7d9daf
80fb263
 
f7d9daf
80fb263
 
 
4f25906
 
 
 
 
 
 
 
 
aff9dae
4f25906
f7d9daf
0eaed2f
4f25906
0eaed2f
 
4e43700
 
 
0eaed2f
4e43700
 
0eaed2f
4e43700
 
0eaed2f
4e43700
 
0eaed2f
4e43700
 
 
 
 
 
 
 
4f25906
 
a682e5d
4f25906
827719f
 
 
 
80fb263
a682e5d
80fb263
 
f7d9daf
48bc3a2
 
 
4f25906
f7d9daf
48bc3a2
f7d9daf
 
 
48bc3a2
66c6476
f7d9daf
66c6476
 
 
 
f7d9daf
4f25906
a682e5d
 
48bc3a2
a682e5d
48bc3a2
aff9dae
d81c533
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d06a8cb
 
aff9dae
d06a8cb
 
8b34ee3
 
 
d06a8cb
 
 
 
 
aff9dae
d06a8cb
aff9dae
d06a8cb
 
 
 
aff9dae
d06a8cb
aff9dae
8b34ee3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3fe55a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f7d9daf
7a18d26
704e96a
 
aff9dae
 
 
 
 
 
 
827719f
aff9dae
 
 
 
 
 
704e96a
aff9dae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d06a8cb
aff9dae
 
 
d06a8cb
aff9dae
d81c533
 
 
 
aff9dae
704e96a
aff9dae
 
704e96a
aff9dae
d81c533
 
 
 
 
 
aff9dae
d06a8cb
 
 
aff9dae
80fb263
8b34ee3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3fe55a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f7d9daf
48bc3a2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
import gradio as gr
import requests
from urllib.parse import urlparse, urljoin
from bs4 import BeautifulSoup
import asyncio

# JavaScript snippet injected once into the Gradio page (via gr.HTML).
# copyCode(id) locates the <textarea> inside the Gradio component whose
# elem_id equals `id`, copies its value to the clipboard, and alerts on
# success or failure. The inline <button onclick=...> elements in each
# tab call this function with the matching component's elem_id.
copy_button_html = """
<script>
function copyCode(textareaId) {
    const text = document.querySelector(`#${textareaId} textarea`).value;
    navigator.clipboard.writeText(text).then(() => {
        alert("Text copied to clipboard!");
    }).catch(() => {
        alert("Failed to copy text.");
    });
}
</script>
"""

# Common functions
def is_valid_url(url):
    """Return True if *url* parses with both a scheme and a host.

    Args:
        url: String to validate (e.g. "https://example.com").

    Returns:
        bool: True when the URL has a scheme and a network location.
    """
    try:
        parsed = urlparse(url)
        # A usable absolute URL needs at least a scheme and a host.
        return all([parsed.scheme, parsed.netloc])
    except (ValueError, AttributeError):
        # Narrowed from a bare `except:`: ValueError covers malformed
        # URLs (e.g. bad IPv6 literals), AttributeError non-string input;
        # KeyboardInterrupt/SystemExit now propagate as they should.
        return False

async def fetch_file_content(url):
    """Fetch the body of a resource (CSS, JS, ...) at *url*.

    The blocking requests call runs in a worker thread so the asyncio
    event loop is not stalled.

    Args:
        url: Absolute URL of the resource.

    Returns:
        str: The response text, or the fixed string
        "Failed to fetch content." on any failure.
    """
    try:
        response = await asyncio.to_thread(requests.get, url, timeout=5)
        response.raise_for_status()
        return response.text
    except Exception:
        # Narrowed from a bare `except:` so SystemExit, KeyboardInterrupt
        # and asyncio.CancelledError (BaseException subclasses) propagate
        # instead of being swallowed as a fetch failure.
        return "Failed to fetch content."

# URL to Text Converter
async def extract_additional_resources(url):
    """Collect stylesheet/script/image links (max 5 each) from an HTML page.

    Returns:
        tuple: (css_links, js_links, img_links, css_content, js_content).
        For non-HTML responses the raw body is returned as the sole
        css_content entry; on any error all five values are empty lists.
    """
    try:
        response = await asyncio.to_thread(requests.get, url, timeout=5)
        response.raise_for_status()

        content_type = response.headers.get('Content-Type', '')
        if 'text/html' not in content_type:
            # Not an HTML document: hand the raw body back as-is.
            return [], [], [], [response.text], []

        soup = BeautifulSoup(response.text, "html.parser")

        def absolute_links(tags, attr):
            # Resolve relative references against the page URL, cap at 5.
            return [urljoin(url, tag[attr]) for tag in tags if attr in tag.attrs][:5]

        css_links = absolute_links(soup.find_all("link", rel="stylesheet"), "href")
        js_links = absolute_links(soup.find_all("script"), "src")
        img_links = absolute_links(soup.find_all("img"), "src")

        # Download CSS and JS bodies concurrently.
        css_content = await asyncio.gather(*(fetch_file_content(u) for u in css_links))
        js_content = await asyncio.gather(*(fetch_file_content(u) for u in js_links))

        return css_links, js_links, img_links, css_content, js_content
    except Exception:
        return [], [], [], [], []

async def convert_to_text(url):
    """Download *url*, save its text locally, and gather page resources.

    Returns:
        tuple: (status summary, page text, saved file path, css_links,
        js_links, img_links, css_content, js_content). On failure the
        first element is an "Error: ..." message and the rest are empty.
    """
    # Accept URLs copied from a browser's view-source view.
    prefix = "view-source:"
    if url.startswith(prefix):
        url = url[len(prefix):]

    if not is_valid_url(url):
        return "Error: Please enter a valid URL.", "", None, [], [], [], [], []

    # Mimic a desktop browser; some sites reject the default requests UA.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    try:
        response = await asyncio.to_thread(requests.get, url, headers=headers, timeout=5)
        response.raise_for_status()  # surface HTTP 4xx/5xx as exceptions
    except requests.exceptions.RequestException as e:
        return f"Error: {e}", "", None, [], [], [], [], []

    page_text = response.text
    status = f"Request status: {response.status_code}"
    content_length = f"Content size: {len(page_text)} characters"
    results = f"{status}\n{content_length}"

    # Persist the body so the UI can offer it as a .txt download.
    file_path = "downloaded_content.txt"
    with open(file_path, "w", encoding="utf-8") as out_file:
        out_file.write(page_text)

    # Linked CSS/JS/images (best-effort; errors yield empty lists).
    css_links, js_links, img_links, css_content, js_content = await extract_additional_resources(url)

    return results, page_text, file_path, css_links, js_links, img_links, css_content, js_content

# Model to Text Converter
async def fetch_model_info(model_url):
    """Fetch a model's description and installation instructions.

    Supports Hugging Face model pages and GitHub repositories; any other
    URL yields ("Unsupported repository.", "").

    Args:
        model_url: Link to the model page or repository.

    Returns:
        tuple[str, str]: (description, install_instructions). On error
        the first element is an "Error: ..." message and the second is
        empty.
    """
    try:
        if "huggingface.co" in model_url:
            # Fetch and parse the model card page.
            response = await asyncio.to_thread(requests.get, model_url, timeout=5)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")

            # Query the card body once (the original parsed it twice).
            prose = soup.find("div", {"class": "prose"})
            description = prose.get_text(strip=True) if prose else "No description available."

            # BUGFIX: use the full repo id ("org/name") from the URL path.
            # Taking only the last path segment produced instructions that
            # break from_pretrained() for namespaced models such as
            # "google/flan-t5-base".
            model_name = urlparse(model_url).path.strip("/")
            install_instructions = f"To install this model, run:\n```bash\npip install transformers\n```\nThen load the model in Python:\n```python\nfrom transformers import AutoModel, AutoTokenizer\nmodel = AutoModel.from_pretrained('{model_name}')\ntokenizer = AutoTokenizer.from_pretrained('{model_name}')\n```"

            return description, install_instructions
        elif "github.com" in model_url:
            # GitHub: read the raw README from the main branch.
            readme_url = f"{model_url}/raw/main/README.md"
            response = await asyncio.to_thread(requests.get, readme_url, timeout=5)
            response.raise_for_status()

            description = response.text if response.text else "No description available."

            install_instructions = f"To install this model, clone the repository:\n```bash\ngit clone {model_url}.git\ncd {model_url.split('/')[-1]}\n```"

            return description, install_instructions
        else:
            return "Unsupported repository.", ""
    except Exception as e:
        return f"Error: {e}", ""

async def fetch_model_file_content(model_url, file_path):
    """Fetch one file from a Hugging Face or GitHub repository.

    Args:
        model_url: Repository URL (Hugging Face or GitHub).
        file_path: Path of the file inside the repository.

    Returns:
        str: The file's text, or an "Error: ..." message on failure.
    """
    try:
        if "huggingface.co" in model_url:
            # Drop a "/blob/main/" segment, if present, so the raw-file
            # URL below is valid.
            if "/blob/main/" in model_url:
                model_url = model_url.replace("/blob/main/", "/")
            # Hugging Face raw-file format:
            # https://huggingface.co/{model}/raw/main/{file_path}
            full_url = f"{model_url}/raw/main/{file_path}"
        elif "github.com" in model_url:
            # GitHub raw-file format:
            # https://github.com/{user}/{repo}/raw/main/{file_path}
            full_url = f"{model_url}/raw/main/{file_path}"
        else:
            return "Error: Unsupported repository."

        response = await asyncio.to_thread(requests.get, full_url, timeout=5)
        response.raise_for_status()
        return response.text
    except Exception as e:
        return f"Error: {e}"

# Space to Text Converter
async def fetch_space_file_content(space_url, file_path):
    """Fetch one file from a Hugging Face Space via its raw URL.

    Args:
        space_url: URL of the Space (https://huggingface.co/spaces/...).
        file_path: Path of the file inside the Space repo.

    Returns:
        str: The file's text, or an "Error: ..." message on failure.
    """
    try:
        if "huggingface.co/spaces" not in space_url:
            return "Error: Unsupported repository. Please provide a Hugging Face Space URL."

        # Raw-file format:
        # https://huggingface.co/spaces/{user}/{space}/raw/main/{file_path}
        full_url = f"{space_url}/raw/main/{file_path}"

        response = await asyncio.to_thread(requests.get, full_url, timeout=5)
        response.raise_for_status()
        return response.text
    except Exception as e:
        return f"Error: {e}"

# CodePen to Text Converter
async def fetch_codepen_project(codepen_url):
    """Fetch the HTML, CSS, and JavaScript panels of a CodePen project.

    Args:
        codepen_url: URL of the pen on codepen.io.

    Returns:
        tuple[str, str, str]: (html, css, js); panels that cannot be
        located come back as empty strings. On failure the first element
        is an "Error: ..." message and the others are empty.
    """
    try:
        if "codepen.io" not in codepen_url:
            return "Error: Please enter a valid CodePen URL.", "", ""

        response = await asyncio.to_thread(requests.get, codepen_url, timeout=5)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        def panel_text(editor_id):
            # NOTE(review): assumes the pen's editors are server-rendered
            # <textarea> elements with these ids -- confirm against a
            # live pen, as CodePen may render editors client-side.
            area = soup.find("textarea", {"id": editor_id})
            return area.text if area else ""

        return panel_text("html-input"), panel_text("css-input"), panel_text("js-input")
    except Exception as e:
        return f"Error: {e}", "", ""

# Create the Gradio interface.
# Layout note: Tab 1's click handler is bound AFTER the accordion is
# built so its outputs can target the visible accordion components.
with gr.Blocks() as demo:
    gr.HTML(copy_button_html)  # Inject the copyCode() clipboard helper

    with gr.Tabs():
        # Tab 1: URL to Text Converter
        with gr.Tab("URL to Text Converter"):
            gr.Markdown("## URL to Text Converter")
            gr.Markdown("Enter a URL to fetch its text content and download it as a .txt file.")

            with gr.Row():
                url_input = gr.Textbox(label="Enter URL", placeholder="https://example.com or view-source:https://example.com")

            with gr.Row():
                results_output = gr.Textbox(label="Request Results", interactive=False)
                text_output = gr.Textbox(label="Text Content", interactive=True, elem_id="output-text")

            with gr.Row():
                gr.HTML("<button onclick='copyCode(\"output-text\")'>Copy Code</button>")  # "Copy Code" button
                file_output = gr.File(label="Download File", visible=False)  # Hidden file download component

            submit_button = gr.Button("Fetch Content")

            # Accordion holding the extra resources extracted from the page.
            with gr.Accordion("Show/Hide Additional Resources", open=False):
                gr.Markdown("### CSS Files")
                css_output = gr.Textbox(label="CSS Files", interactive=False)

                gr.Markdown("### JS Files")
                js_output = gr.Textbox(label="JS Files", interactive=False)

                gr.Markdown("### Images")
                img_output = gr.Textbox(label="Images", interactive=False)

                gr.Markdown("### CSS Content")
                css_content_output = gr.Textbox(label="CSS Content", interactive=True)

                gr.Markdown("### JS Content")
                js_content_output = gr.Textbox(label="JS Content", interactive=True)

            # BUGFIX: the original passed freshly-created (never rendered)
            # Textboxes in `outputs`, so the visible accordion components
            # above were never populated. Bind the handler to them instead.
            submit_button.click(
                fn=convert_to_text,
                inputs=url_input,
                outputs=[
                    results_output, text_output, file_output,
                    css_output, js_output, img_output,
                    css_content_output, js_content_output,
                ],
            )

        # Tab 2: Model to Text Converter
        with gr.Tab("Model to Text Converter"):
            gr.Markdown("## Model to Text Converter")
            gr.Markdown("Enter a link to a model on Hugging Face or GitHub, and specify the file path.")

            with gr.Row():
                model_url_input = gr.Textbox(label="Model URL", placeholder="https://huggingface.co/... or https://github.com/...")
                file_path_input = gr.Textbox(label="File Path", placeholder="e.g., config.json or README.md")

            with gr.Row():
                model_description_output = gr.Textbox(label="Model Description", interactive=False)
                install_instructions_output = gr.Textbox(label="Installation Instructions", interactive=False)

            with gr.Row():
                model_content_output = gr.Textbox(label="File Content", interactive=True, elem_id="model-content-output")

            with gr.Row():
                gr.HTML("<button onclick='copyCode(\"model-content-output\")'>Copy Code</button>")  # "Copy Code" button

            # One button triggers both handlers: model info + file content.
            submit_model_button = gr.Button("Fetch Model Info and File Content")
            submit_model_button.click(
                fn=fetch_model_info,
                inputs=[model_url_input],
                outputs=[model_description_output, install_instructions_output]
            )
            submit_model_button.click(
                fn=fetch_model_file_content,
                inputs=[model_url_input, file_path_input],
                outputs=[model_content_output]
            )

        # Tab 3: Space to Text Converter
        with gr.Tab("Space to Text Converter"):
            gr.Markdown("## Space to Text Converter")
            gr.Markdown("Enter a link to a Hugging Face Space and specify the file path to fetch its content.")

            with gr.Row():
                space_url_input = gr.Textbox(label="Space URL", placeholder="https://huggingface.co/spaces/...")
                space_file_path_input = gr.Textbox(label="File Path", placeholder="e.g., app.py or README.md")

            with gr.Row():
                space_content_output = gr.Textbox(label="File Content", interactive=True, elem_id="space-content-output")

            with gr.Row():
                gr.HTML("<button onclick='copyCode(\"space-content-output\")'>Copy Code</button>")  # "Copy Code" button

            submit_space_button = gr.Button("Fetch File Content")
            submit_space_button.click(
                fn=fetch_space_file_content,
                inputs=[space_url_input, space_file_path_input],
                outputs=[space_content_output]
            )

        # Tab 4: CodePen to Text Converter
        with gr.Tab("CodePen to Text Converter"):
            gr.Markdown("## CodePen to Text Converter")
            gr.Markdown("Enter a CodePen project URL to fetch its HTML, CSS, and JavaScript content.")

            with gr.Row():
                codepen_url_input = gr.Textbox(label="CodePen URL", placeholder="https://codepen.io/.../pen/...")

            # Renamed from css_output/js_output to avoid shadowing the
            # Tab 1 accordion components of the same name.
            with gr.Row():
                codepen_html_output = gr.Textbox(label="HTML Content", interactive=True, elem_id="html-output")
                codepen_css_output = gr.Textbox(label="CSS Content", interactive=True, elem_id="css-output")
                codepen_js_output = gr.Textbox(label="JavaScript Content", interactive=True, elem_id="js-output")

            with gr.Row():
                gr.HTML("<button onclick='copyCode(\"html-output\")'>Copy HTML</button>")
                gr.HTML("<button onclick='copyCode(\"css-output\")'>Copy CSS</button>")
                gr.HTML("<button onclick='copyCode(\"js-output\")'>Copy JS</button>")

            submit_codepen_button = gr.Button("Fetch CodePen Content")
            submit_codepen_button.click(
                fn=fetch_codepen_project,
                inputs=[codepen_url_input],
                outputs=[codepen_html_output, codepen_css_output, codepen_js_output]
            )

# Launch the interface
demo.launch()