Drag2121 committed
Commit a81573d · 1 Parent(s): 7b25815
.dockerignore ADDED
@@ -0,0 +1,51 @@
+ # Git
+ .git
+ .gitignore
+
+ # Python
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+
+ # Virtual environment
+ venv/
+ env/
+ ENV/
+ fastapi_env/
+
+ # Editor directories and files
+ .idea/
+ .vscode/
+ *.swp
+ *.swo
+
+ # Environment variables
+ .env
+
+ # Generated static files (keep only the .gitkeep placeholder)
+ static/translated/*
+ !static/translated/.gitkeep
+
+ # Logs
+ *.log
+
+ # OS specific
+ .DS_Store
+ Thumbs.db
Dockerfile ADDED
@@ -0,0 +1,38 @@
+ FROM python:3.9-slim
+
+ # Create user with UID 1000 (required by HF Spaces)
+ RUN useradd -m -u 1000 user
+
+ # Set environment variables
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ # Install system dependencies
+ RUN apt-get update && apt-get install -y \
+     libgl1-mesa-glx \
+     libglib2.0-0 \
+     poppler-utils \
+     build-essential \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Set working directory
+ WORKDIR /app
+
+ # Copy requirements and install dependencies
+ COPY --chown=user requirements.txt .
+
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy rest of the app with correct permissions
+ COPY --chown=user . .
+
+ # Ensure static directories exist and are writable by the app user
+ RUN mkdir -p static/translated && chown -R user:user static
+
+ # Switch to non-root user (required by HF Spaces)
+ USER user
+
+ # Expose port 7860 as required by HF Spaces
+ EXPOSE 7860
+
+ # Run the FastAPI app
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,152 @@
  ---
- title: Manga OCR
- emoji: 🐠
- colorFrom: gray
- colorTo: indigo
  sdk: docker
  pinned: false
  ---
 
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Manga OCR Translator API
+
+ A FastAPI-based API for translating manga images using OCR and machine translation. It can process manga URLs or PDF files, locate speech bubbles through proximity-based grouping of detected text regions, and translate the text inside them.
+
+ ## Features
+
+ - **Manga URL Translation**: Scrapes manga images from a URL and translates text in speech bubbles
+ - **PDF Translation**: Extracts pages from a PDF file and translates text in speech bubbles
+ - **Streaming Response**: Returns translated images as soon as they're processed
+ - **Multiple Translation Engines**: Supports Google Translate, MyMemory, Linguee, and Pollinations.ai
+ - **Multiple Languages**: Supports Japanese, Korean, Chinese, and more as source languages
+ - **Docker Support**: Easy deployment with Docker
+
+ ## API Endpoints
+
+ - **GET `/`**: Basic API information
+ - **POST `/translate/url`**: Translate manga from a URL
+ - **POST `/translate/pdf`**: Translate manga from a PDF file
+ - **GET `/docs`**: Swagger documentation
+
+ ## Running Locally
+
+ ### Prerequisites
+
+ - Python 3.9+
+ - Required system libraries for PDF processing (poppler-utils, libgl1-mesa-glx)
+
+ ### Installation
+
+ 1. Clone this repository
+ 2. Install dependencies:
+
+ ```bash
+ pip install -r requirements.txt
+ ```
+
+ 3. Make sure you have the font file in place:
+
+ ```
+ font/Movistar Text Regular.ttf
+ ```
+
+ 4. Run the server:
+
+ ```bash
+ uvicorn app:app --reload
+ ```
+
+ The API will be available at [http://localhost:8000](http://localhost:8000)
+
+ ## Docker
+
+ Build and run with Docker:
+
+ ```bash
+ # Build the image
+ docker build -t manga-ocr-api .
+
+ # Run the container (the image exposes port 7860)
+ docker run -p 7860:7860 manga-ocr-api
+ ```
+
+ ## Deploying to Hugging Face Spaces
+
+ 1. Create a new Space on [Hugging Face Spaces](https://huggingface.co/spaces)
+ 2. Choose Docker as the Space SDK
+ 3. Upload this repository to your Space
+ 4. The container will be built and deployed automatically
+
+ ### Hugging Face Space Configuration
+
+ Create a `README.md` file in your Space with the following information:
+
+ ```markdown
  ---
+ title: Manga OCR Translator
+ emoji: 📚
+ colorFrom: blue
+ colorTo: purple
  sdk: docker
  pinned: false
  ---
 
+ # Manga OCR Translator
+
+ Translate manga images from URLs or PDF files using OCR and machine translation.
+ ```
+
+ ## Usage Examples
+
+ ### Translating from a URL
+
+ ```python
+ import requests
+ import json
+
+ url = "http://localhost:8000/translate/url"
+ payload = {
+     "url": "https://example.com/manga/chapter-1",
+     "src_lang": "ja",
+     "tgt_lang": "en",
+     "translator": "google"
+ }
+ headers = {"Content-Type": "application/json"}
+
+ response = requests.post(url, json=payload, headers=headers, stream=True)
+
+ for line in response.iter_lines():
+     if line:
+         # Strip the "data: " prefix from each server-sent event line
+         data = line.decode('utf-8').removeprefix('data: ')
+         try:
+             result = json.loads(data)
+             print(result)
+         except json.JSONDecodeError:
+             pass
+ ```
+
+ ### Translating from a PDF
+
+ ```python
+ import requests
+ import json
+
+ url = "http://localhost:8000/translate/pdf"
+ files = {"file": open("manga.pdf", "rb")}
+ data = {
+     "src_lang": "ja",
+     "tgt_lang": "en",
+     "translator": "google"
+ }
+
+ response = requests.post(url, files=files, data=data, stream=True)
+
+ for line in response.iter_lines():
+     if line:
+         # Strip the "data: " prefix from each server-sent event line
+         payload = line.decode('utf-8').removeprefix('data: ')
+         try:
+             result = json.loads(payload)
+             print(result)
+         except json.JSONDecodeError:
+             pass
+ ```
+
+ ## License
+
+ MIT
+
+ ## Acknowledgements
+
+ This project is based on the OCR and translation code from the original Gradio-based manga translator.
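The proximity-based grouping mentioned above can be sketched in a few lines. This is a hypothetical illustration, not the actual `group_text_regions` from `utils/ocr.py`: boxes are `(x1, y1, x2, y2)` tuples, and two boxes join the same bubble when their centers are within a distance threshold (union-find handles transitive merges).

```python
# Hypothetical sketch of proximity-based grouping of OCR boxes into bubbles.
# The real implementation in utils/ocr.py may use different criteria.

def group_by_proximity(boxes, threshold=50.0):
    """Cluster boxes whose centers are closer than `threshold` pixels."""
    parent = list(range(len(boxes)))

    def find(i):
        # Union-find with path compression
        while parent[i] != i:
            parent[i] = parent[parent[i]]
            i = parent[i]
        return i

    def center(b):
        return ((b[0] + b[2]) / 2, (b[1] + b[3]) / 2)

    for i in range(len(boxes)):
        for j in range(i + 1, len(boxes)):
            (ax, ay), (bx, by) = center(boxes[i]), center(boxes[j])
            if ((ax - bx) ** 2 + (ay - by) ** 2) ** 0.5 < threshold:
                parent[find(i)] = find(j)

    groups = {}
    for i in range(len(boxes)):
        groups.setdefault(find(i), []).append(boxes[i])
    return list(groups.values())

boxes = [(0, 0, 10, 10), (5, 12, 15, 22), (200, 200, 210, 210)]
print(len(group_by_proximity(boxes)))  # → 2 (two nearby boxes merge, far one stays alone)
```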
app.py ADDED
@@ -0,0 +1,292 @@
+ import os
+ import io
+ import json
+ import uuid
+ import asyncio
+ import aiohttp
+ import uvicorn
+ from typing import List, Dict, Any, Optional, Generator
+ from fastapi import FastAPI, UploadFile, File, Form, Query, BackgroundTasks
+ from fastapi.responses import StreamingResponse, JSONResponse, FileResponse
+ from fastapi.staticfiles import StaticFiles
+ from fastapi.middleware.cors import CORSMiddleware
+ from pydantic import BaseModel, Field, HttpUrl
+ import time
+ from PIL import Image
+
+ # Import utility functions
+ from utils.ocr import detect_text, group_text_regions
+ from utils.web import scrape_comic_images, download_image
+ from utils.pdf import pdf_to_images, pdf_stream_to_images
+ from utils.image import overlay_grouped_text, save_image
+ from utils.translation import translate_grouped_regions
+
+ # Configuration
+ STATIC_DIR = "static"
+ TRANSLATED_IMAGE_DIR = os.path.join(STATIC_DIR, "translated")
+ FONT_PATH = "font/Movistar Text Regular.ttf"
+
+ # Ensure directories exist
+ os.makedirs(TRANSLATED_IMAGE_DIR, exist_ok=True)
+
+ # Initialize FastAPI app
+ app = FastAPI(
+     title="Manga OCR Translator API",
+     description="API for translating manga images using OCR and machine translation",
+     version="1.0.0",
+ )
+
+ # Add CORS middleware
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],  # Allow all origins
+     allow_credentials=True,
+     allow_methods=["*"],  # Allow all methods
+     allow_headers=["*"],  # Allow all headers
+ )
+
+ # Mount static files directory
+ app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
+
+ # Define request and response models
+ class TranslationRequest(BaseModel):
+     url: HttpUrl = Field(..., description="URL of the manga chapter to translate")
+     src_lang: str = Field(default="auto", description="Source language (auto, ja, ko, zh)")
+     tgt_lang: str = Field(default="en", description="Target language (en, es, fr, de, it, pt, ru)")
+     translator: str = Field(default="google", description="Translation engine (google, mymemory, linguee, pollinations)")
+
+ class TranslationResponse(BaseModel):
+     status: str
+     message: str
+     images: List[str] = []
+
+ # Basic homepage route
+ @app.get("/")
+ async def root():
+     return {
+         "message": "Welcome to Manga OCR Translator API",
+         "endpoints": {
+             "/translate/url": "Translate manga from a URL",
+             "/translate/pdf": "Translate manga from a PDF file",
+             "/docs": "API documentation"
+         }
+     }
+
+ # Route for processing manga URL with streaming response
+ @app.post("/translate/url")
+ async def translate_manga_url(request: TranslationRequest):
+     """
+     Process a manga URL and return translated images with a streaming response.
+     Each image is processed and returned as soon as it's ready.
+     """
+     print(f"Received request to translate URL: {request.url}")
+
+     # Create a generator function that yields translated images
+     async def process_images():
+         try:
+             # Scrape image URLs from the manga page
+             image_urls = scrape_comic_images(str(request.url))
+             if not image_urls:
+                 event = {"status": "error", "message": "No images found at the URL", "images": []}
+                 yield f"data: {json.dumps(event)}\n\n"
+                 return
+
+             print(f"Found {len(image_urls)} images to process")
+
+             # Limit to first 5 images if too many
+             if len(image_urls) > 5:
+                 print("Limiting to first 5 images")
+                 image_urls = image_urls[:5]
+
+             # Process each image
+             for i, image_url in enumerate(image_urls):
+                 try:
+                     print(f"Processing image {i+1}/{len(image_urls)}: {image_url}")
+
+                     # Update client with status (json.dumps so clients can parse the event)
+                     event = {"status": "processing", "message": f"Processing image {i+1}/{len(image_urls)}", "image_url": image_url}
+                     yield f"data: {json.dumps(event)}\n\n"
+
+                     # Download the image
+                     image_content = await download_image(image_url)
+                     if not image_content:
+                         print(f"Failed to download image {i+1}")
+                         continue
+
+                     # Detect text regions
+                     text_regions = detect_text(image_content, request.src_lang)
+                     if not text_regions:
+                         print(f"No text detected in image {i+1}")
+                         continue
+
+                     # Group text regions
+                     grouped_regions = group_text_regions(text_regions)
+                     if not grouped_regions:
+                         print(f"No text groups formed in image {i+1}")
+                         continue
+
+                     # Translate grouped regions
+                     use_pollinations = request.translator == "pollinations"
+                     free_translator = request.translator if not use_pollinations else "google"
+
+                     translated_regions = translate_grouped_regions(
+                         grouped_regions,
+                         request.src_lang,
+                         request.tgt_lang,
+                         use_pollinations,
+                         free_translator
+                     )
+
+                     # Overlay translated text on image
+                     translated_image = overlay_grouped_text(image_content, translated_regions)
+
+                     # Save image and get path
+                     image_path = save_image(translated_image, TRANSLATED_IMAGE_DIR)
+
+                     # Create a URL to the saved image
+                     image_url = f"/static/translated/{os.path.basename(image_path)}"
+
+                     # Stream the result back to the client
+                     json_response = {
+                         "status": "success",
+                         "message": f"Processed image {i+1}/{len(image_urls)}",
+                         "image_url": image_url
+                     }
+
+                     # Send this single image result
+                     yield f"data: {json.dumps(json_response)}\n\n"
+
+                 except Exception as e:
+                     print(f"Error processing image {i+1}: {e}")
+                     event = {"status": "error", "message": f"Error processing image {i+1}: {e}"}
+                     yield f"data: {json.dumps(event)}\n\n"
+
+             # Final message
+             event = {"status": "complete", "message": "All images processed"}
+             yield f"data: {json.dumps(event)}\n\n"
+
+         except Exception as e:
+             print(f"Error in process_images: {e}")
+             event = {"status": "error", "message": f"Error: {e}"}
+             yield f"data: {json.dumps(event)}\n\n"
+
+     # Return a streaming response
+     return StreamingResponse(
+         process_images(),
+         media_type="text/event-stream",
+         headers={
+             "Cache-Control": "no-cache",
+             "Connection": "keep-alive",
+             "X-Accel-Buffering": "no"  # Disable buffering for Nginx
+         }
+     )
+
+ # Route for processing PDF file with streaming response
+ @app.post("/translate/pdf")
+ async def translate_manga_pdf(
+     file: UploadFile = File(...),
+     src_lang: str = Form("auto"),
+     tgt_lang: str = Form("en"),
+     translator: str = Form("google")
+ ):
+     """
+     Process a manga PDF file and return translated images with a streaming response.
+     Each page is processed and returned as soon as it's ready.
+     """
+     print(f"Received PDF file: {file.filename}, size: {file.size} bytes")
+
+     # Create a generator function that yields translated images
+     async def process_pdf():
+         try:
+             # Read the PDF file
+             pdf_content = await file.read()
+
+             # Convert PDF to images
+             event = {"status": "processing", "message": "Converting PDF to images..."}
+             yield f"data: {json.dumps(event)}\n\n"
+
+             # Convert PDF to images in memory
+             pdf_images = await pdf_stream_to_images(pdf_content)
+
+             if not pdf_images:
+                 event = {"status": "error", "message": "Failed to extract images from PDF", "images": []}
+                 yield f"data: {json.dumps(event)}\n\n"
+                 return
+
+             print(f"Extracted {len(pdf_images)} pages from PDF")
+
+             # Limit to first 5 pages if too many
+             if len(pdf_images) > 5:
+                 print("Limiting to first 5 pages")
+                 pdf_images = pdf_images[:5]
+
+             # Process each page
+             for i, image_content in enumerate(pdf_images):
+                 try:
+                     # Update client with status
+                     print(f"Processing PDF page {i+1}/{len(pdf_images)}")
+                     event = {"status": "processing", "message": f"Processing PDF page {i+1}/{len(pdf_images)}"}
+                     yield f"data: {json.dumps(event)}\n\n"
+
+                     # Detect text regions
+                     text_regions = detect_text(image_content, src_lang)
+                     if not text_regions:
+                         print(f"No text detected in PDF page {i+1}")
+                         continue
+
+                     # Group text regions
+                     grouped_regions = group_text_regions(text_regions)
+                     if not grouped_regions:
+                         print(f"No text groups formed in PDF page {i+1}")
+                         continue
+
+                     # Translate grouped regions
+                     use_pollinations = translator == "pollinations"
+                     free_translator = translator if not use_pollinations else "google"
+
+                     translated_regions = translate_grouped_regions(
+                         grouped_regions,
+                         src_lang,
+                         tgt_lang,
+                         use_pollinations,
+                         free_translator
+                     )
+
+                     # Overlay translated text on image
+                     translated_image = overlay_grouped_text(image_content, translated_regions)
+
+                     # Save image and get path
+                     image_path = save_image(translated_image, TRANSLATED_IMAGE_DIR)
+
+                     # Create a URL to the saved image
+                     image_url = f"/static/translated/{os.path.basename(image_path)}"
+
+                     # Stream the result back to the client
+                     json_response = {
+                         "status": "success",
+                         "message": f"Processed PDF page {i+1}/{len(pdf_images)}",
+                         "image_url": image_url
+                     }
+
+                     # Send this single image result
+                     yield f"data: {json.dumps(json_response)}\n\n"
+
+                 except Exception as e:
+                     print(f"Error processing PDF page {i+1}: {e}")
+                     event = {"status": "error", "message": f"Error processing PDF page {i+1}: {e}"}
+                     yield f"data: {json.dumps(event)}\n\n"
+
+             # Final message
+             event = {"status": "complete", "message": "All PDF pages processed"}
+             yield f"data: {json.dumps(event)}\n\n"
+
+         except Exception as e:
+             print(f"Error in process_pdf: {e}")
+             event = {"status": "error", "message": f"Error: {e}"}
+             yield f"data: {json.dumps(event)}\n\n"
+
+     # Return a streaming response
+     return StreamingResponse(
+         process_pdf(),
+         media_type="text/event-stream",
+         headers={
+             "Cache-Control": "no-cache",
+             "Connection": "keep-alive",
+             "X-Accel-Buffering": "no"  # Disable buffering for Nginx
+         }
+     )
+
+ # Main entry point
+ if __name__ == "__main__":
+     print("Starting Manga OCR Translator API server...")
+     uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
client_example.py ADDED
@@ -0,0 +1,188 @@
+ #!/usr/bin/env python
+ """
+ Client example for Manga OCR Translator API
+ Demonstrates how to use the API with streaming responses
+ """
+
+ import sys
+ import requests
+ import json
+ import argparse
+ import webbrowser
+ from urllib.parse import urljoin
+ from pprint import pprint
+
+ # Debug print statement
+ print("Initializing Manga OCR Translator API client...")
+
+ def process_url(api_url, manga_url, src_lang, tgt_lang, translator):
+     """Process a manga URL and display results with streaming."""
+     endpoint = urljoin(api_url, "translate/url")
+
+     print(f"Translating manga from URL: {manga_url}")
+     print(f"Source language: {src_lang}, Target language: {tgt_lang}")
+     print(f"Using translator: {translator}")
+
+     payload = {
+         "url": manga_url,
+         "src_lang": src_lang,
+         "tgt_lang": tgt_lang,
+         "translator": translator
+     }
+     headers = {"Content-Type": "application/json"}
+
+     # Make the API request with streaming enabled
+     print("\nSending request to API...\n")
+     try:
+         response = requests.post(endpoint, json=payload, headers=headers, stream=True)
+         response.raise_for_status()  # Raise exception for 4XX/5XX status codes
+
+         # Process the streaming response
+         image_urls = []
+         for line in response.iter_lines():
+             if line:
+                 # Strip the SSE "data: " prefix (only at the start of the line)
+                 data = line.decode('utf-8').removeprefix('data: ')
+                 try:
+                     # Parse the JSON data
+                     result = json.loads(data)
+
+                     # Print status update
+                     if "status" in result:
+                         status = result["status"]
+                         message = result.get("message", "")
+                         print(f"[{status.upper()}] {message}")
+
+                     # Save image URL if available
+                     if "image_url" in result:
+                         image_url = urljoin(api_url, result["image_url"])
+                         image_urls.append(image_url)
+                         print(f"Image available at: {image_url}")
+
+                         # Open the first image in a browser
+                         if len(image_urls) == 1:
+                             print("Opening first image in browser...")
+                             webbrowser.open(image_url)
+
+                 except json.JSONDecodeError:
+                     print(f"Warning: Received non-JSON data: {data}")
+
+         print("\nProcessing complete.")
+         print(f"Total images processed: {len(image_urls)}")
+
+         return image_urls
+
+     except requests.exceptions.RequestException as e:
+         print(f"Error: Failed to connect to API: {e}")
+         return []
+
+ def process_pdf(api_url, pdf_path, src_lang, tgt_lang, translator):
+     """Process a manga PDF and display results with streaming."""
+     endpoint = urljoin(api_url, "translate/pdf")
+
+     print(f"Translating manga from PDF: {pdf_path}")
+     print(f"Source language: {src_lang}, Target language: {tgt_lang}")
+     print(f"Using translator: {translator}")
+
+     # Prepare files and data for multipart form
+     try:
+         files = {"file": open(pdf_path, "rb")}
+     except FileNotFoundError:
+         print(f"Error: PDF file not found at path: {pdf_path}")
+         return []
+
+     data = {
+         "src_lang": src_lang,
+         "tgt_lang": tgt_lang,
+         "translator": translator
+     }
+
+     # Make the API request with streaming enabled
+     print("\nSending request to API...\n")
+     try:
+         response = requests.post(endpoint, files=files, data=data, stream=True)
+         response.raise_for_status()  # Raise exception for 4XX/5XX status codes
+
+         # Process the streaming response
+         image_urls = []
+         for line in response.iter_lines():
+             if line:
+                 # Strip the SSE "data: " prefix (use a new name so the form dict isn't shadowed)
+                 event_data = line.decode('utf-8').removeprefix('data: ')
+                 try:
+                     # Parse the JSON data
+                     result = json.loads(event_data)
+
+                     # Print status update
+                     if "status" in result:
+                         status = result["status"]
+                         message = result.get("message", "")
+                         print(f"[{status.upper()}] {message}")
+
+                     # Save image URL if available
+                     if "image_url" in result:
+                         image_url = urljoin(api_url, result["image_url"])
+                         image_urls.append(image_url)
+                         print(f"Image available at: {image_url}")
+
+                         # Open the first image in a browser
+                         if len(image_urls) == 1:
+                             print("Opening first image in browser...")
+                             webbrowser.open(image_url)
+
+                 except json.JSONDecodeError:
+                     print(f"Warning: Received non-JSON data: {event_data}")
+
+         print("\nProcessing complete.")
+         print(f"Total images processed: {len(image_urls)}")
+
+         return image_urls
+
+     except requests.exceptions.RequestException as e:
+         print(f"Error: Failed to connect to API: {e}")
+         return []
+     finally:
+         # Close the file
+         files["file"].close()
+
+ def main():
+     # Parse command line arguments
+     parser = argparse.ArgumentParser(description="Manga OCR Translator Client")
+     parser.add_argument("--api-url", default="http://localhost:8000", help="API URL")
+
+     # Create subparsers for URL and PDF commands
+     subparsers = parser.add_subparsers(dest="command", help="Command to run")
+
+     # URL command
+     url_parser = subparsers.add_parser("url", help="Translate manga from URL")
+     url_parser.add_argument("manga_url", help="URL of manga chapter to translate")
+     url_parser.add_argument("--src-lang", default="auto", help="Source language (auto, ja, ko, zh)")
+     url_parser.add_argument("--tgt-lang", default="en", help="Target language (en, es, fr, etc.)")
+     url_parser.add_argument("--translator", default="google",
+                             help="Translation engine (google, mymemory, linguee, pollinations)")
+
+     # PDF command
+     pdf_parser = subparsers.add_parser("pdf", help="Translate manga from PDF")
+     pdf_parser.add_argument("pdf_path", help="Path to PDF file")
+     pdf_parser.add_argument("--src-lang", default="auto", help="Source language (auto, ja, ko, zh)")
+     pdf_parser.add_argument("--tgt-lang", default="en", help="Target language (en, es, fr, etc.)")
+     pdf_parser.add_argument("--translator", default="google",
+                             help="Translation engine (google, mymemory, linguee, pollinations)")
+
+     args = parser.parse_args()
+
+     # Debug print args
+     print("Debug: Command line arguments:", args)
+
+     # Process based on command
+     if args.command == "url":
+         process_url(args.api_url, args.manga_url, args.src_lang, args.tgt_lang, args.translator)
+     elif args.command == "pdf":
+         process_pdf(args.api_url, args.pdf_path, args.src_lang, args.tgt_lang, args.translator)
+     else:
+         print("Error: Please specify a command (url or pdf)")
+         parser.print_help()
+         sys.exit(1)
+
+ if __name__ == "__main__":
+     main()
font/Movistar Text Regular.ttf ADDED
Binary file (57.3 kB)
huggingface-space-metadata.md ADDED
@@ -0,0 +1,28 @@
+ ---
+ title: Manga OCR Translator
+ emoji: 📚
+ colorFrom: indigo
+ colorTo: purple
+ sdk: docker
+ app_port: 7860
+ pinned: false
+ license: mit
+ ---
+
+ # Manga OCR Translator
+
+ Translate manga images from URLs or PDF files using OCR and machine translation. The API returns each translated image as soon as it's processed, without waiting for the entire batch to complete.
+
+ ## Features
+
+ - Manga URL Translation
+ - PDF Translation
+ - Multiple Translation Engines
+ - Streaming Response
+ - Multiple Languages Support
+
+ ## API Endpoints
+
+ - `/translate/url` - Translate manga from a URL
+ - `/translate/pdf` - Translate manga from a PDF file
+ - `/docs` - API documentation
requirements.txt ADDED
@@ -0,0 +1,24 @@
+ # FastAPI and web server
+ fastapi
+ uvicorn
+ python-multipart
+
+ # OCR dependencies
+ easyocr==1.7.0
+ Pillow==10.0.1
+ numpy==1.25.2
+
+ # Web scraping and HTTP
+ requests==2.31.0
+ beautifulsoup4==4.12.2
+
+ # Translation
+ deep-translator==1.11.4
+
+ # PDF processing
+ pdf2image==1.16.3
+ pymupdf==1.23.3  # For PDF processing (alternative to pdf2image)
+
+ # Utilities
+ python-dotenv==1.0.0
+ # Note: uuid is part of the Python standard library; the PyPI "uuid" package is an
+ # obsolete Python 2 backport that shadows it, so it is intentionally not listed here.
static/translated/.gitkeep ADDED
@@ -0,0 +1 @@
+
utils/__init__.py ADDED
@@ -0,0 +1,9 @@
+ # Import main utility functions for easier access
+ from .ocr import detect_text, group_text_regions
+ from .web import scrape_comic_images, download_image
+ from .pdf import pdf_to_images, pdf_stream_to_images
+ from .image import overlay_grouped_text, save_image
+ from .translation import translate_grouped_regions, translate_with_free_translator, translate_with_pollinations
+
+ # Debug print
+ print("Initialized utils package.")
utils/image.py ADDED
@@ -0,0 +1,254 @@
1
+ import os
2
+ import io
3
+ import uuid
4
+ from PIL import Image, ImageDraw, ImageFont
5
+ from typing import List, Dict, Any, Optional, BinaryIO
6
+
7
+ # Font configuration
8
+ FONT_PATH = "font/Movistar Text Regular.ttf" # This should be available in the Docker image
9
+
10
+ # Debug print
11
+ print("Loading image processing module...")
12
+
13
+ def calculate_font_size(text: str, max_width: float, max_height: float, font_path: str = FONT_PATH) -> int:
14
+ """Calculates a suitable font size to fit text within a bounding box."""
15
+ font_size = int(max_height * 0.8) # Start with a reasonable guess
16
+ if font_size <= 0:
17
+ return 1 # Minimum font size
18
+
19
+ try:
20
+ font = ImageFont.truetype(font_path, font_size)
21
+ text_bbox = font.getbbox(text)
22
+ text_width = text_bbox[2] - text_bbox[0]
23
+ text_height = text_bbox[3] - text_bbox[1]
24
+
25
+ # Reduce font size until it fits (simplified approach)
26
+ while (text_width > max_width or text_height > max_height) and font_size > 5:
27
+ font_size -= 1
28
+ font = ImageFont.truetype(font_path, font_size)
29
+ text_bbox = font.getbbox(text)
30
+ text_width = text_bbox[2] - text_bbox[0]
31
+ text_height = text_bbox[3] - text_bbox[1]
32
+
33
+ return max(font_size, 5) # Ensure a minimum size
34
+ except IOError:
35
+ print(f"Warning: Font file not found at {font_path}. Using default PIL font.")
36
+ # Fallback logic if font file is not found
37
+ return int(max_height * 0.5) # Simplified fallback
38
+ except Exception as e:
39
+ print(f"Error calculating font size: {e}")
40
+ return int(max_height * 0.5) # Simplified fallback
41
+
42
+ def overlay_grouped_text(image_content: bytes, translated_grouped_regions: List[dict]) -> Image.Image:
43
+ """Overlay translated grouped text onto the original image and return the PIL Image."""
44
+ try:
45
+ # Debug print
46
+ print("Overlaying translated text on image...")
47
+
48
+ image = Image.open(io.BytesIO(image_content)).convert("RGBA")
49
+ draw = ImageDraw.Draw(image)
50
+
51
+ # Sort regions by area (smallest to largest) to ensure smaller bubbles are processed later
52
+ # This helps with overlapping bubbles, as smaller ones often appear on top
53
+ sorted_regions = sorted(translated_grouped_regions,
54
+ key=lambda r: (r.get("bbox")[2][0] - r.get("bbox")[0][0]) *
55
+ (r.get("bbox")[2][1] - r.get("bbox")[0][1])
56
+ if r.get("bbox") else 0)
57
+
58
+ for group in sorted_regions:
59
+ if "translated_text" not in group or not group.get("is_group", False):
60
+ print("Skipping non-group or untranslated region in overlay.")
61
+ continue
62
+
63
+ group_bbox_corners = group["bbox"] # This is the combined bbox for the group
64
+ translated_text = group["translated_text"]
65
+
66
+ # Extract combined bounding box coordinates [x1, y1, x2, y2]
67
+ x1, y1 = group_bbox_corners[0] # Top-left
68
+ x2, y2 = group_bbox_corners[2] # Bottom-right
69
+
70
+ # Basic validation
71
+ if x1 >= x2 or y1 >= y2:
72
+ print(f"Warning: Degenerate group bbox found: {group_bbox_corners}. Skipping group.")
73
+ continue
74
+
75
+ width, height = x2 - x1, y2 - y1
76
+ if width <= 0 or height <= 0:
77
+ print(f"Warning: Non-positive dimensions for group bbox: {group_bbox_corners}. Skipping group.")
78
+ continue
79
+
80
+ # --- Background Clearing ---
81
+ # Apply a more generous padding to ensure no text from other bubbles bleeds in
82
+ padding = max(10, int(min(width, height) * 0.1)) # Increased padding for better erasure
83
+
84
+ # For more complete text removal, we'll clear both the group bounding box and each original region
85
+
86
+ # 1. First clear the entire group bounding box with padding
87
+ for px in range(int(x1 - padding), int(x2 + padding + 1)):
88
+ for py in range(int(y1 - padding), int(y2 + padding + 1)):
89
+ if 0 <= px < image.width and 0 <= py < image.height:
90
+ image.putpixel((px, py), (255, 255, 255, 255)) # White background
91
+
92
+ # 2. For more thorough clearing, also clear each original region with its own padding
93
+ # This helps ensure we catch text that might be outside the main group bbox
94
+ if "original_regions" in group:
95
+ for orig_region in group["original_regions"]:
96
+ orig_bbox = orig_region["bbox"]
97
+ orig_x1, orig_y1 = orig_bbox[0]
98
+ orig_x2, orig_y2 = orig_bbox[2]
99
+ # Add extra padding specifically for original regions
100
+ region_padding = max(8, int(min(orig_x2 - orig_x1, orig_y2 - orig_y1) * 0.15))
101
+
102
+ # Clear each original region with its own padding
103
+ # Single rectangle fill replaces the per-pixel loop (identical coverage)
+ ImageDraw.Draw(image).rectangle(
+     [max(0, int(orig_x1 - region_padding)), max(0, int(orig_y1 - region_padding)),
+      min(image.width - 1, int(orig_x2 + region_padding)), min(image.height - 1, int(orig_y2 + region_padding))],
+     fill=(255, 255, 255, 255)) # White background
107
+
108
+ print(f"Cleared background for text region and {len(group.get('original_regions', []))} original regions")
109
+
110
+ # --- Font Calculation with Wrapping Logic ---
111
+ # Get an initial font size estimate
112
+ initial_font_size = calculate_font_size(translated_text, width, height, FONT_PATH)
113
+ try:
114
+ font = ImageFont.truetype(FONT_PATH, initial_font_size)
115
+ except Exception as e:
116
+ print(f"Error loading font size {initial_font_size}: {e}. Using default.")
117
+ try:
118
+ font = ImageFont.load_default()
119
+ except Exception as font_e:
120
+ print(f"Error loading default font: {font_e}. Cannot draw text.")
121
+ continue
122
+
123
+ # Calculate effective drawing area (with reduced width for better aesthetics)
124
+ effective_width = width * 0.9 # Reduce slightly to avoid text touching edges
125
+ effective_height = height * 0.9
126
+
127
+ # Determine if text needs wrapping
128
+ text_lines = []
129
+ words = translated_text.split()
130
+ current_line = words[0] if words else ""
131
+
132
+ # Simple word wrapping algorithm
133
+ for word in words[1:]:
134
+ test_line = current_line + " " + word
135
+ # Use getbbox for more accurate width calculation during wrapping check
136
+ line_bbox_wrap = font.getbbox(test_line)
137
+ line_width_wrap = line_bbox_wrap[2] - line_bbox_wrap[0]
138
+
139
+ if line_width_wrap <= effective_width:
140
+ current_line = test_line
141
+ else:
142
+ text_lines.append(current_line)
143
+ current_line = word
144
+
145
+ # Add the last line
146
+ if current_line:
147
+ text_lines.append(current_line)
148
+
149
+ # If no lines were created (empty text), skip
150
+ if not text_lines:
151
+ continue
152
+
153
+ # --- Font Calculation & Line Height (Robust Spacing) ---
154
+ # Use getbbox for line height calculation based on a reference string
155
+ line_bbox_ref = font.getbbox("Tg")
156
+ line_height_metric = line_bbox_ref[3] - line_bbox_ref[1] # Height of the bbox
157
+ # Increase spacing significantly - force separation
158
+ line_spacing_factor = 2.0
159
+ line_height = line_height_metric * line_spacing_factor
160
+ print(f"Using bbox height for metric: {line_height_metric:.2f}, Aggressive Line Height ({line_spacing_factor}x): {line_height:.2f}")
161
+
162
+ # Approximate total height for resizing check
163
+ total_text_height_check = line_height * len(text_lines)
164
+
165
+ # If wrapped text is too tall, recalculate font size
166
+ if total_text_height_check > effective_height:
167
+ print(f"Resizing font: Estimated wrapped height ({total_text_height_check:.1f}) > effective height ({effective_height:.1f})")
168
+ scale_factor = effective_height / total_text_height_check
169
+ new_font_size = max(6, int(initial_font_size * scale_factor)) # Min size 6pt
170
+ print(f"Original font size: {initial_font_size}, New font size: {new_font_size}")
171
+ try:
172
+ font = ImageFont.truetype(FONT_PATH, new_font_size)
173
+ # Recalculate line height metric and line height with new font
174
+ line_bbox_ref = font.getbbox("Tg")
175
+ line_height_metric = line_bbox_ref[3] - line_bbox_ref[1]
176
+ line_height = line_height_metric * line_spacing_factor # Apply same spacing factor
177
+ print(f"Recalculated Aggressive Line Height after resize: {line_height:.2f}")
178
+ except Exception as e:
179
+ print(f"Error loading adjusted font: {e}")
180
+
181
+ # Final font decided. Get its metrics if needed elsewhere, but height is set.
182
+ print(f"Final line height for drawing: {line_height:.2f}")
183
+
184
+ # --- Draw Text (Robust Top-Left Stacking) ---
185
+ try:
186
+ # Calculate vertical starting position for the *top* of the first line
187
+ total_drawn_height = line_height * len(text_lines) # Total height including full spacing for all lines
188
+ start_y_top = y1 + (height - total_drawn_height) / 2
189
+ print(f"Drawing text block: Total Height={total_drawn_height:.1f}, Start Top Y={start_y_top:.1f}")
190
+
191
+ # Draw each line using top-left anchor and explicit vertical step
192
+ for i, line in enumerate(text_lines):
193
+ # Use getlength for precise width if possible
194
+ try:
195
+ line_width = font.getlength(line)
196
+ except AttributeError:
197
+ line_bbox_draw = font.getbbox(line, anchor="lt") # Use top-left anchor for bbox width
198
+ line_width = line_bbox_draw[2] - line_bbox_draw[0]
199
+
200
+ draw_x = x1 + (width - line_width) / 2
201
+ # Position the *top* of the current line
202
+ draw_y_top = start_y_top + (i * line_height)
203
+
204
+ print(f" Drawing line {i+1}/{len(text_lines)}: '{line}' at Top-Left ({draw_x:.1f}, {draw_y_top:.1f}) Width={line_width:.1f}")
205
+
206
+ # Basic bounds check for top-left corner
207
+ draw_x = max(padding, min(image.width - padding - line_width, draw_x))
208
+ draw_y_top = max(padding, min(image.height - padding - line_height_metric, draw_y_top)) # Check against metric height
209
+
210
+ # Draw using Pillow's stroke feature with top-left anchor
211
+ stroke_width = max(1, int(getattr(font, "size", initial_font_size) * 0.08)) # Scale stroke with the final (possibly resized) font, not the initial estimate
212
+ draw.text(
213
+ (draw_x, draw_y_top),
214
+ line,
215
+ font=font,
216
+ fill="black",
217
+ anchor="lt", # Use top-left anchor
218
+ stroke_width=stroke_width,
219
+ stroke_fill="white"
220
+ )
221
+
222
+ print(f"Drew wrapped text ({len(text_lines)} lines) in bbox [{x1:.0f},{y1:.0f} - {x2:.0f},{y2:.0f}]")
223
+
224
+ except Exception as draw_e:
225
+ print(f"Error drawing text: {draw_e}")
226
+
227
+ # Debug statement to confirm processing is complete
228
+ print(f"Overlay complete. Processed {len(sorted_regions)} regions.")
229
+ return image
230
+
231
+ except Exception as e:
232
+ print(f"Error during image overlay: {e}")
233
+ import traceback
234
+ traceback.print_exc()
235
+ # Return original image in case of error
236
+ return Image.open(io.BytesIO(image_content))
237
+
238
+ def save_image(image: Image.Image, output_dir: str = "static/translated") -> str:
239
+ """Save the image to the specified directory and return the path."""
240
+ os.makedirs(output_dir, exist_ok=True)
241
+
242
+ # Generate a unique filename
243
+ filename = f"{uuid.uuid4()}.png"
244
+ filepath = os.path.join(output_dir, filename)
245
+
246
+ # Convert to RGB if the image is in RGBA mode
247
+ if image.mode == "RGBA":
248
+ image = image.convert("RGB")
249
+
250
+ # Save the image
251
+ image.save(filepath)
252
+ print(f"Saved translated image to {filepath}")
253
+
254
+ return filepath
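The wrap-then-shrink logic in the overlay code above (greedily wrap words to the bubble width, then scale the font down when the wrapped block overflows the bubble height) can be sketched independently of Pillow. `char_w` and `line_h` below are illustrative stand-ins for the real `font.getlength()`/`getbbox()` metrics, not part of the code above:

```python
def wrap_words(text, max_width, char_w=10):
    """Greedy word wrap, using a fixed per-character width in place of font.getlength()."""
    words = text.split()
    if not words:
        return []
    lines, current = [], words[0]
    for word in words[1:]:
        test = current + " " + word
        if len(test) * char_w <= max_width:
            current = test  # word still fits on this line
        else:
            lines.append(current)  # commit the line, start a new one
            current = word
    lines.append(current)
    return lines

def shrink_factor(n_lines, line_h, max_height):
    """Scale factor applied to the font size when the wrapped block is too tall."""
    total = n_lines * line_h
    return min(1.0, max_height / total) if total else 1.0
```

In the real function the per-line width comes from `font.getlength` and the shrunken size is clamped to a 6 pt minimum.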
utils/ocr.py ADDED
@@ -0,0 +1,181 @@
1
+ import io
2
+ import os
3
+ import numpy as np
4
+ import math
5
+ from PIL import Image
6
+ import easyocr
7
+ from typing import List, Dict, Any, Optional, Tuple
8
+
9
+ # Debug print
10
+ print("Loading OCR module...")
11
+
12
+ def detect_text(image_content: bytes, language: str) -> List[dict]:
13
+ """Detect text regions in the image using OCR."""
14
+ try:
15
+ # Debug print
16
+ print(f"Processing image for OCR, language: {language}")
17
+
18
+ image = Image.open(io.BytesIO(image_content)).convert("RGB")
19
+ image_np = np.array(image)
20
+
21
+ # Initialize OCR reader
22
+ # Use 'ch_sim' for Simplified Chinese instead of 'zh'
23
+ # EasyOCR allows only one of ko/ja/ch_sim per Reader (each may be combined with 'en' only), so "auto" defaults to Korean + English
+ lang_list = [lang.strip() for lang in language.split(',')] if language != "auto" else ['ko', 'en']
24
+ print(f"Initializing EasyOCR with languages: {lang_list}")
25
+ reader = easyocr.Reader(lang_list, gpu=False) # Specify gpu=False if no GPU or CUDA issues
26
+
27
+ # Detect text
28
+ results = reader.readtext(image_np, detail=1, paragraph=False) # Process line by line
29
+
30
+ # Process results
31
+ text_regions = []
32
+ for bbox, text, conf in results:
33
+ # bbox is [[x1,y1],[x2,y1],[x2,y2],[x1,y2]]
34
+ # Ensure bbox coordinates are standard Python numbers
35
+ bbox_float = [[float(p[0]), float(p[1])] for p in bbox]
36
+ if conf > 0.3: # Confidence threshold (adjust as needed)
37
+ text_regions.append({
38
+ "bbox": bbox_float,
39
+ "text": text,
40
+ "confidence": float(conf)
41
+ })
42
+ print(f"Detected {len(text_regions)} text regions.")
43
+ return text_regions
44
+ except Exception as e:
45
+ print(f"Error during OCR detection: {e}")
46
+ return []
47
+
48
+ # Rectangle utility functions for speech bubble detection
49
+ def rect_distance(rect1, rect2):
50
+ """Calculate the distance between two rectangles (bounding boxes)"""
51
+ # Convert from [[x1,y1],[x2,y1],[x2,y2],[x1,y2]] format to [x1,y1,x2,y2]
52
+ r1 = [rect1[0][0], rect1[0][1], rect1[2][0], rect1[2][1]]
53
+ r2 = [rect2[0][0], rect2[0][1], rect2[2][0], rect2[2][1]]
54
+
55
+ # Check for overlap
56
+ if (r1[0] <= r2[2] and r2[0] <= r1[2] and r1[1] <= r2[3] and r2[1] <= r1[3]):
57
+ return 0 # Rectangles overlap
58
+
59
+ # Calculate distances
60
+ dx = max(0, max(r1[0], r2[0]) - min(r1[2], r2[2]))
61
+ dy = max(0, max(r1[1], r2[1]) - min(r1[3], r2[3]))
62
+
63
+ # Return Euclidean distance
64
+ return math.sqrt(dx*dx + dy*dy)
65
+
66
+ def rect_center(rect):
67
+ """Calculate the center point of a rectangle"""
68
+ # Convert from [[x1,y1],[x2,y1],[x2,y2],[x1,y2]] format
69
+ x1, y1 = rect[0]
70
+ x2, y2 = rect[2]
71
+ return [(x1 + x2) / 2, (y1 + y2) / 2]
72
+
73
+ def rect_contains_point(rect, point):
74
+ """Check if a rectangle contains a point"""
75
+ # Convert from [[x1,y1],[x2,y1],[x2,y2],[x1,y2]] format
76
+ x1, y1 = rect[0]
77
+ x2, y2 = rect[2]
78
+ px, py = point
79
+ return x1 <= px <= x2 and y1 <= py <= y2
80
+
81
+ def expand_rect(rect1, rect2):
82
+ """Create a new rectangle that encompasses both input rectangles"""
83
+ # Convert from [[x1,y1],[x2,y1],[x2,y2],[x1,y2]] format
84
+ x1_1, y1_1 = rect1[0]
85
+ x2_1, y2_1 = rect1[2]
86
+ x1_2, y1_2 = rect2[0]
87
+ x2_2, y2_2 = rect2[2]
88
+
89
+ # Find the min/max coordinates
90
+ x1 = min(x1_1, x1_2)
91
+ y1 = min(y1_1, y1_2)
92
+ x2 = max(x2_1, x2_2)
93
+ y2 = max(y2_1, y2_2)
94
+
95
+ # Return in the format [[x1,y1],[x2,y1],[x2,y2],[x1,y2]]
96
+ return [[x1, y1], [x2, y1], [x2, y2], [x1, y2]]
97
+
98
+ def is_valid_rect(rect):
99
+ """Validate rectangle properties"""
100
+ # Convert from [[x1,y1],[x2,y1],[x2,y2],[x1,y2]] format
101
+ x1, y1 = rect[0]
102
+ x2, y2 = rect[2]
103
+
104
+ # Check if width and height are positive
105
+ return x2 > x1 and y2 > y1
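The rectangle helpers above operate on EasyOCR-style corner lists `[[x1,y1],[x2,y1],[x2,y2],[x1,y2]]`. A minimal self-contained copy of the gap-distance and union math (same formulas as above, no project imports) shows the concrete behavior — `box` is just a test helper:

```python
import math

def rect_distance(rect1, rect2):
    # Flatten corner lists [[x1,y1],[x2,y1],[x2,y2],[x1,y2]] to [x1,y1,x2,y2]
    r1 = [rect1[0][0], rect1[0][1], rect1[2][0], rect1[2][1]]
    r2 = [rect2[0][0], rect2[0][1], rect2[2][0], rect2[2][1]]
    if r1[0] <= r2[2] and r2[0] <= r1[2] and r1[1] <= r2[3] and r2[1] <= r1[3]:
        return 0  # rectangles overlap
    dx = max(0, max(r1[0], r2[0]) - min(r1[2], r2[2]))  # horizontal gap
    dy = max(0, max(r1[1], r2[1]) - min(r1[3], r2[3]))  # vertical gap
    return math.sqrt(dx * dx + dy * dy)

def expand_rect(a, b):
    # Axis-aligned union of two corner-list rectangles
    x1 = min(a[0][0], b[0][0]); y1 = min(a[0][1], b[0][1])
    x2 = max(a[2][0], b[2][0]); y2 = max(a[2][1], b[2][1])
    return [[x1, y1], [x2, y1], [x2, y2], [x1, y2]]

box = lambda x1, y1, x2, y2: [[x1, y1], [x2, y1], [x2, y2], [x1, y2]]
```

Two boxes separated by a 3 px horizontal and 4 px vertical gap are 5 px apart; their union spans both.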
106
+
107
+ def group_text_regions(regions: List[Dict], proximity_threshold: float = 100.0) -> List[Dict]:
108
+ """Groups text regions based on proximity and overlap to identify speech bubbles."""
109
+ if not regions:
110
+ return []
111
+
112
+ # Extract bounding boxes from regions
113
+ bboxes = [region['bbox'] for region in regions]
114
+
115
+ # Dictionary to track which rectangles have been grouped
116
+ grouped = [False] * len(bboxes)
117
+
118
+ # List to store the grouped rectangles
119
+ grouped_boxes = []
120
+
121
+ # Group rectangles based on proximity and overlap
122
+ for i in range(len(bboxes)):
123
+ if grouped[i]:
124
+ continue # Skip if already grouped
125
+
126
+ # Start a new group with this rectangle
127
+ current_group = bboxes[i]
128
+ grouped[i] = True
129
+
130
+ # Flag to check if we made any changes in this pass
131
+ made_changes = True
132
+
133
+ # Keep expanding the group until no more changes
134
+ while made_changes:
135
+ made_changes = False
136
+
137
+ for j in range(len(bboxes)):
138
+ if grouped[j]:
139
+ continue # Skip if already grouped
140
+
141
+ # Check if this rectangle should be added to the current group
142
+ if rect_distance(current_group, bboxes[j]) < proximity_threshold:
143
+ # Expand the current group to include this rectangle
144
+ current_group = expand_rect(current_group, bboxes[j])
145
+ grouped[j] = True
146
+ made_changes = True
147
+
148
+ # Add the final group to our list if it's valid
149
+ if is_valid_rect(current_group):
150
+ grouped_boxes.append(current_group)
151
+
152
+ # Now combine text from all regions within each group
153
+ result_groups = []
154
+
155
+ for group_bbox in grouped_boxes:
156
+ # Find all regions whose center is within this group
157
+ group_regions = []
158
+ group_text = ""
159
+
160
+ for region in regions:
161
+ center = rect_center(region['bbox'])
162
+ if rect_contains_point(group_bbox, center):
163
+ group_regions.append(region)
164
+ # Add space between text fragments
165
+ if group_text:
166
+ group_text += " "
167
+ group_text += region['text']
168
+
169
+ # Create the grouped region
170
+ if group_regions:
171
+ result_groups.append({
172
+ "text": group_text,
173
+ "bbox": group_bbox,
174
+ "original_regions": group_regions,
175
+ "is_group": True
176
+ })
177
+
178
+ # Debug output
179
+ print(f"Proximity-based grouping: {len(regions)} individual regions into {len(result_groups)} speech bubbles.")
180
+
181
+ return result_groups
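The text-merging half of `group_text_regions` — concatenate, in input order, every region whose center falls inside the group's bounding box — can be exercised standalone with synthetic regions; `merge_group_text` and `box` are illustrative helpers, not names from the module above:

```python
def rect_center(rect):
    (x1, y1), (x2, y2) = rect[0], rect[2]
    return ((x1 + x2) / 2, (y1 + y2) / 2)

def rect_contains_point(rect, point):
    (x1, y1), (x2, y2) = rect[0], rect[2]
    px, py = point
    return x1 <= px <= x2 and y1 <= py <= y2

def merge_group_text(regions, group_bbox):
    """Join the text of every region whose center lies inside group_bbox, in input order."""
    parts = [r["text"] for r in regions
             if rect_contains_point(group_bbox, rect_center(r["bbox"]))]
    return " ".join(parts)

box = lambda x1, y1, x2, y2: [[x1, y1], [x2, y1], [x2, y2], [x1, y2]]

regions = [
    {"bbox": box(0, 0, 10, 10), "text": "HELLO"},
    {"bbox": box(0, 12, 10, 20), "text": "WORLD"},
    {"bbox": box(200, 200, 210, 210), "text": "FAR"},  # outside the group, excluded
]
merged = merge_group_text(regions, box(0, 0, 12, 22))
```

Two stacked OCR lines in the same bubble collapse into one string; the distant region is left out.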
utils/pdf.py ADDED
@@ -0,0 +1,108 @@
1
+ import os
2
+ import io
3
+ import uuid
4
+ import tempfile
5
+ from typing import List, Optional, Dict, Any, BinaryIO
6
+ import fitz # PyMuPDF
7
+ from PIL import Image
8
+ import numpy as np
9
+
10
+ # Debug print
11
+ print("Loading PDF processing module...")
12
+
13
+ async def pdf_to_images(pdf_file: BinaryIO, output_dir: Optional[str] = None) -> List[str]:
14
+ """
15
+ Convert PDF file to a list of image paths.
16
+
17
+ Args:
18
+ pdf_file: File-like object containing PDF data
19
+ output_dir: Directory to save the images (optional)
20
+
21
+ Returns:
22
+ List of image file paths
23
+ """
24
+ # If no output directory is provided, use a temporary directory
25
+ if output_dir is None:
26
+ temp_dir = tempfile.mkdtemp()
27
+ output_dir = temp_dir
28
+ print(f"Using temporary directory for PDF images: {temp_dir}")
29
+ else:
30
+ os.makedirs(output_dir, exist_ok=True)
31
+ print(f"Using provided directory for PDF images: {output_dir}")
32
+
33
+ try:
34
+ # Debug print
35
+ print("Processing PDF file...")
36
+
37
+ # Read PDF file content
38
+ pdf_data = pdf_file.read()
39
+
40
+ # Create a unique subfolder for this PDF to avoid name collisions
41
+ pdf_id = uuid.uuid4().hex[:8]
42
+ pdf_output_dir = os.path.join(output_dir, f"pdf_{pdf_id}")
43
+ os.makedirs(pdf_output_dir, exist_ok=True)
44
+
45
+ # Open PDF document with PyMuPDF
46
+ pdf_document = fitz.open(stream=pdf_data, filetype="pdf")
47
+ image_paths = []
48
+
49
+ for page_num in range(len(pdf_document)):
50
+ # Get the page
51
+ page = pdf_document.load_page(page_num)
52
+
53
+ # Render page to an image (adjust the matrix for higher resolution if needed)
54
+ # Default DPI is 72, so fitz.Matrix(2, 2) renders at 144 DPI (use Matrix(4, 4) for 288 DPI)
55
+ pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
56
+
57
+ # Save the image
58
+ image_path = os.path.join(pdf_output_dir, f"page_{page_num+1}.png")
59
+ pix.save(image_path)
60
+ image_paths.append(image_path)
61
+ print(f"Saved PDF page {page_num+1} to {image_path}")
62
+
63
+ pdf_document.close()
64
+ return image_paths
65
+
66
+ except Exception as e:
67
+ print(f"Error converting PDF to images: {e}")
68
+ return []
69
+
70
+ async def pdf_stream_to_images(pdf_stream: bytes) -> List[bytes]:
71
+ """
72
+ Convert PDF binary data to a list of image binary data.
73
+ Useful for processing PDFs in memory without saving to disk.
74
+
75
+ Args:
76
+ pdf_stream: PDF file binary data
77
+
78
+ Returns:
79
+ List of image binary data (bytes)
80
+ """
81
+ try:
82
+ # Debug print
83
+ print("Processing PDF stream in memory...")
84
+
85
+ # Open PDF document from binary data
86
+ pdf_document = fitz.open(stream=pdf_stream, filetype="pdf")
87
+ images_data = []
88
+
89
+ for page_num in range(len(pdf_document)):
90
+ # Get the page
91
+ page = pdf_document.load_page(page_num)
92
+
93
+ # Render page to an image with 2x resolution (adjust as needed)
94
+ pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
95
+
96
+ # Convert to PIL Image and then to bytes
97
+ img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
98
+ img_bytes = io.BytesIO()
99
+ img.save(img_bytes, format="PNG")
100
+ images_data.append(img_bytes.getvalue())
101
+ print(f"Processed PDF page {page_num+1} in memory")
102
+
103
+ pdf_document.close()
104
+ return images_data
105
+
106
+ except Exception as e:
107
+ print(f"Error converting PDF stream to images: {e}")
108
+ return []
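PyMuPDF renders pages at 72 DPI by default, and `fitz.Matrix(z, z)` scales both axes by `z`, so the effective resolution and pixel dimensions follow from plain arithmetic (no PyMuPDF needed; the A4 page size below is just an example, and the helper names are illustrative):

```python
BASE_DPI = 72  # PyMuPDF's default rendering resolution

def effective_dpi(zoom):
    """Resolution produced by rendering with fitz.Matrix(zoom, zoom)."""
    return BASE_DPI * zoom

def rendered_size(page_w_pt, page_h_pt, zoom):
    """Pixel size of a rendered page; page dimensions are in points (1 pt = 1/72 in)."""
    return round(page_w_pt * zoom), round(page_h_pt * zoom)
```

With `zoom=2` as in the code above, an A4 page (595 x 842 pt) comes out at 1190 x 1684 px.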
utils/translation.py ADDED
@@ -0,0 +1,270 @@
1
+ import json
2
+ import requests
3
+ from typing import List, Dict, Any, Optional
4
+ from deep_translator import GoogleTranslator, MyMemoryTranslator, LingueeTranslator
5
+
6
+ # Debug print
7
+ print("Loading translation module...")
8
+
9
+ # Default free translator
10
+ DEFAULT_FREE_TRANSLATOR = "google"
11
+
12
+ def translate_with_free_translator(texts: List[str], src_lang: str, target_lang: str,
13
+ translator_type: str = DEFAULT_FREE_TRANSLATOR) -> List[Dict[str, str]]:
14
+ """Translate texts using available free translation APIs."""
15
+ if not texts:
16
+ return []
17
+
18
+ # Debug info
19
+ print(f"Translating {len(texts)} texts using {translator_type} translator")
20
+ print(f"Source language: {src_lang}, Target language: {target_lang}")
21
+
22
+ # Standardize language codes for different services
23
+ lang_map = {
24
+ # ISO-639 language code mapping for various services
25
+ "auto": "auto",
26
+ "en": "en",
27
+ "zh": "zh-CN",
28
+ "ja": "ja",
29
+ "ko": "ko",
30
+ "es": "es",
31
+ "fr": "fr",
32
+ "de": "de",
33
+ "it": "it",
34
+ "pt": "pt",
35
+ "ru": "ru"
36
+ }
37
+
38
+ # Map to standardized language codes if available, otherwise use as-is
39
+ std_src_lang = lang_map.get(src_lang, src_lang)
40
+ std_target_lang = lang_map.get(target_lang, target_lang)
41
+
42
+ translated_results = []
43
+
44
+ try:
45
+ # Select translator based on specified type
46
+ if translator_type == "google":
47
+ # Google Translate (free tier without API key)
48
+ translator = GoogleTranslator(source=std_src_lang if std_src_lang != "auto" else "auto",
49
+ target=std_target_lang)
50
+
51
+ for text in texts:
52
+ if not text or len(text.strip()) < 2:
53
+ translated_results.append({"original": text, "translated": text})
54
+ continue
55
+
56
+ try:
57
+ translated = translator.translate(text)
58
+ translated_results.append({
59
+ "original": text,
60
+ "translated": translated or text # Fallback to original if None
61
+ })
62
+ print(f"Translated: '{text}' -> '{translated}'")
63
+ except Exception as e:
64
+ print(f"Error translating text '{text}': {e}")
65
+ translated_results.append({"original": text, "translated": text})
66
+
67
+ elif translator_type == "mymemory":
68
+ # MyMemory (free with limits)
69
+ translator = MyMemoryTranslator(source=std_src_lang if std_src_lang != "auto" else "auto",
70
+ target=std_target_lang)
71
+
72
+ for text in texts:
73
+ if not text or len(text.strip()) < 2:
74
+ translated_results.append({"original": text, "translated": text})
75
+ continue
76
+
77
+ try:
78
+ translated = translator.translate(text)
79
+ translated_results.append({
80
+ "original": text,
81
+ "translated": translated or text
82
+ })
83
+ print(f"Translated: '{text}' -> '{translated}'")
84
+ except Exception as e:
85
+ print(f"Error translating text '{text}': {e}")
86
+ translated_results.append({"original": text, "translated": text})
87
+
88
+ elif translator_type == "linguee":
89
+ # Linguee (free)
90
+ # Note: Linguee has limited language support
91
+ try:
92
+ translator = LingueeTranslator(source=std_src_lang, target=std_target_lang)
93
+
94
+ for text in texts:
95
+ if not text or len(text.strip()) < 2:
96
+ translated_results.append({"original": text, "translated": text})
97
+ continue
98
+
99
+ try:
100
+ translated = translator.translate(text)
101
+ translated_results.append({
102
+ "original": text,
103
+ "translated": translated or text
104
+ })
105
+ print(f"Translated: '{text}' -> '{translated}'")
106
+ except Exception as e:
107
+ print(f"Error translating text '{text}': {e}")
108
+ translated_results.append({"original": text, "translated": text})
109
+ except Exception as e:
110
+ print(f"Linguee translator error: {e}. Falling back to Google Translate.")
111
+ # Fallback to Google Translate
112
+ return translate_with_free_translator(texts, src_lang, target_lang, "google")
113
+
114
+ else:
115
+ # Default fallback to Google
116
+ print(f"Unknown translator type '{translator_type}', using Google Translate as fallback")
117
+ return translate_with_free_translator(texts, src_lang, target_lang, "google")
118
+
119
+ except Exception as e:
120
+ print(f"Error setting up translator: {e}")
121
+ # Return original texts if translation fails
122
+ for text in texts:
123
+ translated_results.append({"original": text, "translated": text})
124
+
125
+ return translated_results
126
+
127
+ def translate_with_pollinations(texts: List[str], src_lang: str, target_lang: str) -> List[Dict[str, str]]:
128
+ """Translate texts using Pollinations.ai API."""
129
+ if not texts:
130
+ return []
131
+
132
+ try:
133
+ # Convert language codes to what Pollinations expects
134
+ lang_map = {
135
+ "zh": "zh-CN",
136
+ "ko": "ko",
137
+ "ja": "ja",
138
+ "en": "en",
139
+ "auto": "auto"
140
+ }
141
+
142
+ # Map our language codes to Pollinations expected codes
143
+ src_lang_mapped = lang_map.get(src_lang, src_lang)
144
+ target_lang_mapped = lang_map.get(target_lang, target_lang)
145
+
146
+ # Pad the batch to at least 10 texts by repeating inputs; duplicates are harmless because results are later folded into a dict keyed by original text
147
+ batch_texts = texts.copy()
148
+ while len(batch_texts) < 10:
149
+ batch_texts.extend(texts[:min(len(texts), 10-len(batch_texts))])
150
+
151
+ # Prepare the system prompt for the translation task
152
+ system_prompt = f"You are a professional translator. Translate the following texts from {src_lang_mapped} to {target_lang_mapped}. Preserve the meaning, tone, and style of the original text. Return the results in JSON format with 'original' and 'translated' keys for each text."
153
+
154
+ # Create the user prompt with the texts to translate
155
+ user_prompt = "Translate these texts and return a JSON array with objects containing 'original' and 'translated' properties:\n"
156
+ for i, text in enumerate(batch_texts):
157
+ user_prompt += f"{i+1}. {text}\n"
158
+
159
+ # Prepare the API request to Pollinations.ai
160
+ api_url = "https://api.pollinations.ai/v2/generate/text"
161
+ headers = {
162
+ "Content-Type": "application/json"
163
+ }
164
+
165
+ payload = {
166
+ "model": "openai", # Using OpenAI model as it's good for translation
167
+ "prompt": user_prompt,
168
+ "system": system_prompt,
169
+ "jsonMode": True, # Request JSON output
170
+ "reasoning_effort": "high", # Higher quality translations
171
+ "private": True,
172
+ "referrer": "manga_ocr_translator"
173
+ }
174
+
175
+ print(f"Sending batch of {len(batch_texts)} texts to Pollinations.ai for translation")
176
+ response = requests.post(api_url, headers=headers, json=payload, timeout=60)
177
+ response.raise_for_status()
178
+
179
+ # Parse the response
180
+ result = response.json()
181
+ translated_text = result.get("response", "")
182
+
183
+ # The response should be a JSON string that we need to parse
184
+ try:
185
+ translated_data = json.loads(translated_text)
186
+
187
+ # Map the translation results back to the original texts
188
+ # Create a mapping of original text to its translation
189
+ translation_map = {}
190
+ for item in translated_data:
191
+ if isinstance(item, dict) and "original" in item and "translated" in item:
192
+ translation_map[item["original"]] = item["translated"]
193
+
194
+ # Apply translations to our original texts list
195
+ translated_results = []
196
+ for text in texts:
197
+ translated_results.append({
198
+ "original": text,
199
+ "translated": translation_map.get(text, text) # Default to original if not found
200
+ })
201
+ print(f"Pollinations translation: '{text}' -> '{translation_map.get(text, text)}'")
202
+
203
+ return translated_results
204
+
205
+ except json.JSONDecodeError as e:
206
+ print(f"Error parsing translation response as JSON: {e}")
207
+ print(f"Raw response: {translated_text}")
208
+ # Fallback: Return original texts
209
+ return [{"original": text, "translated": text} for text in texts]
210
+
211
+ except Exception as e:
212
+ print(f"Error with Pollinations.ai translation: {e}")
213
+ # Return original texts as fallback
214
+ return [{"original": text, "translated": text} for text in texts]
215
+
216
+ def translate_grouped_regions(grouped_regions: List[Dict], src_lang: str, target_lang: str, use_pollinations: bool = False,
217
+ free_translator: str = DEFAULT_FREE_TRANSLATOR) -> List[Dict]:
218
+ """Translate text within grouped regions."""
219
+ if not grouped_regions:
220
+ return []
221
+
222
+ # Add translated_text to all regions with original text as a fallback
223
+ for region in grouped_regions:
224
+ region["translated_text"] = region["text"] # Default fallback for the group
225
+
226
+ # Extract all texts (already grouped) for translation
227
+ texts_to_translate = [region["text"] for region in grouped_regions if region["text"] and len(region["text"].strip()) >= 2]
228
+
229
+ if not texts_to_translate:
230
+ print("No valid grouped texts to translate")
231
+ return grouped_regions # Return groups with original text as fallback
232
+
233
+ try:
234
+ print(f"Translating {len(texts_to_translate)} grouped texts from '{src_lang}' to '{target_lang}'...")
235
+
236
+ translation_results = []
237
+ # Use Pollinations.ai for translation if enabled
238
+ if use_pollinations:
239
+ print("Using Pollinations.ai for translation")
240
+ translation_results = translate_with_pollinations(texts_to_translate, src_lang, target_lang)
241
+
242
+ # Otherwise, use selected free translator
243
+ else:
244
+ print(f"Using free translator: {free_translator}")
245
+ translation_results = translate_with_free_translator(
246
+ texts_to_translate,
247
+ src_lang,
248
+ target_lang,
249
+ free_translator
250
+ )
251
+
252
+ # Create a dictionary mapping original grouped text to translated text
253
+ # Ensure the results match the input order
254
+ translations_dict = {item["original"]: item["translated"] for item in translation_results}
255
+
256
+ # Apply translations back to the grouped regions
257
+ for region in grouped_regions:
258
+ original_text = region["text"]
259
+ if original_text in translations_dict:
260
+ region["translated_text"] = translations_dict[original_text]
261
+ print(f" Applied translation to group: '{original_text}' -> '{region['translated_text']}'")
262
+ else:
263
+ print(f" Warning: Translation not found for group text: '{original_text}'") # Should not happen if results map correctly
264
+
265
+ return grouped_regions
266
+
267
+ except Exception as e:
268
+ print(f"Error during grouped translation setup: {e}")
269
+ # Fallback already handled by setting original text
270
+ return grouped_regions
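The map-back step in `translate_grouped_regions` — key translations by original text and fall back to the original on a miss — can be shown in isolation; `apply_translations` is an illustrative stand-in for the inline loop above:

```python
def apply_translations(regions, results):
    """Attach translated_text to each region; results is a list of {'original','translated'} dicts."""
    lookup = {item["original"]: item["translated"] for item in results}
    for region in regions:
        # Fall back to the untranslated text when the translator returned nothing for it
        region["translated_text"] = lookup.get(region["text"], region["text"])
    return regions

regions = [{"text": "안녕"}, {"text": "???"}]
results = [{"original": "안녕", "translated": "hello"}]
out = apply_translations(regions, results)
```

A region whose text never reached the translator (e.g. filtered as too short) keeps its original text, which matches the fallback set at the top of the function.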
utils/web.py ADDED
@@ -0,0 +1,78 @@
1
+ import requests
2
+ import os
3
+ from typing import List, Dict, Any, Optional
4
+ from bs4 import BeautifulSoup
5
+
6
+ # Debug print
7
+ print("Loading web scraping module...")
8
+
9
+ def scrape_comic_images(url: str) -> List[str]:
10
+ """Scrape all comic images from the provided URL."""
11
+ headers = {
12
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
13
+ }
14
+ try:
15
+ # Debug print
16
+ print(f"Scraping manga images from URL: {url}")
17
+
18
+ response = requests.get(url, headers=headers, timeout=15)
19
+ response.raise_for_status() # Raise an exception for bad status codes
20
+
21
+ soup = BeautifulSoup(response.content, "html.parser")
22
+ images = []
23
+ image_urls = set() # Use a set to avoid duplicate URLs
24
+
25
+ # Common selectors for manhwa/manhua sites
26
+ selectors = [
27
+ ".chapter-content img",
28
+ ".comic-container img",
29
+ ".reading-content img",
30
+ "#readerarea img",
31
+ ".viewer-container img",
32
+ "img.comic-panel"
33
+ ]
34
+
35
+ for selector in selectors:
36
+ for img in soup.select(selector):
37
+ src = img.get("src") or img.get("data-src") or img.get("data-original")
38
+ if src:
39
+ # Resolve relative URLs
40
+ src = requests.compat.urljoin(url, src.strip())
41
+ if src not in image_urls:
42
+ images.append(src)
43
+ image_urls.add(src)
44
+
45
+ if not images:
46
+ # Fallback: Find all images if specific selectors fail
47
+ print("Warning: Specific selectors failed, trying to find all images.")
48
+ for img in soup.find_all("img"):
49
+ src = img.get("src") or img.get("data-src") or img.get("data-original")
50
+ if src:
51
+ src = requests.compat.urljoin(url, src.strip())
52
+ if src not in image_urls:
53
+ images.append(src)
54
+ image_urls.add(src)
55
+
56
+ print(f"Found {len(images)} manga images.")
57
+ if not images:
58
+ raise ValueError("Could not find any images on the page using common selectors.")
59
+
60
+ return images
61
+
62
+ except Exception as e:
63
+ print(f"Error scraping comic images: {e}")
64
+ return []
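`requests.compat.urljoin` is the standard library's `urllib.parse.urljoin`, so the resolve-and-deduplicate step used in `scrape_comic_images` can be demonstrated without any network access; `collect_image_urls` is an illustrative helper, not a name from the module above:

```python
from urllib.parse import urljoin

def collect_image_urls(page_url, srcs):
    """Resolve relative src values against the page URL, keeping first-seen order, dropping duplicates."""
    images, seen = [], set()
    for src in srcs:
        if not src:  # skip missing src/data-src attributes
            continue
        absolute = urljoin(page_url, src.strip())
        if absolute not in seen:
            images.append(absolute)
            seen.add(absolute)
    return images

urls = collect_image_urls(
    "https://example.com/ch/1",
    ["../img/p1.png", "https://cdn.example.com/p2.png", "../img/p1.png", None],
)
```

Relative paths are resolved against the chapter URL, absolute URLs pass through unchanged, and the repeated entry is dropped.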
65
+
66
+ async def download_image(image_url: str) -> Optional[bytes]:
67
+ """Download an image from the provided URL."""
68
+ headers = {
69
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
70
+ }
71
+ try:
72
+ print(f"Downloading image: {image_url}")
73
+ response = requests.get(image_url, headers=headers, timeout=15)
74
+ response.raise_for_status()
75
+ return response.content
76
+ except Exception as e:
77
+ print(f"Error downloading image {image_url}: {e}")
78
+ return None