Spaces:
Sleeping
Sleeping
Commit
·
de68d43
1
Parent(s):
6f8bc75
version 1.0
Browse files- .gitattributes +35 -0
- .gitignore +132 -0
- Dockerfile +39 -21
- README.md +11 -0
- {screenshot → app}/__init__.py +0 -0
- {screenshot/routers → app/core}/__init__.py +0 -0
- app/core/service/__init__.py +0 -0
- app/core/service/playwright/__init__.py +5 -0
- app/core/service/playwright/models.py +39 -0
- app/core/service/playwright/playwright_context.py +103 -0
- {screenshot → app}/main.py +4 -2
- app/routers/__init__.py +0 -0
- app/routers/screenshot/__init__.py +6 -0
- app/routers/screenshot/screenshot.py +30 -0
- pyproject.toml +5 -3
- requirements-dev.txt +1 -0
- requirements.txt +4 -0
- screenshot/routers/screenshot.py +0 -93
.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# requirements dev
|
| 2 |
+
.ruff_cache
|
| 3 |
+
|
| 4 |
+
# Byte-compiled / optimized / DLL files
|
| 5 |
+
__pycache__/
|
| 6 |
+
*.py[cod]
|
| 7 |
+
*$py.class
|
| 8 |
+
|
| 9 |
+
# C extensions
|
| 10 |
+
*.so
|
| 11 |
+
|
| 12 |
+
# Distribution / packaging
|
| 13 |
+
.Python
|
| 14 |
+
build/
|
| 15 |
+
develop-eggs/
|
| 16 |
+
dist/
|
| 17 |
+
downloads/
|
| 18 |
+
eggs/
|
| 19 |
+
.eggs/
|
| 20 |
+
lib/
|
| 21 |
+
lib64/
|
| 22 |
+
parts/
|
| 23 |
+
sdist/
|
| 24 |
+
var/
|
| 25 |
+
wheels/
|
| 26 |
+
pip-wheel-metadata/
|
| 27 |
+
share/python-wheels/
|
| 28 |
+
*.egg-info/
|
| 29 |
+
.installed.cfg
|
| 30 |
+
*.egg
|
| 31 |
+
MANIFEST
|
| 32 |
+
|
| 33 |
+
# PyInstaller
|
| 34 |
+
# Usually these files are written by a python script from a template
|
| 35 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 36 |
+
*.manifest
|
| 37 |
+
*.spec
|
| 38 |
+
|
| 39 |
+
# Installer logs
|
| 40 |
+
pip-log.txt
|
| 41 |
+
pip-delete-this-directory.txt
|
| 42 |
+
|
| 43 |
+
# Unit test / coverage reports
|
| 44 |
+
htmlcov/
|
| 45 |
+
.tox/
|
| 46 |
+
.nox/
|
| 47 |
+
.coverage
|
| 48 |
+
.coverage.*
|
| 49 |
+
.cache
|
| 50 |
+
nosetests.xml
|
| 51 |
+
coverage.xml
|
| 52 |
+
*.cover
|
| 53 |
+
*.py,cover
|
| 54 |
+
.hypothesis/
|
| 55 |
+
.pytest_cache/
|
| 56 |
+
|
| 57 |
+
# Translations
|
| 58 |
+
*.mo
|
| 59 |
+
*.pot
|
| 60 |
+
|
| 61 |
+
# Django stuff:
|
| 62 |
+
*.log
|
| 63 |
+
local_settings.py
|
| 64 |
+
db.sqlite3
|
| 65 |
+
db.sqlite3-journal
|
| 66 |
+
|
| 67 |
+
# Flask stuff:
|
| 68 |
+
instance/
|
| 69 |
+
.webassets-cache
|
| 70 |
+
|
| 71 |
+
# Scrapy stuff:
|
| 72 |
+
.scrapy
|
| 73 |
+
|
| 74 |
+
# Sphinx documentation
|
| 75 |
+
docs/_build/
|
| 76 |
+
|
| 77 |
+
# PyBuilder
|
| 78 |
+
target/
|
| 79 |
+
|
| 80 |
+
# Jupyter Notebook
|
| 81 |
+
.ipynb_checkpoints
|
| 82 |
+
|
| 83 |
+
# IPython
|
| 84 |
+
profile_default/
|
| 85 |
+
ipython_config.py
|
| 86 |
+
|
| 87 |
+
# pyenv
|
| 88 |
+
.python-version
|
| 89 |
+
|
| 90 |
+
# pipenv
|
| 91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 94 |
+
# install all needed dependencies.
|
| 95 |
+
#Pipfile.lock
|
| 96 |
+
|
| 97 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
| 98 |
+
__pypackages__/
|
| 99 |
+
|
| 100 |
+
# Celery stuff
|
| 101 |
+
celerybeat-schedule
|
| 102 |
+
celerybeat.pid
|
| 103 |
+
|
| 104 |
+
# SageMath parsed files
|
| 105 |
+
*.sage.py
|
| 106 |
+
|
| 107 |
+
# Environments
|
| 108 |
+
.env
|
| 109 |
+
.venv
|
| 110 |
+
env/
|
| 111 |
+
venv/
|
| 112 |
+
ENV/
|
| 113 |
+
env.bak/
|
| 114 |
+
venv.bak/
|
| 115 |
+
|
| 116 |
+
# Spyder project settings
|
| 117 |
+
.spyderproject
|
| 118 |
+
.spyproject
|
| 119 |
+
|
| 120 |
+
# Rope project settings
|
| 121 |
+
.ropeproject
|
| 122 |
+
|
| 123 |
+
# mkdocs documentation
|
| 124 |
+
/site
|
| 125 |
+
|
| 126 |
+
# mypy
|
| 127 |
+
.mypy_cache/
|
| 128 |
+
.dmypy.json
|
| 129 |
+
dmypy.json
|
| 130 |
+
|
| 131 |
+
# Pyre type checker
|
| 132 |
+
.pyre/
|
Dockerfile
CHANGED
|
@@ -1,24 +1,42 @@
|
|
| 1 |
-
|
| 2 |
-
|
|
|
|
| 3 |
WORKDIR /app
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
ENV VIRTUAL_ENV=/app/venv
|
| 7 |
-
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
|
| 8 |
-
|
| 9 |
COPY requirements.txt .
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
WORKDIR /app
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Stage 1: Build
|
| 2 |
+
FROM python:3.11-slim as builder
|
| 3 |
+
|
| 4 |
WORKDIR /app
|
| 5 |
+
|
| 6 |
+
# Copy requirements.txt
|
|
|
|
|
|
|
|
|
|
| 7 |
COPY requirements.txt .
|
| 8 |
+
|
| 9 |
+
# Install dependencies
|
| 10 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 11 |
+
|
| 12 |
+
# Copy the application package
|
| 13 |
+
COPY app app
|
| 14 |
+
|
| 15 |
+
# Stage 2: Production
|
| 16 |
+
FROM python:3.11-slim as production
|
| 17 |
+
|
| 18 |
WORKDIR /app
|
| 19 |
+
|
| 20 |
+
# Copy installed modules from the builder stage
|
| 21 |
+
COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
|
| 22 |
+
|
| 23 |
+
# Install playwright dependencies
|
| 24 |
+
RUN python -m playwright install-deps && \
|
| 25 |
+
# Clean up
|
| 26 |
+
rm -f /usr/lib/x86_64-linux-gnu/libmfxhw* /usr/lib/x86_64-linux-gnu/mfx/* && \
|
| 27 |
+
# Create non-root user
|
| 28 |
+
useradd --home-dir /app --shell /bin/sh zaws && \
|
| 29 |
+
chown -R zaws:zaws .
|
| 30 |
+
|
| 31 |
+
# Switch to non-root user
|
| 32 |
+
USER zaws
|
| 33 |
+
|
| 34 |
+
# Install Firefox as the non-root user
|
| 35 |
+
# Do not install Firefox as root: that causes permission errors unless zaws is granted access to the installation path
|
| 36 |
+
RUN python -m playwright install firefox
|
| 37 |
+
|
| 38 |
+
# Copy application code from the builder stage
|
| 39 |
+
COPY --from=builder /app /app
|
| 40 |
+
|
| 41 |
+
# Run application
|
| 42 |
+
CMD ["python", "app/main.py"]
|
README.md
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Zawshoot
|
| 3 |
+
emoji: π
|
| 4 |
+
colorFrom: pink
|
| 5 |
+
colorTo: indigo
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
license: mit
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
{screenshot → app}/__init__.py
RENAMED
|
File without changes
|
{screenshot/routers → app/core}/__init__.py
RENAMED
|
File without changes
|
app/core/service/__init__.py
ADDED
|
File without changes
|
app/core/service/playwright/__init__.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ruff: noqa: F401
|
| 2 |
+
from .playwright_context import (
|
| 3 |
+
AsyncPlaywrightContext,
|
| 4 |
+
TimeoutError,
|
| 5 |
+
)
|
app/core/service/playwright/models.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from typing import Literal
|
| 4 |
+
|
| 5 |
+
from pydantic import BaseModel, Field, HttpUrl, ValidationInfo, field_validator
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class ViewPortModel(BaseModel):
|
| 9 |
+
width: int = 1280
|
| 10 |
+
height: int = 720
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class PageModel(BaseModel):
|
| 14 |
+
color_scheme: Literal["light", "dark", "no-preference"] | None = "no-preference"
|
| 15 |
+
java_script_enabled: bool | None = True
|
| 16 |
+
viewport: ViewPortModel | None = None
|
| 17 |
+
proxy: dict | None = None
|
| 18 |
+
no_viewport: bool | None = False
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class GetContentModel(BaseModel):
|
| 22 |
+
url: HttpUrl
|
| 23 |
+
query_selector: str | None = None
|
| 24 |
+
wait_selector: bool | None = False
|
| 25 |
+
ms_delay: float = Field(default=0.0, gt=-1, lt=15_000.1)
|
| 26 |
+
|
| 27 |
+
@field_validator("wait_selector")
|
| 28 |
+
@classmethod
|
| 29 |
+
def check_query_selector(cls, v: bool | None, info: ValidationInfo) -> bool | None:
|
| 30 |
+
if v is True and info.data.get("query_selector") is None:
|
| 31 |
+
msg = "wait_selector cannot be set to True without specifying a query_selector"
|
| 32 |
+
raise ValueError(
|
| 33 |
+
msg,
|
| 34 |
+
)
|
| 35 |
+
return v
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class ScreenshotModel(GetContentModel):
|
| 39 |
+
full_page: bool | None = False
|
app/core/service/playwright/playwright_context.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from typing import TYPE_CHECKING, AsyncContextManager
|
| 4 |
+
|
| 5 |
+
from playwright.async_api import ( # noqa: F401
|
| 6 |
+
Browser,
|
| 7 |
+
BrowserContext,
|
| 8 |
+
Page,
|
| 9 |
+
TimeoutError,
|
| 10 |
+
async_playwright,
|
| 11 |
+
)
|
| 12 |
+
|
| 13 |
+
from .models import BrowserModel, GetContentModel, PageModel, ScreenshotModel # noqa: TCH001
|
| 14 |
+
|
| 15 |
+
if TYPE_CHECKING:
|
| 16 |
+
from types import TracebackType
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class AsyncPlaywrightContext:
|
| 20 |
+
async def __aenter__(self) -> AsyncContextManager:
|
| 21 |
+
self.playwright = await async_playwright().start()
|
| 22 |
+
self.browser = await self.playwright.firefox.launch(
|
| 23 |
+
firefox_user_prefs={
|
| 24 |
+
"extensions.enabledScopes": 1,
|
| 25 |
+
"extensions.autoDisableScopes": 1,
|
| 26 |
+
"dom.webdriver.enabled": False,
|
| 27 |
+
"useAutomationExtension": False,
|
| 28 |
+
"general.useragent.override": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36", # noqa: E501
|
| 29 |
+
},
|
| 30 |
+
)
|
| 31 |
+
return self
|
| 32 |
+
|
| 33 |
+
async def new_browser_page(self, browser: Browser, page_model: PageModel) -> Page:
|
| 34 |
+
return await browser.new_page(
|
| 35 |
+
color_scheme=page_model.color_scheme,
|
| 36 |
+
java_script_enabled=page_model.java_script_enabled,
|
| 37 |
+
no_viewport=page_model.no_viewport,
|
| 38 |
+
proxy=page_model.proxy.model_dump() if page_model.proxy else None,
|
| 39 |
+
viewport=page_model.viewport.model_dump() if page_model.viewport else None,
|
| 40 |
+
)
|
| 41 |
+
|
| 42 |
+
async def screenshot(
|
| 43 |
+
self,
|
| 44 |
+
screenshot_model: ScreenshotModel,
|
| 45 |
+
page_model: PageModel,
|
| 46 |
+
) -> bytes:
|
| 47 |
+
page = await self.new_browser_page(browser=self.browser, page_model=page_model)
|
| 48 |
+
|
| 49 |
+
await page.goto(str(screenshot_model.url))
|
| 50 |
+
|
| 51 |
+
await page.wait_for_timeout(screenshot_model.ms_delay)
|
| 52 |
+
|
| 53 |
+
screenshot_locator = (
|
| 54 |
+
page.locator(screenshot_model.query_selector)
|
| 55 |
+
if screenshot_model.query_selector
|
| 56 |
+
else None
|
| 57 |
+
)
|
| 58 |
+
|
| 59 |
+
if screenshot_locator:
|
| 60 |
+
if screenshot_model.wait_selector:
|
| 61 |
+
await screenshot_locator.wait_for()
|
| 62 |
+
screenshot_data: bytes = await screenshot_locator.screenshot()
|
| 63 |
+
else:
|
| 64 |
+
screenshot_data: bytes = await page.screenshot(full_page=screenshot_model.full_page)
|
| 65 |
+
|
| 66 |
+
await page.close()
|
| 67 |
+
return screenshot_data
|
| 68 |
+
|
| 69 |
+
async def get_content(
|
| 70 |
+
self,
|
| 71 |
+
get_content_model: GetContentModel,
|
| 72 |
+
browser_model: BrowserModel,
|
| 73 |
+
) -> str:
|
| 74 |
+
browser_context = await self.new_browser_context(
|
| 75 |
+
browser=self.browser,
|
| 76 |
+
browser_model=browser_model,
|
| 77 |
+
)
|
| 78 |
+
page = browser_context.new_page()
|
| 79 |
+
await page.goto(str(get_content_model.url))
|
| 80 |
+
await page.wait_for_timeout(get_content_model.ms_delay)
|
| 81 |
+
|
| 82 |
+
wait_locator = (
|
| 83 |
+
await page.locator(get_content_model.query_selector)
|
| 84 |
+
if get_content_model.query_selector
|
| 85 |
+
else None
|
| 86 |
+
)
|
| 87 |
+
|
| 88 |
+
if wait_locator:
|
| 89 |
+
await wait_locator.wait_for()
|
| 90 |
+
html = page.content()
|
| 91 |
+
await page.close()
|
| 92 |
+
return html
|
| 93 |
+
|
| 94 |
+
async def __aexit__(
|
| 95 |
+
self,
|
| 96 |
+
typ: type[BaseException] | None,
|
| 97 |
+
exc: BaseException | None,
|
| 98 |
+
tb: TracebackType | None,
|
| 99 |
+
) -> None:
|
| 100 |
+
if self.browser:
|
| 101 |
+
await self.browser.close()
|
| 102 |
+
if self.playwright:
|
| 103 |
+
await self.playwright.stop()
|
{screenshot → app}/main.py
RENAMED
|
@@ -1,10 +1,12 @@
|
|
|
|
|
|
|
|
| 1 |
import uvicorn
|
| 2 |
from fastapi import FastAPI
|
| 3 |
-
from routers.screenshot import
|
| 4 |
|
| 5 |
app = FastAPI()
|
| 6 |
|
| 7 |
app.include_router(screenshot_router)
|
| 8 |
|
| 9 |
if __name__ == "__main__":
|
| 10 |
-
uvicorn.run(app, host="0.0.0.0", port=
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
|
| 3 |
import uvicorn
|
| 4 |
from fastapi import FastAPI
|
| 5 |
+
from routers.screenshot import router as screenshot_router
|
| 6 |
|
| 7 |
app = FastAPI()
|
| 8 |
|
| 9 |
app.include_router(screenshot_router)
|
| 10 |
|
| 11 |
if __name__ == "__main__":
|
| 12 |
+
uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", default="7860"))) # noqa: S104
|
app/routers/__init__.py
ADDED
|
File without changes
|
app/routers/screenshot/__init__.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter
|
| 2 |
+
|
| 3 |
+
from .screenshot import router as screenshot_router
|
| 4 |
+
|
| 5 |
+
router = APIRouter()
|
| 6 |
+
router.include_router(screenshot_router)
|
app/routers/screenshot/screenshot.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import logging
|
| 4 |
+
|
| 5 |
+
from core.service.playwright import AsyncPlaywrightContext, TimeoutError
|
| 6 |
+
from core.service.playwright.models import PageModel, ScreenshotModel
|
| 7 |
+
from fastapi import APIRouter, HTTPException
|
| 8 |
+
from fastapi.responses import Response
|
| 9 |
+
|
| 10 |
+
router = APIRouter()
|
| 11 |
+
playwright_context = AsyncPlaywrightContext()
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@router.post("/screenshot", tags=["V1"])
|
| 15 |
+
async def screenshot(
|
| 16 |
+
screenshot_model: ScreenshotModel,
|
| 17 |
+
page_model: PageModel | None = None,
|
| 18 |
+
) -> Response:
|
| 19 |
+
page_model = page_model if page_model else PageModel()
|
| 20 |
+
async with playwright_context as sc:
|
| 21 |
+
try:
|
| 22 |
+
response = await sc.screenshot(screenshot_model=screenshot_model, page_model=page_model)
|
| 23 |
+
return Response(content=response, media_type="image/png")
|
| 24 |
+
except TimeoutError as e:
|
| 25 |
+
status_code = 504 if isinstance(e, TimeoutError) else 400
|
| 26 |
+
detail = f"An error occurred while generating the screenshot: {e}"
|
| 27 |
+
raise HTTPException(status_code=status_code, detail=detail) from e
|
| 28 |
+
except Exception as e:
|
| 29 |
+
logging.exception("Unhandled error in screenshot generation")
|
| 30 |
+
raise HTTPException(status_code=500, detail="An unexpected error occurred") from e
|
pyproject.toml
CHANGED
|
@@ -5,7 +5,9 @@ line-length = 100
|
|
| 5 |
select = ["ALL"]
|
| 6 |
ignore = [
|
| 7 |
"CPY001", # copyright above code
|
| 8 |
-
"
|
|
|
|
|
|
|
| 9 |
]
|
| 10 |
|
| 11 |
[tool.mypy]
|
|
@@ -14,5 +16,5 @@ show_error_codes = true
|
|
| 14 |
no_implicit_optional = true
|
| 15 |
warn_return_any = true
|
| 16 |
warn_unused_ignores = true
|
| 17 |
-
|
| 18 |
-
|
|
|
|
| 5 |
select = ["ALL"]
|
| 6 |
ignore = [
|
| 7 |
"CPY001", # copyright above code
|
| 8 |
+
"ANN101", # self annotations
|
| 9 |
+
"ANN102", # cls annotations
|
| 10 |
+
"D", # docstring rules: Sphinx is not supported
|
| 11 |
]
|
| 12 |
|
| 13 |
[tool.mypy]
|
|
|
|
| 16 |
no_implicit_optional = true
|
| 17 |
warn_return_any = true
|
| 18 |
warn_unused_ignores = true
|
| 19 |
+
python_version = "3.10"
|
| 20 |
+
plugins = ["pydantic.mypy"]
|
requirements-dev.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
ruff==0.3.3
|
requirements.txt
CHANGED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pydantic==2.6.4
|
| 2 |
+
fastapi==0.110.0
|
| 3 |
+
playwright==1.42.0
|
| 4 |
+
uvicorn==0.28.0
|
screenshot/routers/screenshot.py
DELETED
|
@@ -1,93 +0,0 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
import logging
|
| 4 |
-
from typing import IO, TYPE_CHECKING, AsyncContextManager, Literal
|
| 5 |
-
|
| 6 |
-
from fastapi import APIRouter, HTTPException
|
| 7 |
-
from fastapi.responses import Response
|
| 8 |
-
from playwright.async_api import BrowserContext, TimeoutError, async_playwright
|
| 9 |
-
from pydantic import BaseModel, HttpUrl
|
| 10 |
-
|
| 11 |
-
if TYPE_CHECKING:
|
| 12 |
-
from types import TracebackType
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
router = APIRouter()
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
class ViewPort(BaseModel):
|
| 19 |
-
width: int = 1280
|
| 20 |
-
height: int = 720
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
class ScreenshotItems(BaseModel):
|
| 24 |
-
url: HttpUrl
|
| 25 |
-
full_page: bool | None = False
|
| 26 |
-
query_selector: str | None = None
|
| 27 |
-
|
| 28 |
-
viewport: ViewPort | None = None
|
| 29 |
-
color_scheme: Literal["light", "dark", "no-preference"] | None = "no-preference"
|
| 30 |
-
bypass_csp: bool | None = False
|
| 31 |
-
java_script_enabled: bool | None = True
|
| 32 |
-
proxy: dict | None = None
|
| 33 |
-
is_mobile: bool | None = False
|
| 34 |
-
no_viewport: bool | None = False
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
class ScreenShot:
|
| 38 |
-
async def __aenter__(self) -> AsyncContextManager[ScreenShot]:
|
| 39 |
-
self.playwright = await async_playwright().start()
|
| 40 |
-
self.browser = await self.playwright.chromium.launch(
|
| 41 |
-
args=["--disable-extensions"],
|
| 42 |
-
chromium_sandbox=True,
|
| 43 |
-
)
|
| 44 |
-
return self
|
| 45 |
-
|
| 46 |
-
async def browser_context(self, items: ScreenshotItems) -> BrowserContext:
|
| 47 |
-
return await self.browser.new_context(
|
| 48 |
-
viewport=items.viewport.model_dump() if items.viewport else None,
|
| 49 |
-
color_scheme=items.color_scheme,
|
| 50 |
-
bypass_csp=items.bypass_csp,
|
| 51 |
-
java_script_enabled=items.java_script_enabled,
|
| 52 |
-
proxy=items.proxy.model_dump() if items.proxy else None,
|
| 53 |
-
is_mobile=items.is_mobile,
|
| 54 |
-
no_viewport=items.no_viewport,
|
| 55 |
-
)
|
| 56 |
-
|
| 57 |
-
async def capture(self, items: ScreenshotItems) -> IO[bytes]:
|
| 58 |
-
context: BrowserContext = await self.browser_context(items)
|
| 59 |
-
page = await context.new_page()
|
| 60 |
-
await page.goto(str(items.url))
|
| 61 |
-
|
| 62 |
-
if items.query_selector:
|
| 63 |
-
page = page.locator(items.query_selector)
|
| 64 |
-
|
| 65 |
-
screenshot_data = await page.screenshot(full_page=items.full_page)
|
| 66 |
-
await context.close()
|
| 67 |
-
return screenshot_data
|
| 68 |
-
|
| 69 |
-
async def __aexit__(
|
| 70 |
-
self,
|
| 71 |
-
typ: type[BaseException] | None,
|
| 72 |
-
exc: BaseException | None,
|
| 73 |
-
tb: TracebackType | None,
|
| 74 |
-
) -> None:
|
| 75 |
-
if self.browser:
|
| 76 |
-
await self.browser.close()
|
| 77 |
-
if self.playwright:
|
| 78 |
-
await self.playwright.stop()
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
@router.post("/screenshot")
|
| 82 |
-
async def screenshot(data: ScreenshotItems) -> Response:
|
| 83 |
-
async with ScreenShot() as sc:
|
| 84 |
-
try:
|
| 85 |
-
response = await sc.capture(items=data)
|
| 86 |
-
return Response(content=response, media_type="image/png")
|
| 87 |
-
except TimeoutError as e:
|
| 88 |
-
raise HTTPException(
|
| 89 |
-
status_code=504,
|
| 90 |
-
detail=f"An error occurred while generating the screenshot: {e}",
|
| 91 |
-
) from e
|
| 92 |
-
except Exception:
|
| 93 |
-
logging.exception("screenshot unhandled error")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|