SilentWraith committed on
Commit
de68d43
•
1 Parent(s): 6f8bc75

version 1.0

Browse files
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # requirements dev
2
+ .ruff_cache
3
+
4
+ # Byte-compiled / optimized / DLL files
5
+ __pycache__/
6
+ *.py[cod]
7
+ *$py.class
8
+
9
+ # C extensions
10
+ *.so
11
+
12
+ # Distribution / packaging
13
+ .Python
14
+ build/
15
+ develop-eggs/
16
+ dist/
17
+ downloads/
18
+ eggs/
19
+ .eggs/
20
+ lib/
21
+ lib64/
22
+ parts/
23
+ sdist/
24
+ var/
25
+ wheels/
26
+ pip-wheel-metadata/
27
+ share/python-wheels/
28
+ *.egg-info/
29
+ .installed.cfg
30
+ *.egg
31
+ MANIFEST
32
+
33
+ # PyInstaller
34
+ # Usually these files are written by a python script from a template
35
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
36
+ *.manifest
37
+ *.spec
38
+
39
+ # Installer logs
40
+ pip-log.txt
41
+ pip-delete-this-directory.txt
42
+
43
+ # Unit test / coverage reports
44
+ htmlcov/
45
+ .tox/
46
+ .nox/
47
+ .coverage
48
+ .coverage.*
49
+ .cache
50
+ nosetests.xml
51
+ coverage.xml
52
+ *.cover
53
+ *.py,cover
54
+ .hypothesis/
55
+ .pytest_cache/
56
+
57
+ # Translations
58
+ *.mo
59
+ *.pot
60
+
61
+ # Django stuff:
62
+ *.log
63
+ local_settings.py
64
+ db.sqlite3
65
+ db.sqlite3-journal
66
+
67
+ # Flask stuff:
68
+ instance/
69
+ .webassets-cache
70
+
71
+ # Scrapy stuff:
72
+ .scrapy
73
+
74
+ # Sphinx documentation
75
+ docs/_build/
76
+
77
+ # PyBuilder
78
+ target/
79
+
80
+ # Jupyter Notebook
81
+ .ipynb_checkpoints
82
+
83
+ # IPython
84
+ profile_default/
85
+ ipython_config.py
86
+
87
+ # pyenv
88
+ .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
98
+ __pypackages__/
99
+
100
+ # Celery stuff
101
+ celerybeat-schedule
102
+ celerybeat.pid
103
+
104
+ # SageMath parsed files
105
+ *.sage.py
106
+
107
+ # Environments
108
+ .env
109
+ .venv
110
+ env/
111
+ venv/
112
+ ENV/
113
+ env.bak/
114
+ venv.bak/
115
+
116
+ # Spyder project settings
117
+ .spyderproject
118
+ .spyproject
119
+
120
+ # Rope project settings
121
+ .ropeproject
122
+
123
+ # mkdocs documentation
124
+ /site
125
+
126
+ # mypy
127
+ .mypy_cache/
128
+ .dmypy.json
129
+ dmypy.json
130
+
131
+ # Pyre type checker
132
+ .pyre/
Dockerfile CHANGED
@@ -1,24 +1,42 @@
1
- FROM python:3.11 AS builder
2
-
 
3
  WORKDIR /app
4
-
5
- RUN python3 -m venv venv
6
- ENV VIRTUAL_ENV=/app/venv
7
- ENV PATH="$VIRTUAL_ENV/bin:$PATH"
8
-
9
  COPY requirements.txt .
10
- RUN pip install -r requirements.txt
11
-
12
- # Stage 2
13
- FROM python:3.11 AS runner
14
-
 
 
 
 
 
15
  WORKDIR /app
16
-
17
- COPY --from=builder /app/venv venv
18
-
19
- ENV VIRTUAL_ENV=/app/venv
20
- ENV PATH="$VIRTUAL_ENV/bin:$PATH"
21
-
22
- EXPOSE 8000
23
-
24
- CMD [ "python screenshot/main.py" ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Stage 1: Build
2
+ FROM python:3.11-slim as builder
3
+
4
  WORKDIR /app
5
+
6
+ # Copy requirements.txt
 
 
 
7
  COPY requirements.txt .
8
+
9
+ # Install dependencies
10
+ RUN pip install --no-cache-dir -r requirements.txt
11
+
12
+ # Copy everything
13
+ COPY app app
14
+
15
+ # Stage 2: Production
16
+ FROM python:3.11-slim as production
17
+
18
  WORKDIR /app
19
+
20
+ # Copy installed modules from the builder stage
21
+ COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
22
+
23
+ # Install playwright dependencies
24
+ RUN python -m playwright install-deps && \
25
+ # Clean up
26
+ rm -f /usr/lib/x86_64-linux-gnu/libmfxhw* /usr/lib/x86_64-linux-gnu/mfx/* && \
27
+ # Create non-root user
28
+ useradd --home-dir /app --shell /bin/sh zaws && \
29
+ chown -R zaws:zaws .
30
+
31
+ # Switch to non-root user
32
+ USER zaws
33
+
34
+ # Install Firefox as the non-root user.
35
+ # Installing it as root would cause permission errors at runtime unless zaws were also granted access to the installation path.
36
+ RUN python -m playwright install firefox
37
+
38
+ # Copy application code from the builder stage
39
+ COPY --from=builder /app /app
40
+
41
+ # Run application
42
+ CMD ["python", "app/main.py"]
README.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Zawshoot
3
+ emoji: 📈
4
+ colorFrom: pink
5
+ colorTo: indigo
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ ---
10
+
11
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
{screenshot → app}/__init__.py RENAMED
File without changes
{screenshot/routers → app/core}/__init__.py RENAMED
File without changes
app/core/service/__init__.py ADDED
File without changes
app/core/service/playwright/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # ruff: noqa: F401
2
+ from .playwright_context import (
3
+ AsyncPlaywrightContext,
4
+ TimeoutError,
5
+ )
app/core/service/playwright/models.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Literal
4
+
5
+ from pydantic import BaseModel, Field, HttpUrl, ValidationInfo, field_validator
6
+
7
+
8
class ViewPortModel(BaseModel):
    """Viewport size, in pixels, for a newly opened browser page.

    Both dimensions must be strictly positive so that an invalid size is
    rejected during request validation instead of failing later inside the
    browser.
    """

    # Page width in pixels.
    width: int = Field(default=1280, gt=0)
    # Page height in pixels.
    height: int = Field(default=720, gt=0)
11
+
12
+
13
+ class PageModel(BaseModel):
14
+ color_scheme: Literal["light", "dark", "no-preference"] | None = "no-preference"
15
+ java_script_enabled: bool | None = True
16
+ viewport: ViewPortModel | None = None
17
+ proxy: dict | None = None
18
+ no_viewport: bool | None = False
19
+
20
+
21
+ class GetContentModel(BaseModel):
22
+ url: HttpUrl
23
+ query_selector: str | None = None
24
+ wait_selector: bool | None = False
25
+ ms_delay: float = Field(default=0.0, gt=-1, lt=15_000.1)
26
+
27
+ @field_validator("wait_selector")
28
+ @classmethod
29
+ def check_query_selector(cls, v: bool | None, info: ValidationInfo) -> bool | None:
30
+ if v is True and info.data.get("query_selector") is None:
31
+ msg = "wait_selector cannot be set to True without specifying a query_selector"
32
+ raise ValueError(
33
+ msg,
34
+ )
35
+ return v
36
+
37
+
38
class ScreenshotModel(GetContentModel):
    """Page-loading options extended with screenshot-specific settings."""

    # Capture the whole scrollable page rather than only the visible area.
    full_page: bool | None = False
app/core/service/playwright/playwright_context.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING, AsyncContextManager
4
+
5
+ from playwright.async_api import ( # noqa: F401
6
+ Browser,
7
+ BrowserContext,
8
+ Page,
9
+ TimeoutError,
10
+ async_playwright,
11
+ )
12
+
13
+ from .models import BrowserModel, GetContentModel, PageModel, ScreenshotModel # noqa: TCH001
14
+
15
+ if TYPE_CHECKING:
16
+ from types import TracebackType
17
+
18
+
19
class AsyncPlaywrightContext:
    """Async context manager owning one Playwright driver and one Firefox browser.

    Entering starts the driver and launches Firefox; leaving closes the
    browser and stops the driver. The state lives on the instance, so a
    single instance must not be entered by several tasks at the same time.
    """

    async def __aenter__(self) -> AsyncPlaywrightContext:
        """Start Playwright, launch Firefox and return this instance."""
        self.playwright = await async_playwright().start()
        try:
            self.browser = await self.playwright.firefox.launch(
                firefox_user_prefs={
                    "extensions.enabledScopes": 1,
                    "extensions.autoDisableScopes": 1,
                    "dom.webdriver.enabled": False,
                    "useAutomationExtension": False,
                    "general.useragent.override": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36",  # noqa: E501
                },
            )
        except BaseException:
            # __aexit__ is not called when __aenter__ raises, so the driver
            # has to be stopped here or it would be leaked.
            await self.playwright.stop()
            raise
        return self

    @staticmethod
    def _as_plain_dict(value: object) -> dict | None:
        """Convert an optional pydantic model or mapping into a dict.

        Falsy values (``None``, empty mapping) become ``None`` so the
        corresponding browser option is simply left unset.
        """
        if not value:
            return None
        dump = getattr(value, "model_dump", None)
        # Plain dicts (e.g. ``PageModel.proxy``) have no ``model_dump``.
        return dump() if callable(dump) else dict(value)

    async def new_browser_page(self, browser: Browser, page_model: PageModel) -> Page:
        """Open a new page on ``browser`` configured from ``page_model``."""
        return await browser.new_page(
            color_scheme=page_model.color_scheme,
            java_script_enabled=page_model.java_script_enabled,
            no_viewport=page_model.no_viewport,
            proxy=self._as_plain_dict(page_model.proxy),
            viewport=self._as_plain_dict(page_model.viewport),
        )

    async def new_browser_context(
        self,
        browser: Browser,
        browser_model: BrowserModel,
    ) -> BrowserContext:
        """Create a browser context configured from ``browser_model``.

        NOTE(review): ``BrowserModel`` is imported from ``.models`` but is not
        among the models visible alongside this class; this assumes its
        fields map one-to-one onto ``Browser.new_context`` keyword
        arguments -- confirm once the model is defined.
        """
        return await browser.new_context(**browser_model.model_dump(exclude_none=True))

    async def screenshot(
        self,
        screenshot_model: ScreenshotModel,
        page_model: PageModel,
    ) -> bytes:
        """Load the requested URL and return a screenshot as raw bytes.

        When ``query_selector`` is set only the matching element is captured
        (optionally waiting for it first); otherwise the page itself is
        captured, honouring ``full_page``. The page is closed even when
        navigation or capture fails.
        """
        page = await self.new_browser_page(browser=self.browser, page_model=page_model)
        try:
            await page.goto(str(screenshot_model.url))
            await page.wait_for_timeout(screenshot_model.ms_delay)

            if not screenshot_model.query_selector:
                return await page.screenshot(full_page=screenshot_model.full_page)

            target = page.locator(screenshot_model.query_selector)
            if screenshot_model.wait_selector:
                await target.wait_for()
            return await target.screenshot()
        finally:
            await page.close()

    async def get_content(
        self,
        get_content_model: GetContentModel,
        browser_model: BrowserModel,
    ) -> str:
        """Load the requested URL and return the page's HTML.

        If ``query_selector`` is set, the method waits for that element
        before reading the content. The browser context (and its page) is
        closed even when loading fails.
        """
        browser_context = await self.new_browser_context(
            browser=self.browser,
            browser_model=browser_model,
        )
        try:
            page = await browser_context.new_page()
            await page.goto(str(get_content_model.url))
            await page.wait_for_timeout(get_content_model.ms_delay)

            if get_content_model.query_selector:
                # ``locator`` is synchronous; only ``wait_for`` is awaited.
                await page.locator(get_content_model.query_selector).wait_for()
            return await page.content()
        finally:
            await browser_context.close()

    async def __aexit__(
        self,
        typ: type[BaseException] | None,
        exc: BaseException | None,
        tb: TracebackType | None,
    ) -> None:
        """Close the browser and stop the Playwright driver."""
        try:
            if self.browser:
                await self.browser.close()
        finally:
            # Stop the driver even if closing the browser raised.
            if self.playwright:
                await self.playwright.stop()
{screenshot → app}/main.py RENAMED
@@ -1,10 +1,12 @@
 
 
1
  import uvicorn
2
  from fastapi import FastAPI
3
- from routers.screenshot import routers as screenshot_router
4
 
5
  app = FastAPI()
6
 
7
  app.include_router(screenshot_router)
8
 
9
  if __name__ == "__main__":
10
- uvicorn.run(app, host="0.0.0.0", port=8000) # noqa: S104
 
1
"""Application entry point: builds the FastAPI app and serves it with uvicorn."""
import os

import uvicorn
from fastapi import FastAPI
from routers.screenshot import router as screenshot_router

app = FastAPI()
app.include_router(screenshot_router)

if __name__ == "__main__":
    # The listening port comes from the PORT environment variable, falling
    # back to 7860 when it is not set.
    listen_port = int(os.environ.get("PORT", "7860"))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)  # noqa: S104
app/routers/__init__.py ADDED
File without changes
app/routers/screenshot/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
"""Aggregate router for the screenshot package."""
from fastapi import APIRouter

from .screenshot import router as screenshot_router

# Package-level router that re-exports the endpoints defined in
# ``screenshot.py`` under a single object for the application to include.
router = APIRouter()
router.include_router(screenshot_router)
app/routers/screenshot/screenshot.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+
5
+ from core.service.playwright import AsyncPlaywrightContext, TimeoutError
6
+ from core.service.playwright.models import PageModel, ScreenshotModel
7
+ from fastapi import APIRouter, HTTPException
8
+ from fastapi.responses import Response
9
+
10
+ router = APIRouter()
11
+ playwright_context = AsyncPlaywrightContext()
12
+
13
+
14
@router.post("/screenshot", tags=["V1"])
async def screenshot(
    screenshot_model: ScreenshotModel,
    page_model: PageModel | None = None,
) -> Response:
    """Render the requested page and return it as a PNG image.

    Responds with 504 when the browser times out and with 500 on any other
    unexpected failure.
    """
    if page_model is None:
        page_model = PageModel()

    try:
        # A fresh context per request: the context keeps its browser on the
        # instance, so sharing one instance between concurrent requests would
        # let one request close the browser another is still using.
        async with AsyncPlaywrightContext() as sc:
            image = await sc.screenshot(screenshot_model=screenshot_model, page_model=page_model)
    except TimeoutError as e:
        detail = f"An error occurred while generating the screenshot: {e}"
        raise HTTPException(status_code=504, detail=detail) from e
    except Exception as e:
        logging.exception("Unhandled error in screenshot generation")
        raise HTTPException(status_code=500, detail="An unexpected error occurred") from e

    return Response(content=image, media_type="image/png")
pyproject.toml CHANGED
@@ -5,7 +5,9 @@ line-length = 100
5
  select = ["ALL"]
6
  ignore = [
7
  "CPY001", # copyright above code
8
- "D", # sphinx not support
 
 
9
  ]
10
 
11
  [tool.mypy]
@@ -14,5 +16,5 @@ show_error_codes = true
14
  no_implicit_optional = true
15
  warn_return_any = true
16
  warn_unused_ignores = true
17
- exclude = ["tests"]
18
- python_version = "3.10"
 
5
  select = ["ALL"]
6
  ignore = [
7
  "CPY001", # copyright above code
8
+ "ANN101", # self annotations
9
+ "ANN102", # cls annotations
10
+ "D", # sphinx not support
11
  ]
12
 
13
  [tool.mypy]
 
16
  no_implicit_optional = true
17
  warn_return_any = true
18
  warn_unused_ignores = true
19
+ python_version = "3.10"
20
+ plugins = ["pydantic.mypy"]
requirements-dev.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ruff==0.3.3
requirements.txt CHANGED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ pydantic==2.6.4
2
+ fastapi==0.110.0
3
+ playwright==1.42.0
4
+ uvicorn==0.28.0
screenshot/routers/screenshot.py DELETED
@@ -1,93 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import logging
4
- from typing import IO, TYPE_CHECKING, AsyncContextManager, Literal
5
-
6
- from fastapi import APIRouter, HTTPException
7
- from fastapi.responses import Response
8
- from playwright.async_api import BrowserContext, TimeoutError, async_playwright
9
- from pydantic import BaseModel, HttpUrl
10
-
11
- if TYPE_CHECKING:
12
- from types import TracebackType
13
-
14
-
15
- router = APIRouter()
16
-
17
-
18
- class ViewPort(BaseModel):
19
- width: int = 1280
20
- height: int = 720
21
-
22
-
23
- class ScreenshotItems(BaseModel):
24
- url: HttpUrl
25
- full_page: bool | None = False
26
- query_selector: str | None = None
27
-
28
- viewport: ViewPort | None = None
29
- color_scheme: Literal["light", "dark", "no-preference"] | None = "no-preference"
30
- bypass_csp: bool | None = False
31
- java_script_enabled: bool | None = True
32
- proxy: dict | None = None
33
- is_mobile: bool | None = False
34
- no_viewport: bool | None = False
35
-
36
-
37
- class ScreenShot:
38
- async def __aenter__(self) -> AsyncContextManager[ScreenShot]:
39
- self.playwright = await async_playwright().start()
40
- self.browser = await self.playwright.chromium.launch(
41
- args=["--disable-extensions"],
42
- chromium_sandbox=True,
43
- )
44
- return self
45
-
46
- async def browser_context(self, items: ScreenshotItems) -> BrowserContext:
47
- return await self.browser.new_context(
48
- viewport=items.viewport.model_dump() if items.viewport else None,
49
- color_scheme=items.color_scheme,
50
- bypass_csp=items.bypass_csp,
51
- java_script_enabled=items.java_script_enabled,
52
- proxy=items.proxy.model_dump() if items.proxy else None,
53
- is_mobile=items.is_mobile,
54
- no_viewport=items.no_viewport,
55
- )
56
-
57
- async def capture(self, items: ScreenshotItems) -> IO[bytes]:
58
- context: BrowserContext = await self.browser_context(items)
59
- page = await context.new_page()
60
- await page.goto(str(items.url))
61
-
62
- if items.query_selector:
63
- page = page.locator(items.query_selector)
64
-
65
- screenshot_data = await page.screenshot(full_page=items.full_page)
66
- await context.close()
67
- return screenshot_data
68
-
69
- async def __aexit__(
70
- self,
71
- typ: type[BaseException] | None,
72
- exc: BaseException | None,
73
- tb: TracebackType | None,
74
- ) -> None:
75
- if self.browser:
76
- await self.browser.close()
77
- if self.playwright:
78
- await self.playwright.stop()
79
-
80
-
81
- @router.post("/screenshot")
82
- async def screenshot(data: ScreenshotItems) -> Response:
83
- async with ScreenShot() as sc:
84
- try:
85
- response = await sc.capture(items=data)
86
- return Response(content=response, media_type="image/png")
87
- except TimeoutError as e:
88
- raise HTTPException(
89
- status_code=504,
90
- detail=f"An error occurred while generating the screenshot: {e}",
91
- ) from e
92
- except Exception:
93
- logging.exception("screenshot unhandled error")