SilentWraith committed on
Commit
f37cf04
1 Parent(s): 0080981

Update app/core/service/playwright/playwright_context.py

Browse files
app/core/service/playwright/playwright_context.py CHANGED
@@ -1,88 +1,139 @@
1
  from __future__ import annotations
2
 
3
- from typing import Literal
4
-
5
- from pydantic import BaseModel, Field, HttpUrl
6
-
7
-
8
class ViewPortModel(BaseModel):
    """Dimensions of the page viewport.

    Attributes:
        width (int):
            Viewport width. Defaults to 1280.

        height (int):
            Viewport height. Defaults to 720.
    """

    width: int = Field(default=1280)
    height: int = Field(default=720)
21
-
22
-
23
class PageModel(BaseModel):
    """Options describing how a page should be set up.

    Attributes:
        color_scheme (Literal["light", "dark", "no-preference"] | None):
            Color scheme of the page. Defaults to "no-preference".

        java_script_enabled (bool | None):
            Whether or not JavaScript is enabled in the context.
            Defaults to True.

        viewport (ViewPortModel | None):
            Consistent viewport used for each page. Defaults to None.

        no_viewport (bool | None):
            When set, no fixed viewport is enforced, which allows resizing
            the window in headed mode. Defaults to False.

        proxy (dict | None):
            Proxy used for all requests; HTTP and SOCKS proxies are
            supported, e.g. ``{'server': 'http://proxy.example.com:3128'}``.
            Defaults to None.
    """

    color_scheme: Literal["light", "dark", "no-preference"] | None = Field(
        default="no-preference",
    )
    java_script_enabled: bool | None = Field(default=True)
    viewport: ViewPortModel | None = Field(default=None)
    no_viewport: bool | None = Field(default=False)
    proxy: dict | None = Field(default=None)
48
-
49
-
50
class GetContentModel(BaseModel):
    """Webpage to request and parse.

    Attributes:
        url (HttpUrl):
            Url to request.

        new_browser (bool | None):
            Whether you want to make a new browser context or not.
            Defaults to False.

        query_selector (str | None):
            Used to locate a selector. Defaults to None.

        ms_delay (int):
            A delay, in milliseconds, before performing a task after
            requesting the url. Must be between 0 and 15000; defaults to 0.
    """

    url: HttpUrl
    new_browser: bool | None = False
    query_selector: str | None = None
    # The default is an int literal so it matches the declared field type;
    # field defaults are not validated/coerced unless explicitly requested.
    ms_delay: int = Field(default=0, ge=0, le=15_000)
71
-
72
-
73
class ScreenshotModel(GetContentModel):
    """Request schema for taking a screenshot of a webpage.

    Attributes:
        full_page (bool | None):
            Whether you want a full page screenshot or not.
            Defaults to False.

        image_type (Literal["png", "jpeg"]):
            The image type of the screenshot. Defaults to "jpeg".
    """

    full_page: bool | None = Field(
        default=False,
        description="Whether you want a full page screenshot or not.",
    )
    image_type: Literal["png", "jpeg"] = Field(default="jpeg")
88
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from __future__ import annotations
2
 
3
from typing import Any, AsyncIterator, Awaitable, ClassVar, Generator

from playwright.async_api import (  # noqa: F401
    Browser,
    BrowserContext,
    Page,
    Playwright,
    TimeoutError,
    async_playwright,
)

from .models import GetContentModel, PageModel, ScreenshotModel  # noqa: TCH001
14
+
15
+
16
class AsyncMixin:
    """Experimental: mixin that makes instances of a class awaitable.

    ``await SomeClass()`` builds the object through the regular ``__init__``
    and then runs the asynchronous ``__ainit__`` hook. The value of the
    ``await`` expression is whatever ``__ainit__`` returns.
    """

    async def __ainit__(self) -> None:
        """Run asynchronous initialization.

        The base implementation does nothing and returns ``None``; subclasses
        override it to perform their awaitable setup.
        """

    def __await__(self) -> Generator[Any, None, Any]:
        """Make the instance awaitable by delegating to ``__ainit__``.

        Returns:
            Generator: The await-iterator of the ``__ainit__`` coroutine.
        """
        # A coroutine's __await__() yields a generator-based iterator, not an
        # async iterator, hence the Generator return annotation.
        return self.__ainit__().__await__()
25
+
26
+
27
class PlaywrightInstance(AsyncMixin):
    """Keep one Playwright Firefox browser open so API requests can reuse it.

    Awaiting an instance (``await PlaywrightInstance()``) starts Playwright
    and launches the browser; ``close_instance`` shuts both down again.
    """

    # User-agent string installed through the "general.useragent.override"
    # preference below.
    HEADERS = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"  # noqa: E501

    FIREFOX_USER_PREFS: ClassVar[dict[str, bool | int | str]] = {
        "extensions.enabledScopes": 1,
        "extensions.autoDisableScopes": 1,
        "dom.webdriver.enabled": False,
        "useAutomationExtension": False,
        "general.useragent.override": HEADERS,
    }

    def __init__(self) -> None:
        """Create an instance that has not started Playwright yet."""
        self.playwright: Playwright | None = None
        self.browser: Browser | None = None

    async def __ainit__(self) -> PlaywrightInstance:
        """Start Playwright and launch Firefox unless they are already running.

        Returns:
            PlaywrightInstance: This instance, so that
            ``await PlaywrightInstance()`` evaluates to the ready object.
        """
        if self.playwright is None:
            self.playwright = await async_playwright().start()

        # Checked separately from the driver: if a previous launch failed
        # after Playwright had started, awaiting again retries the launch
        # instead of leaving ``browser`` as None forever.
        if self.browser is None:
            self.browser = await self.playwright.firefox.launch(
                firefox_user_prefs=self.FIREFOX_USER_PREFS,
            )
        return self

    async def new_context_page(
        self,
        browser: Browser,
        screenshot_model: GetContentModel,
        page_model: PageModel,
    ) -> tuple[BrowserContext | None, Page]:
        """Create a page, optionally inside an explicitly created context.

        Parameters:
            browser (Browser):
                The Playwright Browser instance to create the page in.
            screenshot_model (GetContentModel):
                A pydantic BaseModel instance containing the configuration for the request.
                Its ``new_browser`` flag selects whether a dedicated browser
                context is created.
            page_model (PageModel):
                A pydantic BaseModel instance containing the configuration for the page.

        Returns:
            tuple: ``(None, page)`` when ``new_browser`` is falsy, otherwise
            ``(context, page)`` with the newly created BrowserContext.
        """
        # NOTE(review): assumes ``proxy`` and ``viewport`` are objects exposing
        # ``model_dump()`` (pydantic models) — confirm against ``.models``.
        params = {
            "color_scheme": page_model.color_scheme,
            "java_script_enabled": page_model.java_script_enabled,
            "no_viewport": page_model.no_viewport,
            "proxy": page_model.proxy.model_dump() if page_model.proxy else None,
            "viewport": page_model.viewport.model_dump() if page_model.viewport else None,
        }

        if not screenshot_model.new_browser:
            return None, await browser.new_page(**params)

        new_context = await browser.new_context(**params)
        try:
            page = await new_context.new_page()
        except BaseException:
            # Do not leave an orphaned context behind when page creation fails.
            await new_context.close()
            raise
        return new_context, page

    async def screenshot(
        self,
        screenshot_model: ScreenshotModel,
        page_model: PageModel,
    ) -> bytes:
        """Take a screenshot of a webpage url.

        Parameters:
            screenshot_model (ScreenshotModel):
                A pydantic BaseModel instance containing the configuration for the screenshot.
            page_model (PageModel):
                A pydantic BaseModel instance containing the configuration for the page.

        Returns:
            bytes: The screenshot data in bytes.

        Raises:
            RuntimeError: If the browser has not been started (the instance
                was never awaited, or it has been closed).
        """
        if self.browser is None:
            msg = "Browser is not running; await the PlaywrightInstance before taking screenshots."
            raise RuntimeError(msg)

        context, page = await self.new_context_page(
            screenshot_model=screenshot_model,
            browser=self.browser,
            page_model=page_model,
        )

        try:
            await page.goto(str(screenshot_model.url))
            await page.wait_for_timeout(screenshot_model.ms_delay)

            # With a selector only the located element is captured; otherwise
            # the page itself (optionally the full scrollable page).
            if screenshot_model.query_selector:
                element = page.locator(screenshot_model.query_selector)
                return await element.screenshot(
                    type=screenshot_model.image_type,
                )

            return await page.screenshot(
                full_page=screenshot_model.full_page,
                type=screenshot_model.image_type,
            )
        finally:
            # Always release the page (and the dedicated context, if any),
            # even when navigation or the screenshot itself fails.
            try:
                await page.close()
            finally:
                if context is not None:
                    await context.close()

    async def close_instance(self) -> None:
        """For manual closing of playwright if needed.

        Safe to call when nothing was started. Afterwards the instance can be
        awaited again to start a fresh browser.
        """
        browser, playwright = self.browser, self.playwright
        # Reset state first so a later ``await instance`` relaunches cleanly.
        self.browser = None
        self.playwright = None

        try:
            if browser is not None:
                await browser.close()
        finally:
            # Stop the driver even if closing the browser raised.
            if playwright is not None:
                await playwright.stop()
139
+