This commit is contained in:
2025-10-25 22:00:57 +08:00
2 changed files with 79 additions and 99 deletions

View File

@ -1,7 +1,12 @@
import asyncio
import queue
from typing import Any, Callable, Coroutine
from loguru import logger
from playwright.async_api import async_playwright, Browser, Page, BrowserContext
from playwright.async_api import Page, Playwright, async_playwright, Browser, Page, BrowserContext
PageFunction = Callable[[Page], Coroutine[Any, Any, Any]]
class WebRenderer:
browser_pool: queue.Queue["WebRendererInstance"] = queue.Queue()
@ -27,7 +32,14 @@ class WebRenderer:
return cls.context_pool[id(instance)]
@classmethod
async def render(cls, url: str, target: str, params: dict = {}, other_function: callable = None, timeout: int = 30) -> bytes:
async def render(
cls,
url: str,
target: str,
params: dict = {},
other_function: PageFunction | None = None,
timeout: int = 30,
) -> bytes:
'''
访问指定URL并返回截图
@ -80,21 +92,43 @@ class WebRenderer:
del cls.page_pool[page_id]
logger.debug(f"Closed and removed persistent page for page_id {page_id}")
class WebRendererInstance:
def __init__(self):
self.playwright = None
self.browser: Browser = None
self.lock: asyncio.Lock = None
self._playwright: Playwright | None = None
self._browser: Browser | None = None
self.lock = asyncio.Lock()
@property
def playwright(self) -> Playwright:
assert self._playwright is not None
return self._playwright
@property
def browser(self) -> Browser:
assert self._browser is not None
return self._browser
async def init(self):
self._playwright = await async_playwright().start()
self._browser = await self.playwright.chromium.launch(headless=True)
@classmethod
async def create(cls) -> "WebRendererInstance":
instance = cls()
instance.playwright = await async_playwright().start()
instance.browser = await instance.playwright.chromium.launch(headless=True)
instance.lock = asyncio.Lock()
await instance.init()
return instance
async def render(self, url: str, target: str, index: int = 0, params: dict = {}, other_function: callable = None, timeout: int = 30) -> bytes:
async def render(
self,
url: str,
target: str,
index: int = 0,
params: dict = {},
other_function: PageFunction | None = None,
timeout: int = 30
) -> bytes:
'''
访问指定URL并返回截图
@ -142,7 +176,30 @@ class WebRendererInstance:
logger.debug(f"Screenshot taken successfully")
return screenshot
logger.debug(f"Navigating to {url} with timeout {timeout}")
try:
url_with_params = url + ("?" + "&".join(f"{k}={v}" for k, v in params.items()) if params else "")
await page.goto(url_with_params, timeout=timeout * 1000, wait_until="load")
logger.debug("Page loaded successfully")
# 等待目标元素出现
await page.wait_for_selector(target, timeout=timeout * 1000)
logger.debug(f"Target element '{target}' found, taking screenshot")
if other_function:
await other_function(page)
elements = await page.query_selector_all(target)
if not elements:
raise Exception(f"Target element '{target}' not found on the page.")
if index >= len(elements):
raise Exception(f"Index {index} out of range for elements matching '{target}'.")
element = elements[index]
screenshot = await element.screenshot()
logger.debug("Screenshot taken successfully")
return screenshot
finally:
await page.close()
await context.close()
async def close(self):
await self.browser.close()
await self.playwright.stop()