diff --git a/README.md b/README.md index 5038a5e..a208c6f 100644 --- a/README.md +++ b/README.md @@ -67,6 +67,10 @@ code . 详见[LLM 配置文档](/docs/LLM.md)。 +#### 配置 konabot-web 以支持更高级的图片渲染 + +详见[konabot-web 配置文档](/docs/konabot-web.md) + ### 运行 使用命令行手动启动 Bot: diff --git a/docs/konabot-web.md b/docs/konabot-web.md new file mode 100644 index 0000000..6f13903 --- /dev/null +++ b/docs/konabot-web.md @@ -0,0 +1,18 @@ +# konabot-web 配置文档 + +本文档教你配置一个此方 Bot 的 Web 服务器。 + +## 安装并运行 konabot-web + +按照 [konabot-web README](https://gitea.service.jazzwhom.top/mttu-developers/konabot-web) 安装并运行 konabot-web 实例。 + +## 指定 konabot-web 实例地址 + +如果你的 Web 服务器的端口不是 5173,或者你有特殊的网络结构,你需要手动设置 konabot-web。编辑 `.env` 文件: + +``` +MODULE_WEB_RENDER_WEBURL=http://web-server:port +MODULE_WEB_RENDER_INSTANCE=http://konabot-server:port +``` + +替换 web-server 为你的前端服务器地址,konabot-server 为后端服务器地址,port 为端口号。 diff --git a/konabot/common/nb/extract_image.py b/konabot/common/nb/extract_image.py index 6dfa856..4b4eb50 100644 --- a/konabot/common/nb/extract_image.py +++ b/konabot/common/nb/extract_image.py @@ -16,15 +16,28 @@ from nonebot.adapters.onebot.v11 import MessageEvent as OnebotV11MessageEvent import nonebot.params from nonebot_plugin_alconna import Image, RefNode, Reply, UniMessage from PIL import UnidentifiedImageError +from pydantic import BaseModel from returns.result import Failure, Result, Success +from konabot.common.path import ASSETS_PATH + discordConfig = nonebot.get_plugin_config(DiscordConfig) +class ExtractImageConfig(BaseModel): + module_extract_image_no_download: bool = False + "要不要算了,不下载了,直接爆炸算了,适用于一些比较奇怪的网络环境,无法从协议端下载文件" + + +module_config = nonebot.get_plugin_config(ExtractImageConfig) + + async def download_image_bytes(url: str, proxy: str | None = None) -> Result[bytes, str]: # if "/matcha/cache/" in url: # url = url.replace('127.0.0.1', '10.126.126.101') + if module_config.module_extract_image_no_download: + return Success((ASSETS_PATH / "img" / "other" / "boom.jpg").read_bytes()) logger.debug(f"开始从 {url} 下载图片") async with httpx.AsyncClient(proxy=proxy) as c: try: diff --git a/konabot/common/web_render/__init__.py b/konabot/common/web_render/__init__.py index 3478392..53198bf 100644 --- a/konabot/common/web_render/__init__.py +++ b/konabot/common/web_render/__init__.py @@ -1,211 +1,9 @@ -import asyncio -import queue -from typing import Any, Callable, Coroutine -from loguru import logger -from playwright.async_api import Page, Playwright, async_playwright, Browser, Page, BrowserContext +from .config import web_render_config +from .core import WebRenderer as WebRenderer +from .core import WebRendererInstance as WebRendererInstance -PageFunction = Callable[[Page], Coroutine[Any, Any, Any]] - - -class WebRenderer: - browser_pool: queue.Queue["WebRendererInstance"] = queue.Queue() - context_pool: dict[int, BrowserContext] = {} # 长期挂载的浏览器上下文池 - page_pool: dict[str, Page] = {} # 长期挂载的页面池 - - @classmethod - async def get_browser_instance(cls) -> "WebRendererInstance": - if cls.browser_pool.empty(): - instance = await WebRendererInstance.create() - cls.browser_pool.put(instance) - instance = cls.browser_pool.get() - cls.browser_pool.put(instance) - return instance - - @classmethod - async def get_browser_context(cls) -> BrowserContext: - instance = await cls.get_browser_instance() - if id(instance) not in cls.context_pool: - context = await instance.browser.new_context() - cls.context_pool[id(instance)] = context - logger.debug(f"Created new persistent browser context for WebRendererInstance {id(instance)}") - return cls.context_pool[id(instance)] - - @classmethod - async def render( - cls, - url: str, - target: str, - params: dict = {}, - other_function: PageFunction | None = None, - timeout: int = 30, - ) -> bytes: - ''' - 访问指定URL并返回截图 - - :param url: 目标URL - :param target: 渲染目标,如 ".box"、"#main" 等CSS选择器 - :param timeout: 页面加载超时时间,单位秒 - :param params: URL键值对参数 - :param other_function: 其他自定义操作函数,接受page参数 - :return: 截图的字节数据 - - ''' - instance = await cls.get_browser_instance() - logger.debug(f"Using WebRendererInstance {id(instance)} to render {url} targeting {target}") - return await instance.render(url, target, params=params, other_function=other_function, timeout=timeout) - - - @classmethod - async def render_persistent_page(cls, page_id: str, url: str, target: str, params: dict = {}, other_function: callable = None, timeout: int = 30) -> bytes: - ''' - 使用长期挂载的页面访问指定URL并返回截图 - - :param page_id: 页面唯一标识符 - :param url: 目标URL - :param target: 渲染目标,如 ".box"、"#main" 等CSS选择器 - :param timeout: 页面加载超时时间,单位秒 - :param params: URL键值对参数 - :param other_function: 其他自定义操作函数,接受page参数 - :return: 截图的字节数据 - - ''' - logger.debug(f"Requesting persistent render for page_id {page_id} at {url} targeting {target} with timeout {timeout}") - instance = await cls.get_browser_instance() - if page_id not in cls.page_pool: - context = await cls.get_browser_context() - page = await context.new_page() - cls.page_pool[page_id] = page - logger.debug(f"Created new persistent page for page_id {page_id} using WebRendererInstance {id(instance)}") - page = cls.page_pool[page_id] - return await instance.render_with_page(page, url, target, params=params, other_function=other_function, timeout=timeout) - - @classmethod - async def render_file( - cls, - file_path: str, - target: str, - params: dict = {}, - other_function: PageFunction | None = None, - timeout: int = 30, - ) -> bytes: - ''' - 访问指定本地文件URL并返回截图 - - :param file_path: 目标文件路径 - :param target: 渲染目标,如 ".box"、"#main" 等CSS选择器 - :param timeout: 页面加载超时时间,单位秒 - :param params: URL键值对参数 - :param other_function: 其他自定义操作函数,接受page参数 - :return: 截图的字节数据 - - ''' - instance = await cls.get_browser_instance() - logger.debug(f"Using WebRendererInstance {id(instance)} to render file {file_path} targeting {target}") - return await instance.render_file(file_path, target, params=params, other_function=other_function, timeout=timeout) - - @classmethod - async def close_persistent_page(cls, page_id: str) -> None: - ''' - 关闭并移除长期挂载的页面 - - :param page_id: 页面唯一标识符 - ''' - if page_id in cls.page_pool: - page = cls.page_pool[page_id] - await page.close() - del cls.page_pool[page_id] - logger.debug(f"Closed and removed persistent page for page_id {page_id}") - - - -class WebRendererInstance: - def __init__(self): - self._playwright: Playwright | None = None - self._browser: Browser | None = None - self.lock = asyncio.Lock() - - @property - def playwright(self) -> Playwright: - assert self._playwright is not None - return self._playwright - - @property - def browser(self) -> Browser: - assert self._browser is not None - return self._browser - - async def init(self): - self._playwright = await async_playwright().start() - self._browser = await self.playwright.chromium.launch(headless=True) - - @classmethod - async def create(cls) -> "WebRendererInstance": - instance = cls() - await instance.init() - return instance - - async def render( - self, - url: str, - target: str, - index: int = 0, - params: dict = {}, - other_function: PageFunction | None = None, - timeout: int = 30 - ) -> bytes: - ''' - 访问指定URL并返回截图 - - :param url: 目标URL - :param target: 渲染目标,如 ".box"、"#main" 等CSS选择器 - :param timeout: 页面加载超时时间,单位秒 - :param index: 如果目标是一个列表,指定要截图的元素索引 - :param params: URL键值对参数 - :param other_function: 其他自定义操作函数,接受page参数 - :return: 截图的字节数据 - - ''' - async with self.lock: - context = await self.browser.new_context() - page = await context.new_page() - screenshot = await self.inner_render(page, url, target, index, params, other_function, timeout) - await page.close() - await context.close() - return screenshot - - async def render_with_page(self, page: Page, url: str, target: str, index: int = 0, params: dict = {}, other_function: callable = None, timeout: int = 30) -> bytes: - async with self.lock: - screenshot = await self.inner_render(page, url, target, index, params, other_function, timeout) - return screenshot - - async def render_file(self, file_path: str, target: str, index: int = 0, params: dict = {}, other_function: callable = None, timeout: int = 30) -> bytes: - file_path = "file:///" + str(file_path).replace("\\", "/") - return await self.render(file_path, target, index, params, other_function, timeout) - - async def inner_render(self, page: Page, url: str, target: str, index: int = 0, params: dict = {}, other_function: callable = None, timeout: int = 30) -> bytes: - logger.debug(f"Navigating to {url} with timeout {timeout}") - url_with_params = url + ("?" + "&".join(f"{k}={v}" for k, v in params.items()) if params else "") - await page.goto(url_with_params, timeout=timeout * 1000, wait_until="load") - logger.debug("Page loaded successfully") - # 等待目标元素出现 - await page.wait_for_selector(target, timeout=timeout * 1000) - logger.debug(f"Target element '{target}' found, taking screenshot") - if other_function: - await other_function(page) - elements = await page.query_selector_all(target) - if not elements: - logger.error(f"Target element '{target}' not found on the page.") - return None - if index >= len(elements): - logger.error(f"Index {index} out of range for elements matching '{target}'") - return None - element = elements[index] - screenshot = await element.screenshot() - logger.debug(f"Screenshot taken successfully") - return screenshot - - async def close(self): - await self.browser.close() - await self.playwright.stop() +def konaweb(sub_url: str): + sub_url = '/' + sub_url.removeprefix('/') + return web_render_config.module_web_render_weburl.removesuffix('/') + sub_url diff --git a/konabot/common/web_render/config.py b/konabot/common/web_render/config.py new file mode 100644 index 0000000..beed6a4 --- /dev/null +++ b/konabot/common/web_render/config.py @@ -0,0 +1,19 @@ +import nonebot + +from pydantic import BaseModel + +class Config(BaseModel): + module_web_render_weburl: str = "localhost:5173" + module_web_render_instance: str = "" + + def get_instance_baseurl(self): + if self.module_web_render_instance: + return self.module_web_render_instance.removesuffix('/') + config = nonebot.get_driver().config + ip = str(config.host) + if ip == "0.0.0.0": + ip = "127.0.0.1" + port = config.port + return f'http://{ip}:{port}' + +web_render_config = nonebot.get_plugin_config(Config) diff --git a/konabot/common/web_render/core.py b/konabot/common/web_render/core.py new file mode 100644 index 0000000..2d069e2 --- /dev/null +++ b/konabot/common/web_render/core.py @@ -0,0 +1,281 @@ +import asyncio +import queue +from typing import Any, Callable, Coroutine +from loguru import logger +from playwright.async_api import ( + Page, + Playwright, + async_playwright, + Browser, + BrowserContext, +) + + +PageFunction = Callable[[Page], Coroutine[Any, Any, Any]] + + +class WebRenderer: + browser_pool: queue.Queue["WebRendererInstance"] = queue.Queue() + context_pool: dict[int, BrowserContext] = {} # 长期挂载的浏览器上下文池 + page_pool: dict[str, Page] = {} # 长期挂载的页面池 + + @classmethod + async def get_browser_instance(cls) -> "WebRendererInstance": + if cls.browser_pool.empty(): + instance = await WebRendererInstance.create() + cls.browser_pool.put(instance) + instance = cls.browser_pool.get() + cls.browser_pool.put(instance) + return instance + + @classmethod + async def get_browser_context(cls) -> BrowserContext: + instance = await cls.get_browser_instance() + if id(instance) not in cls.context_pool: + context = await instance.browser.new_context() + cls.context_pool[id(instance)] = context + logger.debug( + f"Created new persistent browser context for WebRendererInstance {id(instance)}" + ) + return cls.context_pool[id(instance)] + + @classmethod + async def render( + cls, + url: str, + target: str, + params: dict = {}, + other_function: PageFunction | None = None, + timeout: int = 30, + ) -> bytes: + """ + 访问指定URL并返回截图 + + :param url: 目标URL + :param target: 渲染目标,如 ".box"、"#main" 等CSS选择器 + :param timeout: 页面加载超时时间,单位秒 + :param params: URL键值对参数 + :param other_function: 其他自定义操作函数,接受page参数 + :return: 截图的字节数据 + + """ + instance = await cls.get_browser_instance() + logger.debug( + f"Using WebRendererInstance {id(instance)} to render {url} targeting {target}" + ) + return await instance.render( + url, target, params=params, other_function=other_function, timeout=timeout + ) + + @classmethod + async def render_persistent_page( + cls, + page_id: str, + url: str, + target: str, + params: dict = {}, + other_function: PageFunction | None = None, + timeout: int = 30, + ) -> bytes: + """ + 使用长期挂载的页面访问指定URL并返回截图 + + :param page_id: 页面唯一标识符 + :param url: 目标URL + :param target: 渲染目标,如 ".box"、"#main" 等CSS选择器 + :param timeout: 页面加载超时时间,单位秒 + :param params: URL键值对参数 + :param other_function: 其他自定义操作函数,接受page参数 + :return: 截图的字节数据 + + """ + logger.debug( + f"Requesting persistent render for page_id {page_id} at {url} targeting {target} with timeout {timeout}" + ) + instance = await cls.get_browser_instance() + if page_id not in cls.page_pool: + context = await cls.get_browser_context() + page = await context.new_page() + cls.page_pool[page_id] = page + logger.debug( + f"Created new persistent page for page_id {page_id} using WebRendererInstance {id(instance)}" + ) + page = cls.page_pool[page_id] + return await instance.render_with_page( + page, + url, + target, + params=params, + other_function=other_function, + timeout=timeout, + ) + + @classmethod + async def render_file( + cls, + file_path: str, + target: str, + params: dict = {}, + other_function: PageFunction | None = None, + timeout: int = 30, + ) -> bytes: + """ + 访问指定本地文件URL并返回截图 + + :param file_path: 目标文件路径 + :param target: 渲染目标,如 ".box"、"#main" 等CSS选择器 + :param timeout: 页面加载超时时间,单位秒 + :param params: URL键值对参数 + :param other_function: 其他自定义操作函数,接受page参数 + :return: 截图的字节数据 + + """ + instance = await cls.get_browser_instance() + logger.debug( + f"Using WebRendererInstance {id(instance)} to render file {file_path} targeting {target}" + ) + return await instance.render_file( + file_path, + target, + params=params, + other_function=other_function, + timeout=timeout, + ) + + @classmethod + async def close_persistent_page(cls, page_id: str) -> None: + """ + 关闭并移除长期挂载的页面 + + :param page_id: 页面唯一标识符 + """ + if page_id in cls.page_pool: + page = cls.page_pool[page_id] + await page.close() + del cls.page_pool[page_id] + logger.debug(f"Closed and removed persistent page for page_id {page_id}") + + +class WebRendererInstance: + def __init__(self): + self._playwright: Playwright | None = None + self._browser: Browser | None = None + self.lock = asyncio.Lock() + + @property + def playwright(self) -> Playwright: + assert self._playwright is not None + return self._playwright + + @property + def browser(self) -> Browser: + assert self._browser is not None + return self._browser + + async def init(self): + self._playwright = await async_playwright().start() + self._browser = await self.playwright.chromium.launch(headless=True) + + @classmethod + async def create(cls) -> "WebRendererInstance": + instance = cls() + await instance.init() + return instance + + async def render( + self, + url: str, + target: str, + index: int = 0, + params: dict = {}, + other_function: PageFunction | None = None, + timeout: int = 30, + ) -> bytes: + """ + 访问指定URL并返回截图 + + :param url: 目标URL + :param target: 渲染目标,如 ".box"、"#main" 等CSS选择器 + :param timeout: 页面加载超时时间,单位秒 + :param index: 如果目标是一个列表,指定要截图的元素索引 + :param params: URL键值对参数 + :param other_function: 其他自定义操作函数,接受page参数 + :return: 截图的字节数据 + + """ + async with self.lock: + context = await self.browser.new_context() + page = await context.new_page() + screenshot = await self.inner_render( + page, url, target, index, params, other_function, timeout + ) + await page.close() + await context.close() + return screenshot + + async def render_with_page( + self, + page: Page, + url: str, + target: str, + index: int = 0, + params: dict = {}, + other_function: PageFunction | None = None, + timeout: int = 30, + ) -> bytes: + async with self.lock: + screenshot = await self.inner_render( + page, url, target, index, params, other_function, timeout + ) + return screenshot + + async def render_file( + self, + file_path: str, + target: str, + index: int = 0, + params: dict = {}, + other_function: PageFunction | None = None, + timeout: int = 30, + ) -> bytes: + file_path = "file:///" + str(file_path).replace("\\", "/") + return await self.render( + file_path, target, index, params, other_function, timeout + ) + + async def inner_render( + self, + page: Page, + url: str, + target: str, + index: int = 0, + params: dict = {}, + other_function: PageFunction | None = None, + timeout: int = 30, + ) -> bytes: + logger.debug(f"Navigating to {url} with timeout {timeout}") + url_with_params = url + ( + "?" + "&".join(f"{k}={v}" for k, v in params.items()) if params else "" + ) + await page.goto(url_with_params, timeout=timeout * 1000, wait_until="load") + logger.debug("Page loaded successfully") + # 等待目标元素出现 + await page.wait_for_selector(target, timeout=timeout * 1000) + logger.debug(f"Target element '{target}' found, taking screenshot") + if other_function: + await other_function(page) + elements = await page.query_selector_all(target) + if not elements: + logger.warning(f"Target element '{target}' not found on the page.") + elements = await page.query_selector_all('body') + if index >= len(elements): + logger.warning(f"Index {index} out of range for elements matching '{target}'") + index = 0 + element = elements[index] + screenshot = await element.screenshot() + logger.debug("Screenshot taken successfully") + return screenshot + + async def close(self): + await self.browser.close() + await self.playwright.stop() diff --git a/konabot/common/web_render/host_images.py b/konabot/common/web_render/host_images.py new file mode 100644 index 0000000..4f46bce --- /dev/null +++ b/konabot/common/web_render/host_images.py @@ -0,0 +1,66 @@ +import asyncio +import tempfile +from contextlib import asynccontextmanager +from dataclasses import dataclass +from pathlib import Path +from typing import cast + +from fastapi import HTTPException +from fastapi.responses import FileResponse +import nanoid +import nonebot + +from nonebot.drivers.fastapi import Driver as FastAPIDriver + +from .config import web_render_config + +app = cast(FastAPIDriver, nonebot.get_driver()).asgi + +hosted_tempdirs: dict[str, Path] = {} +hosted_tempdirs_lock = asyncio.Lock() + + +@dataclass +class TempDir: + path: Path + url_base: str + + def url_of(self, file: Path): + assert file.is_relative_to(self.path) + relative_path = file.relative_to(self.path) + url_path_segment = str(relative_path).replace("\\", "/") + return f"{self.url_base}/{url_path_segment}" + + +@asynccontextmanager +async def host_tempdir(): + with tempfile.TemporaryDirectory() as tempdir: + fp = Path(tempdir) + nid = nanoid.generate(size=10) + async with hosted_tempdirs_lock: + hosted_tempdirs[nid] = fp + yield TempDir( + path=fp, + url_base=f"{web_render_config.get_instance_baseurl()}/tempdir/{nid}", + ) + async with hosted_tempdirs_lock: + del hosted_tempdirs[nid] + + +@app.get("/tempdir/{nid}/{file_path:path}") +async def _(nid: str, file_path: str): + async with hosted_tempdirs_lock: + base_path = hosted_tempdirs.get(nid) + if base_path is None: + raise HTTPException(404) + full_path = base_path / file_path + try: + if not full_path.resolve().is_relative_to(base_path.resolve()): + raise HTTPException(status_code=403, detail="Access denied.") + except Exception: + raise HTTPException(status_code=403, detail="Access denied.") + if not full_path.is_file(): + raise HTTPException(status_code=404, detail="File not found.") + + return FileResponse(full_path.resolve()) + diff --git a/konabot/plugins/memepack/__init__.py b/konabot/plugins/memepack/__init__.py index fd6ab40..788360d 100644 --- a/konabot/plugins/memepack/__init__.py +++ b/konabot/plugins/memepack/__init__.py @@ -1,6 +1,7 @@ from io import BytesIO from typing import Iterable, cast +from loguru import logger from nonebot import on_message from nonebot_plugin_alconna import ( Alconna, @@ -14,8 +15,12 @@ from nonebot_plugin_alconna import ( UniMsg, on_alconna, ) +from playwright.async_api import ConsoleMessage, Page from konabot.common.nb.extract_image import PIL_Image, extract_image_from_message +from konabot.common.web_render import konaweb +from konabot.common.web_render.core import WebRenderer +from konabot.common.web_render.host_images import host_tempdir from konabot.plugins.memepack.drawing.display import ( draw_cao_display, draw_snaur_display, @@ -302,3 +307,36 @@ async def _(saying: list[str]): await kiosay.send(await UniMessage().image(raw=img_bytes).export()) +quote_cmd = on_alconna(Alconna( + "名人名言", + Args["quote", str], + Args["author", str], + Args["image?", Image | None], +), aliases={"quote"}) + +@quote_cmd.handle() +async def _(quote: str, author: str, img: PIL_Image): + async with host_tempdir() as tempdir: + img_path = tempdir.path / "image.png" + img_url = tempdir.url_of(img_path) + img.save(img_path) + + async def page_function(page: Page): + async def on_console(msg: ConsoleMessage): + logger.debug(f"WEB CONSOLE {msg.text}") + + page.on('console', on_console) + + await page.locator('input[name=image]').fill(img_url) + await page.locator('input[name=quote]').fill(quote) + await page.locator('input[name=author]').fill(author) + + await page.wait_for_load_state('networkidle') + + out = await WebRenderer.render( + konaweb('makequote'), + target='#main', + other_function=page_function, + ) + await quote_cmd.send(await UniMessage().image(raw=out).export()) +