修改解题脚本以添补视觉部分

2026-04-30 17:28:11 +08:00
parent dd3f6b4573
commit ef9950a9d3
1 changed files with 36 additions and 1 deletions
--- a/scripts/solve.py
+++ b/scripts/solve.py
@@ -82,6 +82,18 @@ def build_prompt(question_data: dict, typ_content: str | None) -> str:
    return "".join(parts)


+def get_image_attachments(question_data: dict) -> list[tuple[str, Path]]:
+    """Return (attachment_name, path) for each existing attachment with an image extension."""
+    images = []
+    for att in question_data.get("attachments", []):  # a missing key yields no images
+        att_path = DATA_DIR / att  # attachment names are resolved against DATA_DIR
+        if att_path.exists() and att.lower().endswith(
+            (".png", ".jpg", ".jpeg", ".gif", ".bmp", ".webp")  # matched on the lowercased name
+        ):
+            images.append((att, att_path))
+    return images
+
+
 async def call_api_streaming(
    session: aiohttp.ClientSession,
    question_name: str,
@@ -90,13 +102,34 @@ async def call_api_streaming(
    api_key: str,
    model: str,
    logger: logging.Logger,
+    image_contents: list[tuple[str, Path]] | None = None,
 ) -> str | None:
    """Call the AI API with streaming to solve the question."""
+    import base64
+
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}

+    messages = [{"role": "user", "content": [{"type": "text", "text": prompt}]}]
+
+    if image_contents:
+        for att_name, att_path in image_contents:
+            with open(att_path, "rb") as f:
+                image_data = base64.b64encode(f.read()).decode("utf-8")
+            suffix = att_path.suffix[1:].lower()
+            if suffix in ("jpg", "jpeg"):
+                mime_type = "image/jpeg"
+            else:
+                mime_type = f"image/{suffix}"
+            messages[0]["content"].append(
+                {
+                    "type": "image_url",
+                    "image_url": {"url": f"data:{mime_type};base64,{image_data}"},
+                }
+            )
+
    payload = {
        "model": model,
-        "messages": [{"role": "user", "content": [{"type": "text", "text": prompt}]}],
+        "messages": messages,
        "max_tokens": 4096,
        "stream": True,
    }
@@ -182,6 +215,7 @@ async def solve_question(
        )

    prompt = build_prompt(question_data, typ_content)
+    image_contents = get_image_attachments(question_data)

    content = await call_api_streaming(
        session,
@@ -191,6 +225,7 @@ async def solve_question(
        api_config["key"],
        api_config["model"],
        logger,
+        image_contents,
    )

    if content is None: