修改解题脚本以添补视觉部分

2026-04-30 17:28:11 +08:00
parent dd3f6b4573
commit ef9950a9d3
1 changed files with 36 additions and 1 deletions
--- a/scripts/solve.py
+++ b/scripts/solve.py
@ -82,6 +82,18 @@ def build_prompt(question_data: dict, typ_content: str | None) -> str:
    return "".join(parts)
 def get_image_attachments(question_data: dict) -> list[tuple[str, Path]]:
    """Return list of (attachment_name, path) for image attachments."""
    images = []
    for att in question_data.get("attachments", []):
        att_path = DATA_DIR / att
        if att_path.exists() and att.lower().endswith(
            (".png", ".jpg", ".jpeg", ".gif", ".bmp", ".webp")
        ):
            images.append((att, att_path))
    return images
 async def call_api_streaming(
    session: aiohttp.ClientSession,
    question_name: str,
@ -90,13 +102,34 @@ async def call_api_streaming(
    api_key: str,
    model: str,
    logger: logging.Logger,
    image_contents: list[tuple[str, Path]] | None = None,
 ) -> str | None:
    """Call the AI API with streaming to solve the question."""
    import base64
    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    messages = [{"role": "user", "content": [{"type": "text", "text": prompt}]}]
    if image_contents:
        for att_name, att_path in image_contents:
            with open(att_path, "rb") as f:
                image_data = base64.b64encode(f.read()).decode("utf-8")
            suffix = att_path.suffix[1:].lower()
            if suffix in ("jpg", "jpeg"):
                mime_type = "image/jpeg"
            else:
                mime_type = f"image/{suffix}"
            messages[0]["content"].append(
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:{mime_type};base64,{image_data}"},
                }
            )
    payload = {
        "model": model,
-        "messages": [{"role": "user", "content": [{"type": "text", "text": prompt}]}],
+        "messages": messages,
        "max_tokens": 4096,
        "stream": True,
    }
@ -182,6 +215,7 @@ async def solve_question(
        )
    prompt = build_prompt(question_data, typ_content)
    image_contents = get_image_attachments(question_data)
    content = await call_api_streaming(
        session,
@ -191,6 +225,7 @@ async def solve_question(
        api_config["key"],
        api_config["model"],
        logger,
        image_contents,
    )
    if content is None: