# Lower-case filename suffixes that mark an attachment as an image.
_IMAGE_SUFFIXES = (".png", ".jpg", ".jpeg", ".gif", ".bmp", ".webp")


def get_image_attachments(question_data: dict) -> list[tuple[str, Path]]:
    """Return ``(attachment_name, path)`` pairs for image attachments.

    Each entry of ``question_data["attachments"]`` is matched
    case-insensitively against the known image suffixes and resolved
    relative to ``DATA_DIR``. Only entries that resolve to an existing
    regular file are returned, in their original order.

    Args:
        question_data: Question record; its optional ``"attachments"`` value
            is iterated as attachment names. A missing key, ``None``, or an
            empty list all yield an empty result.

    Returns:
        A list of ``(attachment_name, resolved_path)`` tuples.
    """
    images = []
    # ``or ()`` also covers an explicit ``"attachments": None`` entry, which
    # ``.get("attachments", [])`` would hand straight to the loop.
    for att in question_data.get("attachments") or ():
        # Cheap suffix test first so non-image names never hit the filesystem.
        if not att.lower().endswith(_IMAGE_SUFFIXES):
            continue
        att_path = DATA_DIR / att
        # is_file() rather than exists(): a directory that happens to be
        # named like an image cannot be opened for reading later on.
        if att_path.is_file():
            images.append((att, att_path))
    return images
api_config["model"], logger, + image_contents, ) if content is None: