homework-template/scripts/img2typ.py

#!/usr/bin/env python3
"""
img2typ.py - Convert image files to typst format using AI API.

This script scans the data directory for image files matching a pattern,
converts them to typst format using an OpenAI-compatible API, and generates
a questions.json manifest.
"""

import argparse
import asyncio
import json
import logging
import re
import sys
from dataclasses import dataclass
from pathlib import Path

import aiohttp
from common import DATA_DIR, console, load_env, load_prompt, setup_logging

# File stem layout: exactly one non-whitespace prefix character, an optional
# single whitespace character, then a run of digits and dots (e.g. "Q 1.2").
IMAGE_PATTERN = re.compile(r"^(\S)\s?([\d.]+)$")

# Prefix characters whose images find_images() leaves out.
# NOTE(review): "答" is Chinese for "answer"; presumably these mark answer
# images rather than question images — confirm.
EXCLUDED_PREFIXES = {"答", "A", "a"}

# Accepted file suffixes, in all-lowercase and all-uppercase spellings only.
IMAGE_EXTENSIONS = {
    variant
    for ext in (".png", ".jpg", ".jpeg", ".gif", ".bmp", ".webp")
    for variant in (ext, ext.upper())
}


@dataclass
class ConversionResult:
    """Outcome of processing one image, as produced by convert_image().

    One instance is created per image regardless of whether the image was
    converted, skipped, only simulated (dry run), or failed.
    """

    # Question identifier: the image stem's prefix character joined with its
    # number (optional space removed), or the raw stem when it does not match
    # IMAGE_PATTERN.
    question: str
    # File name (not full path) of the .typ file associated with the image.
    target: str
    # True when the .typ file already existed and no conversion was attempted.
    skipped: bool
    # True for skipped, dry-run, and successfully written results; False when
    # the API call or the file write failed.
    success: bool
    # Human-readable failure reason; None when success is True.
    error: str | None = None


def find_images() -> list[Path]:
    """Collect the question images located directly inside DATA_DIR.

    An entry qualifies when it is a regular file, its suffix is listed in
    IMAGE_EXTENSIONS, its stem matches IMAGE_PATTERN, and the captured prefix
    character is not one of EXCLUDED_PREFIXES.

    Returns:
        Qualifying paths in the order DATA_DIR.iterdir() yields them.
    """

    def _is_question_image(candidate: Path) -> bool:
        # Cheap filesystem/suffix checks first, then the stem pattern.
        if not candidate.is_file() or candidate.suffix not in IMAGE_EXTENSIONS:
            return False
        parsed = IMAGE_PATTERN.match(candidate.stem)
        return bool(parsed) and parsed.group(1) not in EXCLUDED_PREFIXES

    return [entry for entry in DATA_DIR.iterdir() if _is_question_image(entry)]


def check_typ_exists(image_path: Path) -> bool:
    """Report whether a sibling file with the same stem and a .typ suffix exists."""
    typ_candidate = image_path.with_suffix(".typ")
    return typ_candidate.exists()


# Optional language tag directly after a fence. The word boundary keeps the
# tag from swallowing the start of unrelated content such as "typeset".
_FENCE_TAG = r"(?:(?:typst|typ)\b)?"
# A complete fenced block: opening fence, optional tag, body, closing fence.
_FENCED_BLOCK = re.compile(r"```" + _FENCE_TAG + r"\s*\n?(.*?)\n?```", re.DOTALL)
# An opening fence line on its own (used when the closing fence is missing).
_OPENING_FENCE = re.compile(r"```" + _FENCE_TAG + r"[ \t]*\n?")


def parse_markdown_blocks(text: str) -> str:
    """Extract the Typst source from a model reply that may be markdown-fenced.

    Args:
        text: Raw reply text.

    Returns:
        The stripped body of the first complete fenced block when one exists
        (fences tagged ``typst``, ``typ``, or untagged are recognised).
        Otherwise the stripped text itself, with a leading unclosed opening
        fence removed so a truncated reply does not keep the fence line.
    """
    fenced = _FENCED_BLOCK.search(text)
    if fenced:
        return fenced.group(1).strip()

    stripped = text.strip()
    # A reply cut off mid-block starts with a fence that is never closed;
    # drop just that fence line and keep everything after it.
    opening = _OPENING_FENCE.match(stripped)
    if opening:
        return stripped[opening.end():].strip()
    return stripped


async def call_api(
    session: aiohttp.ClientSession,
    image_path: Path,
    prompt: str,
    endpoint: str,
    api_key: str,
    model: str,
    logger: logging.Logger,
) -> str | None:
    """Send one image plus the prompt to the chat endpoint and return the reply.

    The image is embedded as a base64 ``data:`` URL in a single user message
    alongside the text prompt, and POSTed as JSON with a bearer token.

    Args:
        session: Open aiohttp session used for the request.
        image_path: Image file to upload.
        prompt: Instruction text sent together with the image.
        endpoint: Full URL of the chat-completions endpoint.
        api_key: Bearer token for the Authorization header.
        model: Model identifier placed in the request payload.
        logger: Logger for progress and error messages.

    Returns:
        The text of the first choice's message, or None when the image cannot
        be read, the request fails or times out, or the response has no
        usable content.

    Raises:
        asyncio.CancelledError: Re-raised after logging so task cancellation
            still propagates.
    """
    import base64

    stem = image_path.stem
    timeout_seconds = 300

    # An unreadable or missing file is reported like any other failure
    # instead of raising out of the task.
    try:
        image_data = base64.b64encode(image_path.read_bytes()).decode("utf-8")
    except OSError as e:
        logger.error(f"[{stem}] Error: {e}")
        return None

    # Media subtypes are lowercase and ".jpg" files are "image/jpeg".
    subtype = image_path.suffix[1:].lower()
    if subtype == "jpg":
        subtype = "jpeg"

    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/{subtype};base64,{image_data}"
                        },
                    },
                ],
            }
        ],
        "max_tokens": 4096,
    }

    logger.info(f"[{stem}] Converting... (timeout {timeout_seconds}s)")
    try:
        async with session.post(
            endpoint,
            headers=headers,
            json=payload,
            timeout=aiohttp.ClientTimeout(total=timeout_seconds),
        ) as response:
            response.raise_for_status()
            result = await response.json()

            choices = result.get("choices") if isinstance(result, dict) else None
            if not choices:
                logger.error(f"[{stem}] Invalid API response")
                return None

            content = choices[0]["message"]["content"]
            if not isinstance(content, str):
                logger.error(f"[{stem}] Invalid API response")
                return None

            # Some servers send "usage": null; that must not discard a
            # otherwise valid reply.
            usage = result.get("usage") or {}
            input_tokens = usage.get("prompt_tokens", 0)
            output_tokens = usage.get("completion_tokens", 0)

            logger.info(f"[{stem}] Done: {input_tokens} in, {output_tokens} out")
            return content

    except asyncio.TimeoutError:
        logger.error(f"[{stem}] Timeout")
        return None
    except asyncio.CancelledError:
        logger.warning(f"[{stem}] Cancelled")
        raise
    except Exception as e:
        # Boundary catch: HTTP errors, bad JSON, and unexpected response
        # shapes all become a logged failure for this one image.
        logger.error(f"[{stem}] Error: {e}")
        return None


async def convert_image(
    session: aiohttp.ClientSession,
    image_path: Path,
    prompt: str,
    api_config: dict,
    logger: logging.Logger,
    dry_run: bool,
) -> ConversionResult:
    """Convert a single image to a sibling .typ file.

    Args:
        session: Open aiohttp session passed through to call_api().
        image_path: Image to convert.
        prompt: Instruction text sent with the image.
        api_config: Mapping providing the "endpoint", "key", and "model"
            entries used for the API call.
        logger: Logger for progress and error messages.
        dry_run: When True, only log what would be done; no API call and no
            file write.

    Returns:
        A ConversionResult: skipped when the .typ file already exists,
        successful for a dry run or a completed write, and failed (with
        ``error`` set) when the API call fails, the extracted Typst code is
        empty, or the file cannot be written.
    """
    stem = image_path.stem
    match = IMAGE_PATTERN.match(stem)
    question_name = match.group(1) + match.group(2) if match else stem
    typ_path = image_path.with_suffix(".typ")

    def outcome(
        *, skipped: bool, success: bool, error: str | None = None
    ) -> ConversionResult:
        # Every result shares the same question/target pair.
        return ConversionResult(
            question=question_name,
            target=typ_path.name,
            skipped=skipped,
            success=success,
            error=error,
        )

    if check_typ_exists(image_path):
        logger.info(f"[{question_name}] Skipping: .typ already exists")
        return outcome(skipped=True, success=True)

    if dry_run:
        logger.info(f"[{question_name}] Would convert -> {typ_path.name}")
        return outcome(skipped=False, success=True)

    content = await call_api(
        session,
        image_path,
        prompt,
        str(api_config["endpoint"]),
        api_config["key"],
        api_config["model"],
        logger,
    )

    if content is None:
        return outcome(skipped=False, success=False, error="API call failed")

    typst_code = parse_markdown_blocks(content)

    # Writing an empty file would make every later run skip this image
    # because the .typ "already exists"; treat it as a failure instead.
    if not typst_code:
        logger.error(f"[{question_name}] Empty conversion result")
        return outcome(
            skipped=False, success=False, error="Empty conversion result"
        )

    try:
        typ_path.write_text(typst_code, encoding="utf-8")
    except OSError as e:
        logger.error(f"[{question_name}] Write failed: {e}")
        return outcome(skipped=False, success=False, error=str(e))

    # Report the encoded size: len() of the str counts characters, which
    # differs from bytes for non-ASCII text.
    size = len(typst_code.encode("utf-8"))
    logger.info(f"[{question_name}] Wrote {typ_path.name} ({size} bytes)")
    return outcome(skipped=False, success=True)


def generate_questions_json(
    results: list[ConversionResult],
    logger: logging.Logger,
    dry_run: bool,
) -> None:
    """Build the questions.json manifest from conversion results.

    Every result yields one entry holding its question name, the fixed
    format "typst", its target file name, and whatever find_attachments()
    returns for that question. The manifest is written to DATA_DIR unless
    dry_run is set, in which case it is only logged.
    """
    from common import find_attachments

    manifest = [
        {
            "question": item.question,
            "format": "typst",
            "target": item.target,
            "attachments": find_attachments(item.question),
        }
        for item in results
    ]

    manifest_path = DATA_DIR / "questions.json"

    if not dry_run:
        serialized = json.dumps(manifest, indent=4, ensure_ascii=False)
        manifest_path.write_text(serialized, encoding="utf-8")
        logger.info(f"Wrote {manifest_path.name} ({len(manifest)} entries)")
        return

    # Dry run: describe the manifest without touching the filesystem.
    logger.info(f"[DRY-RUN] Would write questions.json with {len(manifest)} entries")
    logger.debug(f"Content: {json.dumps(manifest, indent=4, ensure_ascii=False)}")


def parse_args() -> argparse.Namespace:
    """Build the command-line parser and parse sys.argv.

    Options: repeatable -f/--file (collected into ``files``), the boolean
    switches --dry-run and --verbose, and the integer options --retry and -n
    (both defaulting to 3).
    """
    cli = argparse.ArgumentParser(
        description="Convert image files to typst format using AI API"
    )
    cli.add_argument(
        "-f",
        "--file",
        dest="files",
        action="append",
        help="Specific image files to process",
    )

    # Boolean switches, registered in help-output order.
    switches = (
        ("--dry-run", "Do not call AI or write files"),
        ("--verbose", "Enable debug logging"),
    )
    for flag, description in switches:
        cli.add_argument(flag, action="store_true", help=description)

    # Integer options sharing the same default.
    counters = (
        ("--retry", "Retry attempts (default: 3)"),
        ("-n", "Concurrent limit (default: 3)"),
    )
    for flag, description in counters:
        cli.add_argument(flag, type=int, default=3, help=description)

    return cli.parse_args()


async def async_main(args: argparse.Namespace, logger: logging.Logger) -> None:
    """Run the whole conversion batch.

    Resolves the images to process (explicit ``args.files``, relative names
    taken from DATA_DIR, or a scan via find_images()), converts them
    concurrently with at most ``args.n`` conversions in flight, retries failed
    conversions up to ``args.retry`` total attempts, then writes the manifest
    and logs a summary.

    Args:
        args: Parsed command-line options (files, dry_run, retry, n).
        logger: Logger for progress and error messages.
    """
    if args.files:
        requested = [
            p if p.is_absolute() else DATA_DIR / p for p in map(Path, args.files)
        ]
        # Report explicitly requested files that do not exist instead of
        # letting one bad path abort the whole batch later.
        image_paths = []
        for p in requested:
            if p.is_file():
                image_paths.append(p)
            else:
                logger.error(f"File not found: {p}")
    else:
        image_paths = find_images()

    logger.info(f"Found {len(image_paths)} images to process")

    if not image_paths:
        logger.warning("No images found to process")
        return

    api_config = load_env()
    prompt = load_prompt("img2typ.prompt.txt")

    # Clamp both knobs: Semaphore(0) would block forever, and a retry count
    # below 1 must still allow a single attempt.
    semaphore = asyncio.Semaphore(max(1, args.n))
    max_attempts = max(1, args.retry)

    async def limited_convert(
        session: aiohttp.ClientSession, img_path: Path
    ) -> ConversionResult:
        async with semaphore:
            attempt = 1
            while True:
                result = await convert_image(
                    session, img_path, prompt, api_config, logger, args.dry_run
                )
                if result.success or attempt >= max_attempts:
                    return result
                # Short exponential backoff so transient API errors get a
                # chance to clear before the next attempt.
                delay = min(2 ** (attempt - 1), 8)
                logger.warning(
                    f"[{result.question}] Attempt {attempt}/{max_attempts} failed "
                    f"({result.error}); retrying in {delay}s"
                )
                await asyncio.sleep(delay)
                attempt += 1

    async with aiohttp.ClientSession() as session:
        tasks = [
            asyncio.create_task(limited_convert(session, img)) for img in image_paths
        ]

        results = []
        try:
            for coro in asyncio.as_completed(tasks):
                result = await coro
                results.append(result)
        except asyncio.CancelledError:
            logger.warning("Cancelled! Shutting down...")
            for task in tasks:
                task.cancel()
            # Wait for the cancelled tasks to unwind before exiting.
            await asyncio.gather(*tasks, return_exceptions=True)
            sys.exit(1)

    # Tasks finish in arbitrary order; sort for a stable manifest.
    results.sort(key=lambda r: r.question)
    generate_questions_json(results, logger, args.dry_run)

    skipped = sum(1 for r in results if r.skipped)
    solved = sum(1 for r in results if r.success and not r.skipped)
    failed = sum(1 for r in results if not r.success)
    logger.info(f"Complete: {solved}/{len(results)} converted, {skipped} skipped")
    if failed:
        logger.warning(f"{failed} conversion(s) failed")


def main() -> None:
    """Command-line entry point.

    Parses the arguments, configures logging, announces the run settings,
    and drives async_main() on a fresh event loop. Ctrl-C is logged and
    turned into exit status 1.
    """
    options = parse_args()
    log = setup_logging("img2typ", options.verbose)

    log.info(f"img2typ starting (Dry-run: {options.dry_run}, Workers: {options.n})")
    log.info(f"Data directory: {DATA_DIR}")

    try:
        asyncio.run(async_main(options, log))
    except KeyboardInterrupt:
        log.warning("Interrupted by user")
        raise SystemExit(1)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()