# Ai_GirlFriend/lover/vision.py

"""
Qwen-VL 封装：给定图片 URL，返回简短描述。
"""
from typing import Optional

import dashscope
from dashscope import MultiModalConversation
from fastapi import HTTPException

from .config import settings


def describe_image(
    image_url: str,
    *,
    model: Optional[str] = None,
    max_tokens: int = 200,
    prompt: str = "请用简洁中文描述这张图片的主要内容，限制在50字内。",
) -> str:
    """Ask a DashScope multimodal model for a short description of an image.

    Args:
        image_url: Image location; sent unchanged as the ``image`` part of
            the single user message.
        model: Model name override. When falsy, ``settings.VISION_MODEL`` is
            used, and if that is falsy too, ``"qwen3-vl-flash"``.
        max_tokens: Forwarded to ``MultiModalConversation.call``.
        prompt: Text instruction sent alongside the image.

    Returns:
        The whitespace-stripped text of the first text part found in the
        first choice of the response.

    Raises:
        HTTPException: 500 when ``settings.DASHSCOPE_API_KEY`` is not set;
            502 when the call raises, when the response carries a non-200
            ``status_code``, or when no text part can be found in it.
    """
    api_key = settings.DASHSCOPE_API_KEY
    if not api_key:
        raise HTTPException(status_code=500, detail="未配置 DASHSCOPE_API_KEY")
    model_name = model or settings.VISION_MODEL or "qwen3-vl-flash"

    messages = [
        {
            "role": "user",
            "content": [
                {"image": image_url},
                {"text": prompt},
            ],
        }
    ]
    try:
        resp = MultiModalConversation.call(
            api_key=api_key,
            model=model_name,
            messages=messages,
            result_format="message",
            max_tokens=max_tokens,
        )
    except Exception as exc:
        raise HTTPException(status_code=502, detail=f"图片理解失败: {exc}") from exc

    # API-level failures can come back as a response object carrying an error
    # status instead of an exception; surface the upstream reason rather than
    # collapsing it into the generic "no result" error below.
    status = _response_field(resp, "status_code")
    if status is not None and status != 200:
        code = _response_field(resp, "code")
        message = _response_field(resp, "message")
        reason = " ".join(str(part) for part in (code, message) if part)
        if not reason:
            reason = f"HTTP {status}"
        raise HTTPException(status_code=502, detail=f"图片理解失败: {reason}")

    # Only the lookup failures a malformed/empty response can produce are
    # tolerated here; anything else is a real bug and should propagate.
    try:
        content = resp.output.choices[0].message.content
    except (AttributeError, KeyError, IndexError, TypeError):
        content = None

    text = _first_text_part(content)
    if text is None:
        raise HTTPException(status_code=502, detail="图片理解未返回结果")
    return text.strip()


def _response_field(resp, name: str):
    """Return ``resp.<name>``, or ``None`` when the field is absent."""
    try:
        return getattr(resp, name)
    except (AttributeError, KeyError):
        # KeyError covers response objects that map attribute access onto
        # dict lookup, where a missing field is not an AttributeError.
        return None


def _first_text_part(content) -> Optional[str]:
    """Return the text of the first ``{"text": ...}`` part in *content*, if any."""
    if not isinstance(content, list):
        return None
    for part in content:
        if isinstance(part, dict) and "text" in part:
            return str(part["text"])
    return None