52 lines
1.4 KiB
Python
52 lines
1.4 KiB
Python
|
|
"""
|
|||
|
|
Qwen-VL 封装:给定图片 URL,返回简短描述。
|
|||
|
|
"""
|
|||
|
|
from typing import Optional
|
|||
|
|
|
|||
|
|
import dashscope
|
|||
|
|
from dashscope import MultiModalConversation
|
|||
|
|
from fastapi import HTTPException
|
|||
|
|
|
|||
|
|
from .config import settings
|
|||
|
|
|
|||
|
|
|
|||
|
|
def describe_image(
    image_url: str,
    *,
    model: Optional[str] = None,
    max_tokens: int = 200,
    prompt: str = "请用简洁中文描述这张图片的主要内容,限制在50字内。",
) -> str:
    """Ask a Qwen-VL model for a short description of an image.

    Args:
        image_url: Location of the image; passed through unchanged as the
            ``image`` part of the user message.
        model: Model name override. Falls back to ``settings.VISION_MODEL``
            and then to ``"qwen3-vl-flash"``.
        max_tokens: Forwarded to the API call as ``max_tokens``.
        prompt: Text instruction sent alongside the image.

    Returns:
        The first text part of the reply, with surrounding whitespace
        stripped.

    Raises:
        HTTPException: 500 when ``settings.DASHSCOPE_API_KEY`` is empty;
            502 when the API call raises, when the response has no
            readable message content, or when the reply contains no text.
    """
    api_key = settings.DASHSCOPE_API_KEY
    if not api_key:
        raise HTTPException(status_code=500, detail="未配置 DASHSCOPE_API_KEY")
    model_name = model or settings.VISION_MODEL or "qwen3-vl-flash"

    messages = [
        {
            "role": "user",
            "content": [
                {"image": image_url},
                {"text": prompt},
            ],
        }
    ]
    try:
        resp = MultiModalConversation.call(
            api_key=api_key,
            model=model_name,
            messages=messages,
            result_format="message",
            max_tokens=max_tokens,
        )
    except Exception as exc:
        raise HTTPException(status_code=502, detail=f"图片理解失败: {exc}") from exc

    try:
        content = resp.output.choices[0].message.content
    except (AttributeError, KeyError, IndexError, TypeError) as exc:
        # The response has no usable choices. Surface whatever error
        # code/message the response carries instead of hiding it behind the
        # generic "no result" text.
        # NOTE(review): assumes failed DashScope responses expose ``code`` and
        # ``message`` -- confirm against the SDK version in use.
        try:
            reason_parts = (resp.code, resp.message)
        except (AttributeError, KeyError):
            reason_parts = ()
        reason = " ".join(str(part) for part in reason_parts if part)
        detail = f"图片理解失败: {reason}" if reason else "图片理解未返回结果"
        raise HTTPException(status_code=502, detail=detail) from exc

    if isinstance(content, str):
        return content.strip()
    if isinstance(content, list):
        # Take the first part that carries text, not only index 0, so a reply
        # that leads with a non-text part is still usable.
        for part in content:
            if isinstance(part, dict) and "text" in part:
                return str(part["text"]).strip()

    raise HTTPException(status_code=502, detail="图片理解未返回结果")
|