52 lines
1.4 KiB
Python
52 lines
1.4 KiB
Python
"""
|
||
Qwen-VL 封装:给定图片 URL,返回简短描述。
|
||
"""
|
||
from typing import Optional
|
||
|
||
import dashscope
|
||
from dashscope import MultiModalConversation
|
||
from fastapi import HTTPException
|
||
|
||
from .config import settings
|
||
|
||
|
||
def describe_image(
    image_url: str,
    *,
    model: Optional[str] = None,
    max_tokens: int = 200,
    prompt: str = "请用简洁中文描述这张图片的主要内容,限制在50字内。",
) -> str:
    """Ask a DashScope multimodal model for a short description of an image.

    Args:
        image_url: Location of the image, sent as the ``image`` content item.
        model: Model name to call. When falsy, ``settings.VISION_MODEL`` is
            used, and if that is falsy too, ``"qwen3-vl-flash"``.
        max_tokens: Forwarded unchanged to ``MultiModalConversation.call``.
        prompt: Text instruction sent alongside the image.

    Returns:
        The first text item of the first choice, stripped of surrounding
        whitespace.

    Raises:
        HTTPException: 500 when ``settings.DASHSCOPE_API_KEY`` is not set;
            502 when the call raises, when the service answers with a
            non-200 status, or when the reply carries no text.
    """
    api_key = settings.DASHSCOPE_API_KEY
    if not api_key:
        raise HTTPException(status_code=500, detail="未配置 DASHSCOPE_API_KEY")
    model_name = model or settings.VISION_MODEL or "qwen3-vl-flash"

    messages = [
        {
            "role": "user",
            "content": [
                {"image": image_url},
                {"text": prompt},
            ],
        }
    ]
    try:
        resp = MultiModalConversation.call(
            api_key=api_key,
            model=model_name,
            messages=messages,
            result_format="message",
            max_tokens=max_tokens,
        )
    except Exception as exc:
        raise HTTPException(status_code=502, detail=f"图片理解失败: {exc}") from exc

    def _field(name: str):
        # The response object may raise KeyError (dict-backed attribute
        # access) or AttributeError for a missing field; treat both as absent.
        try:
            return getattr(resp, name)
        except (AttributeError, KeyError):
            return None

    # A failed request is reported through the response status rather than an
    # exception, so surface the service's own error code and message instead
    # of falling through to the generic "no result" error below.
    status = _field("status_code")
    if status is not None and status != 200:
        raise HTTPException(
            status_code=502,
            detail=f"图片理解失败: {_field('code')} {_field('message')}",
        )

    # Only the lookup itself is guarded; anything else should propagate.
    try:
        content = resp.output.choices[0].message.content
    except (AttributeError, KeyError, IndexError, TypeError):
        content = None

    if isinstance(content, str):
        # NOTE(review): some models appear to return plain-string content;
        # accepted here rather than rejected - confirm against the API docs.
        text = content.strip()
        if text:
            return text
    elif isinstance(content, list):
        # Take the first text item wherever it sits in the list, not only
        # when it happens to be the first element.
        for item in content:
            if isinstance(item, dict) and "text" in item:
                return str(item["text"]).strip()

    raise HTTPException(status_code=502, detail="图片理解未返回结果")
|