Ai_GirlFriend/lover/tts.py
2026-01-31 19:15:41 +08:00

66 lines
2.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
CosyVoice TTS 封装,返回二进制音频数据。
"""
import base64
from typing import Optional, Tuple
import dashscope
from dashscope.audio.tts_v2 import AudioFormat, SpeechSynthesizer
from fastapi import HTTPException
from .config import settings
def _decode_base64_audio(payload: str) -> bytes:
    """Strictly decode a base64 string; return ``b""`` when it is not valid base64."""
    # Line-wrapped base64 is legitimate, so drop whitespace before validating.
    compact = "".join(payload.split())
    try:
        # validate=True matters: the default mode silently discards characters
        # outside the base64 alphabet, so an arbitrary string (for example a
        # URL) could "decode" into garbage bytes instead of being rejected.
        return base64.b64decode(compact, validate=True)
    except ValueError:
        # binascii.Error (bad alphabet/padding) is a ValueError subclass, and
        # non-ASCII text raises ValueError directly.
        return b""


def _extract_audio_bytes(resp_obj: Optional[object]) -> bytes:
    """Normalise the value returned by the synthesizer into raw bytes (``b""`` if none)."""
    if isinstance(resp_obj, (bytes, bytearray)):
        return bytes(resp_obj)
    if isinstance(resp_obj, str):
        return _decode_base64_audio(resp_obj)

    # Fallback: an object exposing an ``output`` dict that carries the audio.
    output = getattr(resp_obj, "output", None)
    if not isinstance(output, dict):
        return b""

    # NOTE(review): "audio_url" presumably holds a download URL rather than
    # audio content. Strict decoding rejects such a value, so the caller gets
    # the 502 "no audio" error instead of corrupted bytes. Downloading the URL
    # is intentionally not attempted here.
    audio_raw = output.get("audio") or output.get("audio_data") or output.get("audio_url")
    if isinstance(audio_raw, (bytes, bytearray)):
        return bytes(audio_raw)
    if isinstance(audio_raw, str):
        return _decode_base64_audio(audio_raw)
    return b""


def synthesize(
    text: str,
    *,
    model: str,
    voice: str,
    audio_format: AudioFormat = AudioFormat.MP3_22050HZ_MONO_256KBPS,
) -> Tuple[bytes, str]:
    """
    Synchronously call CosyVoice and return ``(audio bytes, format identifier)``.

    Args:
        text: Text passed to ``SpeechSynthesizer.call``.
        model: Model name passed to ``SpeechSynthesizer``.
        voice: Voice name passed to ``SpeechSynthesizer``.
        audio_format: Output format passed to ``SpeechSynthesizer`` as ``format``.

    Returns:
        A tuple of the audio bytes and ``audio_format.name`` (or
        ``str(audio_format)`` when the value has no ``name`` attribute).

    Raises:
        HTTPException: 500 when ``settings.DASHSCOPE_API_KEY`` is empty;
            502 when the SDK call raises, or when no audio bytes can be
            extracted from its result.
    """
    api_key = settings.DASHSCOPE_API_KEY
    if not api_key:
        raise HTTPException(status_code=500, detail="未配置 TTS API Key")
    # The key is set on the dashscope module itself, i.e. process-wide.
    dashscope.api_key = api_key

    resp_obj: Optional[object] = None
    try:
        synthesizer = SpeechSynthesizer(
            model=model,
            voice=voice,
            format=audio_format,
        )
        resp_obj = synthesizer.call(text)
    except HTTPException:
        raise
    except Exception as exc:  # SDK / network errors
        raise HTTPException(status_code=502, detail=f"TTS 调用失败: {exc}") from exc

    # According to the original author's note, the official non-streaming call
    # returns the audio bytes directly; base64 strings and dict-style
    # responses are handled only as a fallback.
    audio_bytes = _extract_audio_bytes(resp_obj)
    if not audio_bytes:
        raise HTTPException(status_code=502, detail="TTS 未返回音频数据")

    if hasattr(audio_format, "name"):
        format_label = audio_format.name
    else:
        format_label = str(audio_format)
    return audio_bytes, format_label