66 lines
2.1 KiB
Python
66 lines
2.1 KiB
Python
|
|
"""
|
|||
|
|
CosyVoice TTS 封装,返回二进制音频数据。
|
|||
|
|
"""
|
|||
|
|
import base64
|
|||
|
|
from typing import Optional, Tuple
|
|||
|
|
|
|||
|
|
import dashscope
|
|||
|
|
from dashscope.audio.tts_v2 import AudioFormat, SpeechSynthesizer
|
|||
|
|
from fastapi import HTTPException
|
|||
|
|
|
|||
|
|
from .config import settings
|
|||
|
|
|
|||
|
|
|
|||
|
|
def synthesize(
    text: str,
    *,
    model: str,
    voice: str,
    audio_format: AudioFormat = AudioFormat.MP3_22050HZ_MONO_256KBPS,
) -> Tuple[bytes, str]:
    """Synchronously call CosyVoice and return ``(audio bytes, format identifier)``.

    Args:
        text: Text handed to ``SpeechSynthesizer.call``.
        model: Model name forwarded to ``SpeechSynthesizer``.
        voice: Voice name forwarded to ``SpeechSynthesizer``.
        audio_format: Output format forwarded to ``SpeechSynthesizer``.

    Returns:
        A tuple of the audio payload as ``bytes`` and ``audio_format.name``
        (or ``str(audio_format)`` when the value has no ``name`` attribute).

    Raises:
        HTTPException: 500 when ``settings.DASHSCOPE_API_KEY`` is empty;
            502 when the SDK call raises or no audio can be extracted from
            the response.
    """
    api_key = settings.DASHSCOPE_API_KEY
    if not api_key:
        raise HTTPException(status_code=500, detail="未配置 TTS API Key")

    # Sets the key on the dashscope module itself (process-wide) before the
    # synthesizer is constructed.
    dashscope.api_key = api_key

    try:
        synthesizer = SpeechSynthesizer(
            model=model,
            voice=voice,
            format=audio_format,
        )
        resp_obj: Optional[object] = synthesizer.call(text)
    except HTTPException:
        raise
    except Exception as exc:  # SDK / network errors
        raise HTTPException(status_code=502, detail=f"TTS 调用失败: {exc}") from exc

    # The non-streaming call is expected to return raw audio bytes; as a
    # fallback also accept a base64 string or a response object whose
    # ``output`` dict carries the audio.
    audio_bytes = _decode_audio_payload(resp_obj)
    if not audio_bytes:
        output = getattr(resp_obj, "output", None)
        if isinstance(output, dict):
            audio_bytes = _decode_audio_payload(
                output.get("audio") or output.get("audio_data") or output.get("audio_url")
            )

    if not audio_bytes:
        raise HTTPException(status_code=502, detail="TTS 未返回音频数据")

    format_name = audio_format.name if hasattr(audio_format, "name") else str(audio_format)
    return audio_bytes, format_name


def _decode_audio_payload(payload: object) -> bytes:
    """Return *payload* as raw bytes, or ``b""`` when it holds no usable audio.

    ``bytes``/``bytearray`` values are copied as-is; ``str`` values are
    treated as base64. Any other type yields ``b""``.
    """
    if isinstance(payload, (bytes, bytearray)):
        return bytes(payload)
    if isinstance(payload, str):
        try:
            # Strict validation: with the default ``validate=False`` the
            # decoder silently drops non-alphabet characters, so a
            # non-base64 string (e.g. a URL) could decode to garbage bytes
            # that would then be returned as audio. Whitespace is removed
            # first so line-wrapped base64 is still accepted.
            return base64.b64decode("".join(payload.split()), validate=True)
        except ValueError:  # binascii.Error is a ValueError subclass
            return b""
    return b""
|