66 lines
2.1 KiB
Python
66 lines
2.1 KiB
Python
"""
|
||
CosyVoice TTS 封装,返回二进制音频数据。
|
||
"""
|
||
import base64
|
||
from typing import Optional, Tuple
|
||
|
||
import dashscope
|
||
from dashscope.audio.tts_v2 import AudioFormat, SpeechSynthesizer
|
||
from fastapi import HTTPException
|
||
|
||
from .config import settings
|
||
|
||
|
||
def synthesize(
    text: str,
    *,
    model: str,
    voice: str,
    audio_format: AudioFormat = AudioFormat.MP3_22050HZ_MONO_256KBPS,
) -> Tuple[bytes, str]:
    """Synchronously call CosyVoice and return ``(audio bytes, format name)``.

    Args:
        text: Text passed to ``SpeechSynthesizer.call``.
        model: Model identifier forwarded to ``SpeechSynthesizer``.
        voice: Voice identifier forwarded to ``SpeechSynthesizer``.
        audio_format: Output format forwarded to ``SpeechSynthesizer``.

    Returns:
        A tuple of the audio payload and ``audio_format.name`` (or
        ``str(audio_format)`` when the value has no ``name`` attribute).

    Raises:
        HTTPException: 500 when ``settings.DASHSCOPE_API_KEY`` is empty;
            502 when the SDK call raises, or when no audio bytes could be
            extracted from the response.
    """
    api_key = settings.DASHSCOPE_API_KEY
    if not api_key:
        raise HTTPException(status_code=500, detail="未配置 TTS API Key")

    # NOTE: this assigns the key on the dashscope module itself, so the
    # value is shared by everything in the process that uses dashscope.
    dashscope.api_key = api_key

    try:
        synthesizer = SpeechSynthesizer(
            model=model,
            voice=voice,
            format=audio_format,
        )
        response = synthesizer.call(text)
    except HTTPException:
        # Let an HTTPException through unchanged instead of re-wrapping it.
        raise
    except Exception as exc:  # SDK / network errors
        raise HTTPException(status_code=502, detail=f"TTS 调用失败: {exc}") from exc

    # Per the original author's note, the official non-streaming call
    # returns the audio binary directly; base64 strings and dict-shaped
    # responses are handled as a fallback.
    audio_bytes = _extract_audio(response)
    if not audio_bytes:
        raise HTTPException(status_code=502, detail="TTS 未返回音频数据")

    format_name = (
        audio_format.name if hasattr(audio_format, "name") else str(audio_format)
    )
    return audio_bytes, format_name


def _extract_audio(response: object) -> bytes:
    """Return the audio bytes carried by an SDK response, or ``b""`` if none."""
    if isinstance(response, (bytes, bytearray, str)):
        return _coerce_audio(response)

    output = getattr(response, "output", None)
    if not isinstance(output, dict):
        return b""

    candidate = (
        output.get("audio") or output.get("audio_data") or output.get("audio_url")
    )
    return _coerce_audio(candidate)


def _coerce_audio(payload: object) -> bytes:
    """Convert a bytes-like or base64 string payload to ``bytes``.

    Anything else, including a string that is not valid base64, yields
    ``b""`` so the caller reports "no audio" instead of returning garbage.
    """
    if isinstance(payload, (bytes, bytearray)):
        return bytes(payload)
    if not isinstance(payload, str):
        return b""

    # Drop whitespace/line breaks first, then decode strictly: the default
    # non-validating mode silently discards non-alphabet characters, so an
    # arbitrary string (e.g. a URL under "audio_url", which is not fetched
    # here) could decode to meaningless bytes.
    compact = "".join(payload.split())
    try:
        return base64.b64decode(compact, validate=True)
    except ValueError:  # binascii.Error and the non-ASCII error are ValueErrors
        return b""
|