# Ai_GirlFriend/test_voice_conversation.py
# 2026-03-05 17:18:04 +08:00
#
# 131 lines, 4.0 KiB, Python
#
# NOTE: the repository viewer flagged this file as containing ambiguous
# Unicode characters, i.e. characters that might be confused with other
# characters. If that is intentional, the warning can safely be ignored.

"""
Test the complete voice conversation flow.
ASR → LLM → TTS
"""
import os
import sys
import base64
import logging
# Put the 'lover' directory that sits next to this file at the front of
# sys.path so modules inside it can be imported.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'lover'))
# Configure the root logger at INFO level and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def create_test_audio(sample_rate=16000, duration=2, frequency=440):
    """Create a mono 16-bit PCM WAV clip containing a sine tone.

    The WAV container is assembled entirely in memory, so no temporary file
    is created (and none can be leaked or left locked if an error occurs).
    With the default arguments the returned bytes are a 2-second, 440 Hz
    tone sampled at 16 kHz.

    Args:
        sample_rate: Samples per second; must be positive.
        duration: Clip length in seconds; must not be negative.
        frequency: Tone frequency in Hz.

    Returns:
        bytes: The complete WAV file (header followed by PCM frames).

    Raises:
        ValueError: If ``sample_rate`` is not positive or ``duration`` is
            negative.
    """
    import io
    import math
    import struct
    import wave

    if sample_rate <= 0:
        raise ValueError("sample_rate must be positive")
    if duration < 0:
        raise ValueError("duration must not be negative")

    frame_count = int(sample_rate * duration)
    # Peak is 30% of the signed 16-bit range, so every sample fits in '<h'.
    samples = [
        int(32767 * 0.3 * math.sin(2 * math.pi * frequency * i / sample_rate))
        for i in range(frame_count)
    ]
    # Pack all samples in one call as little-endian signed 16-bit integers.
    pcm_data = struct.pack(f'<{frame_count}h', *samples)

    # Wrap the raw PCM in a WAV container. wave.open() accepts a seekable
    # file-like object and does not close a stream it did not open, so the
    # buffer is still readable after the with-block.
    buffer = io.BytesIO()
    with wave.open(buffer, 'wb') as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)
        # Use the same rate the samples were generated with.
        wav_file.setframerate(sample_rate)
        wav_file.writeframes(pcm_data)
    return buffer.getvalue()
def test_conversation():
    """Exercise the voice-conversation endpoint with a synthetic audio clip.

    Generates a test WAV via ``create_test_audio()``, base64-encodes it and
    POSTs it as JSON to ``http://127.0.0.1:30101/voice/call/conversation``
    with a Bearer token. On HTTP 200 the recognised user text and the AI
    reply are printed, and any returned audio is base64-decoded and written
    to ``test_ai_voice.mp3`` in the current working directory.

    Returns:
        bool: True when the server answered with HTTP 200; False for any
        other status code or when an exception was raised along the way.
    """
    print("=" * 60)
    print("测试完整语音对话流程")
    print("=" * 60)

    # Build the test audio.
    print("\n🎵 创建测试音频...")
    audio_data = create_test_audio()
    print(f" 音频大小: {len(audio_data)} 字节")

    # Encode as base64 so it can travel inside the JSON body.
    audio_base64 = base64.b64encode(audio_data).decode('utf-8')
    print(f" Base64 长度: {len(audio_base64)}")

    # Call the API.
    print("\n📤 调用语音对话 API...")
    import requests

    # NOTE: a valid token is required.
    token = "test_token"  # replace with a real token

    try:
        response = requests.post(
            'http://127.0.0.1:30101/voice/call/conversation',
            json={
                'audio_data': audio_base64,
                'format': 'wav',
            },
            headers={
                'Authorization': f'Bearer {token}',
                'Content-Type': 'application/json',
            },
            timeout=60,
        )
        print(f"\n📋 响应状态: {response.status_code}")

        if response.status_code != 200:
            print(f"\n❌ 请求失败: {response.status_code}")
            print(f" 响应: {response.text}")
            return False

        result = response.json()
        # "data" may be missing, explicitly null, or the body may not be a
        # JSON object at all; fall back to an empty dict so a successful
        # response is not misreported as a failure by the handler below.
        data = result.get('data') if isinstance(result, dict) else None
        if not isinstance(data, dict):
            data = {}

        print("\n✅ 对话成功!")
        print(f" 用户说: {data.get('user_text', 'N/A')}")
        print(f" AI回复: {data.get('ai_text', 'N/A')}")

        audio_data_result = data.get('audio_data')
        if audio_data_result:
            audio_bytes = base64.b64decode(audio_data_result)
            print(f" AI语音大小: {len(audio_bytes)} 字节")
            # Save the AI voice reply to a file.
            output_file = 'test_ai_voice.mp3'
            with open(output_file, 'wb') as f:
                f.write(audio_bytes)
            print(f" AI语音已保存: {output_file}")

        print("\n" + "=" * 60)
        print("🎉 语音对话测试完成!")
        print("=" * 60)
        return True
    except Exception as e:
        # Top-level boundary of a manual test script: report every failure
        # (connection errors, bad JSON, bad base64, ...) with a traceback
        # and signal it through the return value.
        print(f"\n❌ 测试失败: {e}")
        import traceback
        traceback.print_exc()
        return False
if __name__ == "__main__":
    # Prerequisites and caveats shown before the test runs, one entry per
    # printed line.
    banner = (
        "\n⚠️ 注意:此测试需要:",
        " 1. 后端服务运行在 http://127.0.0.1:30101",
        " 2. 有效的用户 token",
        " 3. 配置了 DASHSCOPE_API_KEY",
        " 4. 配置了 OSS",
        "\n由于测试音频是正弦波ASR 可能无法识别",
        "但可以测试 LLM 和 TTS 部分\n",
    )
    for line in banner:
        print(line)

    # Exit status 0 when the conversation test reports success, 1 otherwise.
    if test_conversation():
        exit_code = 0
    else:
        exit_code = 1
    sys.exit(exit_code)