# Ai_GirlFriend/test_voice_conversation.py

"""
测试完整的语音对话流程
ASR → LLM → TTS
"""
import os
import sys
import base64
import logging

# Make the "lover" directory next to this file importable by putting it
# first on sys.path.
_lover_dir = os.path.join(os.path.dirname(__file__), 'lover')
sys.path.insert(0, _lover_dir)

# Log at INFO level and expose a module-level logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def create_test_audio(sample_rate=16000, duration=2, frequency=440):
    """Build an in-memory mono 16-bit PCM WAV file containing a sine tone.

    The WAV container is assembled in a BytesIO buffer, so no temporary
    file is created (and none can be leaked if writing fails).

    Args:
        sample_rate: Samples per second, used both to generate the tone and
            as the frame rate written to the WAV header. Defaults to 16000.
        duration: Length of the tone in seconds. Defaults to 2.
        frequency: Tone frequency in Hz. Defaults to 440.

    Returns:
        bytes: The complete WAV file (header followed by the PCM frames).
    """
    import io
    import math
    import struct
    import wave

    # Peak amplitude: 30% of the signed 16-bit full scale.
    amplitude = 32767 * 0.3
    sample_count = int(sample_rate * duration)

    samples = [
        int(amplitude * math.sin(2 * math.pi * frequency * i / sample_rate))
        for i in range(sample_count)
    ]
    # Little-endian signed 16-bit samples, packed in a single call.
    pcm_data = struct.pack(f'<{sample_count}h', *samples)

    # Wrap the raw PCM in a WAV container.
    buffer = io.BytesIO()
    with wave.open(buffer, 'wb') as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)
        wav_file.setframerate(sample_rate)
        wav_file.writeframes(pcm_data)

    # wave does not close a file object it did not open itself, so the
    # buffer is still readable here.
    return buffer.getvalue()

def test_conversation():
    """Exercise the voice conversation endpoint end to end.

    Generates a test WAV with create_test_audio(), posts it base64-encoded
    to the /voice/call/conversation endpoint, and prints the user text and
    AI reply found under the response's 'data' key. When the response
    carries 'audio_data', it is base64-decoded and written to
    test_ai_voice.mp3 in the current working directory.

    The bearer token and base URL may be overridden with the
    VOICE_TEST_TOKEN and VOICE_TEST_BASE_URL environment variables; when
    they are unset the placeholder token and the local default address are
    used.

    Returns:
        bool: True when the endpoint answers with HTTP 200 and the response
        is processed without error; False on any other status code or when
        an exception is raised along the way.
    """
    print("=" * 60)
    print("测试完整语音对话流程")
    print("=" * 60)

    # Build the test audio clip.
    print("\n🎵 创建测试音频...")
    audio_data = create_test_audio()
    print(f"  音频大小: {len(audio_data)} 字节")

    # The request body carries the audio as base64 text.
    audio_base64 = base64.b64encode(audio_data).decode('utf-8')
    print(f"  Base64 长度: {len(audio_base64)}")

    # Call the API.
    print("\n📤 调用语音对话 API...")
    import requests

    # NOTE: a valid token is required; set VOICE_TEST_TOKEN (or replace the
    # placeholder) before expecting an authorized response.
    token = os.environ.get('VOICE_TEST_TOKEN') or "test_token"
    base_url = os.environ.get('VOICE_TEST_BASE_URL') or 'http://127.0.0.1:30101'
    url = base_url.rstrip('/') + '/voice/call/conversation'

    try:
        response = requests.post(
            url,
            json={
                'audio_data': audio_base64,
                'format': 'wav',
            },
            headers={
                'Authorization': f'Bearer {token}',
                'Content-Type': 'application/json',
            },
            timeout=60,
        )

        print(f"\n📋 响应状态: {response.status_code}")

        if response.status_code != 200:
            print(f"\n❌ 请求失败: {response.status_code}")
            print(f"  响应: {response.text}")
            return False

        result = response.json()
        # Tolerate a missing or null 'data' field: .get('data', {}) alone
        # would return None for an explicit null and crash on the next .get.
        data = result.get('data') if isinstance(result, dict) else None
        if not isinstance(data, dict):
            data = {}

        print("\n✅ 对话成功!")
        print(f"  用户说: {data.get('user_text', 'N/A')}")
        print(f"  AI回复: {data.get('ai_text', 'N/A')}")

        audio_data_result = data.get('audio_data')
        if audio_data_result:
            audio_bytes = base64.b64decode(audio_data_result)
            print(f"  AI语音大小: {len(audio_bytes)} 字节")

            # Save the AI voice reply to a file.
            output_file = 'test_ai_voice.mp3'
            with open(output_file, 'wb') as f:
                f.write(audio_bytes)
            print(f"  AI语音已保存: {output_file}")

        print("\n" + "=" * 60)
        print("🎉 语音对话测试完成！")
        print("=" * 60)
        return True

    except Exception as e:
        # Top-level boundary of this manual test: report any failure
        # (network, JSON decoding, base64, file write) and signal it via
        # the return value instead of propagating.
        print(f"\n❌ 测试失败: {e}")
        import traceback
        traceback.print_exc()
        return False

if __name__ == "__main__":
    # Print the prerequisites for running this test.
    print("\n⚠️  注意：此测试需要：")
    for requirement in (
        "  1. 后端服务运行在 http://127.0.0.1:30101",
        "  2. 有效的用户 token",
        "  3. 配置了 DASHSCOPE_API_KEY",
        "  4. 配置了 OSS",
    ):
        print(requirement)
    print("\n由于测试音频是正弦波，ASR 可能无法识别")
    print("但可以测试 LLM 和 TTS 部分\n")

    # Exit with status 0 when the test reports success, 1 otherwise.
    exit_code = 0 if test_conversation() else 1
    sys.exit(exit_code)