Ai_GirlFriend/test_real_audio.py

142 lines
4.5 KiB
Python
Raw Normal View History

2026-03-04 12:04:21 +08:00
#!/usr/bin/env python3
"""
Test DashScope ASR end to end with a real WAV audio file (a generated sine tone).
"""
import sys
import os
sys.path.append('.')
import dashscope
from dashscope.audio.asr import Transcription
from lover.config import settings
from lover.oss_utils import upload_audio_file, delete_audio_file
import logging
import wave
import struct
# Configure logging at INFO level and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def create_test_wav(wav_file="test_audio.wav", sample_rate=16000, duration=1,
                    frequency=440, amplitude=16000):
    """Create a mono, 16-bit PCM WAV file containing a sine tone.

    Called with no arguments this writes one second of a 440 Hz tone sampled
    at 16 kHz to ``test_audio.wav`` in the current working directory.

    Args:
        wav_file: Path (string) of the WAV file to write. An existing file
            at that path is overwritten.
        sample_rate: Sampling rate in Hz.
        duration: Length of the tone in seconds; fractional values are
            truncated to a whole number of frames.
        frequency: Tone frequency in Hz.
        amplitude: Peak sample value before clamping to the 16-bit range.

    Returns:
        The path that was written (the ``wav_file`` argument).
    """
    import math

    frame_count = int(sample_rate * duration)

    # Same per-sample expression as before (t = i / sample_rate), so the
    # default call produces the same sample values; the clamp keeps custom
    # amplitudes inside what a signed 16-bit frame can hold.
    samples = [
        max(-32767, min(32767, int(
            amplitude * math.sin(2 * math.pi * frequency * (i / sample_rate))
        )))
        for i in range(frame_count)
    ]

    with wave.open(wav_file, 'wb') as wav:
        wav.setnchannels(1)  # mono
        wav.setsampwidth(2)  # 2 bytes per sample = 16-bit
        wav.setframerate(sample_rate)
        # Pack every little-endian sample at once and write a single buffer
        # instead of issuing one writeframes() call per sample.
        wav.writeframes(struct.pack(f'<{len(samples)}h', *samples))

    logger.info(f"创建测试 WAV 文件: {wav_file}")
    return wav_file
def _cleanup_test_artifacts(wav_file, file_url):
    """Remove the uploaded OSS object and the local WAV file, best effort.

    The two removals are attempted independently so that a failure of one
    does not prevent the other.

    Args:
        wav_file: Path of the local WAV file to remove.
        file_url: URL returned by ``upload_audio_file``, or ``None`` when no
            upload happened (the remote delete is then skipped).

    Returns:
        True when every attempted removal succeeded, False otherwise.
    """
    cleaned = True
    if file_url is not None:
        try:
            delete_audio_file(file_url)
        except Exception as cleanup_error:
            logger.error(f"删除 OSS 文件失败: {cleanup_error}")
            cleaned = False
    try:
        os.remove(wav_file)
    except OSError as cleanup_error:
        logger.error(f"删除本地文件失败: {cleanup_error}")
        cleaned = False
    return cleaned


def test_real_audio_asr():
    """Run an end-to-end DashScope ASR check on a generated WAV file.

    Steps: configure the DashScope API key, create a test WAV file, upload it
    with ``upload_audio_file``, submit a ``paraformer-v2`` transcription task,
    wait for it and log the outcome, then remove the uploaded object and the
    local file.

    Returns:
        True when the flow ran through without an unhandled exception and
        cleanup succeeded; False otherwise. A non-200 response, a task
        reported as ``FAILED``, or an error while waiting for the result is
        logged but does not by itself make the result False.
    """
    import traceback

    api_key = settings.DASHSCOPE_API_KEY
    dashscope.api_key = api_key
    # Slice defensively: an unset key must not crash before the test starts.
    # NOTE(review): this still logs the first 10 characters of the key —
    # consider shortening the preview if logs are shared.
    key_preview = (api_key or "")[:10]
    logger.info(f"DashScope API Key: {key_preview}***")

    wav_file = create_test_wav()
    file_url = None  # set once the upload succeeds, so cleanup knows what to delete
    completed = False
    try:
        with open(wav_file, 'rb') as f:
            wav_data = f.read()
        logger.info(f"WAV 文件大小: {len(wav_data)} 字节")

        logger.info("上传 WAV 文件到 OSS...")
        file_url = upload_audio_file(wav_data, "wav")
        logger.info(f"上传成功: {file_url}")

        logger.info("调用 DashScope ASR...")
        task_response = Transcription.async_call(
            model='paraformer-v2',
            file_urls=[file_url],
            parameters={
                'format': 'wav',
                'sample_rate': 16000,
                'enable_words': False,
            },
        )
        logger.info(f"ASR 任务响应: status_code={task_response.status_code}")

        if task_response.status_code == 200:
            task_id = task_response.output.task_id
            logger.info(f"任务创建成功: {task_id}")
            logger.info("等待 ASR 结果...")
            # Errors while waiting or inspecting the result are reported here
            # and do not abort the run.
            try:
                result = Transcription.wait(task=task_id)
                logger.info(f"ASR 结果: status_code={result.status_code}")
                if result.status_code == 200:
                    logger.info(f"任务状态: {result.output.task_status}")
                    if result.output.task_status == "FAILED":
                        logger.info(f"失败原因: {getattr(result.output, 'code', 'Unknown')}")
                        logger.info(f"失败消息: {getattr(result.output, 'message', 'Unknown')}")
                    elif result.output.task_status == "SUCCEEDED":
                        logger.info("✅ ASR 识别成功!")
                        logger.info(f"结果: {result.output}")
                else:
                    logger.error(f"获取结果失败: {result.status_code}")
            except Exception as wait_error:
                logger.error(f"等待结果失败: {wait_error}")
        else:
            logger.error(f"任务创建失败: {task_response.status_code}")

        completed = True
    except Exception as e:
        logger.error(f"测试失败: {e}")
        logger.error(f"错误堆栈: {traceback.format_exc()}")
    finally:
        # Runs on success, on failure and on interruption (e.g. Ctrl-C during
        # the blocking wait), so neither the OSS object nor the local file
        # is left behind.
        logger.info("清理文件...")
        cleaned = _cleanup_test_artifacts(wav_file, file_url)

    return completed and cleaned
if __name__ == "__main__":
    # Run the check when executed as a script; a failed run is reported
    # through a non-zero exit status.
    success = test_real_audio_asr()
    if not success:
        logger.error("💥 真实音频 ASR 测试失败")
        sys.exit(1)
    logger.info("🎉 真实音频 ASR 测试完成")