142 lines
4.5 KiB
Python
142 lines
4.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Test DashScope ASR end to end with an actual WAV file (a locally generated sine tone uploaded to OSS).
|
|
"""
|
|
import sys
|
|
import os
|
|
sys.path.append('.')
|
|
|
|
import dashscope
|
|
from dashscope.audio.asr import Transcription
|
|
from lover.config import settings
|
|
from lover.oss_utils import upload_audio_file, delete_audio_file
|
|
import logging
|
|
import wave
|
|
import struct
|
|
|
|
# 设置日志
|
|
logging.basicConfig(level=logging.INFO)
|
|
logger = logging.getLogger(__name__)
|
|
|
|
def create_test_wav(wav_file="test_audio.wav", sample_rate=16000, duration=1,
                    frequency=440):
    """Write a mono, 16-bit PCM WAV file containing a pure sine tone.

    With the default arguments the output is one second of a 440 Hz tone
    sampled at 16 kHz, written to ``test_audio.wav`` in the current working
    directory.

    Args:
        wav_file: Destination path of the WAV file.
        sample_rate: Sampling rate in Hz.
        duration: Length of the tone in seconds (may be fractional; the
            frame count is truncated to an integer).
        frequency: Tone frequency in Hz.

    Returns:
        The path the file was written to (``wav_file``).
    """
    import math

    # Peak sample value of the tone; well inside the signed 16-bit range.
    amplitude = 16000
    frame_count = int(sample_rate * duration)

    # Clamp defensively so every value fits struct's signed 16-bit 'h' code.
    samples = [
        max(-32767, min(32767, int(
            amplitude * math.sin(2 * math.pi * frequency * (i / sample_rate))
        )))
        for i in range(frame_count)
    ]

    with wave.open(wav_file, 'wb') as wav:
        wav.setnchannels(1)            # mono
        wav.setsampwidth(2)            # 2 bytes per sample = 16-bit
        wav.setframerate(sample_rate)
        # Pack all samples as little-endian int16 and write them in a single
        # call instead of issuing one writeframes() per sample.
        wav.writeframes(struct.pack(f'<{len(samples)}h', *samples))

    logger.info(f"创建测试 WAV 文件: {wav_file}")
    return wav_file
|
|
|
|
def _cleanup_test_artifacts(wav_file, file_url):
    """Best-effort removal of the uploaded OSS object and the local WAV file.

    Cleanup problems are logged but never raised, so they cannot mask the
    outcome of the test itself.
    """
    logger.info("清理文件...")

    # file_url stays None when the upload never completed.
    if file_url is not None:
        try:
            delete_audio_file(file_url)
        except Exception as cleanup_error:
            logger.warning(f"删除 OSS 文件失败: {cleanup_error}")

    try:
        os.remove(wav_file)
    except FileNotFoundError:
        # Already gone; nothing to clean up.
        pass
    except OSError as cleanup_error:
        logger.warning(f"删除本地文件失败: {cleanup_error}")


def test_real_audio_asr():
    """Run a generated WAV file through upload + DashScope transcription.

    Steps: configure the DashScope API key from ``settings``, create a test
    WAV via ``create_test_wav()``, upload it with ``upload_audio_file``,
    submit a ``paraformer-v2`` transcription task, wait for the result and
    log it. The uploaded object and the local file are removed afterwards,
    whether or not the run succeeded.

    Returns:
        True when a transcription result was fetched from the service (its
        task status, including ``FAILED``, is only logged). False when task
        creation or result retrieval returned a non-200 status, waiting for
        the task raised, or any other step raised an exception.
    """
    import traceback

    api_key = settings.DASHSCOPE_API_KEY
    dashscope.api_key = api_key
    # Log only a short hint of the key, and tolerate an unset key instead of
    # crashing on slicing None.
    key_hint = f"{api_key[:4]}***" if api_key else "<unset>"
    logger.info(f"DashScope API Key: {key_hint}")

    wav_file = create_test_wav()
    file_url = None  # set once the upload succeeds; drives OSS cleanup
    try:
        with open(wav_file, 'rb') as f:
            wav_data = f.read()
        logger.info(f"WAV 文件大小: {len(wav_data)} 字节")

        logger.info("上传 WAV 文件到 OSS...")
        file_url = upload_audio_file(wav_data, "wav")
        logger.info(f"上传成功: {file_url}")

        logger.info("调用 DashScope ASR...")
        task_response = Transcription.async_call(
            model='paraformer-v2',
            file_urls=[file_url],
            parameters={
                'format': 'wav',
                'sample_rate': 16000,
                'enable_words': False,
            },
        )
        logger.info(f"ASR 任务响应: status_code={task_response.status_code}")

        if task_response.status_code != 200:
            logger.error(f"任务创建失败: {task_response.status_code}")
            return False

        task_id = task_response.output.task_id
        logger.info(f"任务创建成功: {task_id}")

        logger.info("等待 ASR 结果...")
        try:
            result = Transcription.wait(task=task_id)
        except Exception as wait_error:
            logger.error(f"等待结果失败: {wait_error}")
            return False

        logger.info(f"ASR 结果: status_code={result.status_code}")
        if result.status_code != 200:
            logger.error(f"获取结果失败: {result.status_code}")
            return False

        task_status = result.output.task_status
        logger.info(f"任务状态: {task_status}")
        if task_status == "FAILED":
            logger.info(f"失败原因: {getattr(result.output, 'code', 'Unknown')}")
            logger.info(f"失败消息: {getattr(result.output, 'message', 'Unknown')}")
        elif task_status == "SUCCEEDED":
            logger.info("✅ ASR 识别成功!")
            logger.info(f"结果: {result.output}")

        return True

    except Exception as e:
        logger.error(f"测试失败: {e}")
        logger.error(f"错误堆栈: {traceback.format_exc()}")
        return False

    finally:
        # Runs on every exit path so the OSS object is not leaked when a
        # step after the upload fails.
        _cleanup_test_artifacts(wav_file, file_url)
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the ASR check and exit non-zero on failure.
    if not test_real_audio_asr():
        logger.error("💥 真实音频 ASR 测试失败")
        sys.exit(1)

    logger.info("🎉 真实音频 ASR 测试完成")