#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
PaddleSpeech speech recognition service.

Windows version.
"""

import os
import time
import uuid
from difflib import SequenceMatcher

from flask import Flask, request, jsonify
from flask_cors import CORS

# Flask application object that the route decorators in this file attach to.
app = Flask(__name__)
# Allow cross-origin requests to the application's routes.
CORS(app)

# Module-level recognizer state.
# asr_model holds the recognizer instance once one has been created;
# model_loaded is the flag the request handlers check before using it.
asr_model = None
model_loaded = False


def init_model():
    """Create the PaddleSpeech ASR executor and record the outcome globally.

    On success the executor instance is stored in the module-level
    ``asr_model`` and ``model_loaded`` is set to ``True``. On any
    ``Exception`` the error is printed (not re-raised) and ``model_loaded``
    is set to ``False``.

    Returns:
        bool: ``True`` if the executor was created, ``False`` otherwise.
    """
    global asr_model, model_loaded

    print("正在加载 PaddleSpeech 模型...")
    try:
        # Imported inside the try so that an import failure is reported by
        # the handler below instead of propagating to the caller.
        from paddlespeech.cli.asr.infer import ASRExecutor

        asr_model = ASRExecutor()
        model_loaded = True
        print("✓ 模型加载成功!")
    except Exception as e:
        reason = str(e)
        print(f"✗ 模型加载失败: {reason}")
        print("提示:首次运行会自动下载模型,需要等待...")
        model_loaded = False
        return False

    return True


def calculate_similarity(text1, text2):
    """Score how closely two texts match on a 0-100 scale.

    Both texts are reduced to their alphanumeric characters (whitespace and
    punctuation are dropped) and case-folded, then compared with
    ``difflib.SequenceMatcher``.

    Args:
        text1: First text to compare (a string).
        text2: Second text to compare (a string).

    Returns:
        ``0`` if either text is empty before or after normalization;
        otherwise the match ratio multiplied by 100 and rounded to two
        decimal places.
    """
    if not text1 or not text2:
        return 0

    # Keep only letters/digits and ignore letter case: neither punctuation
    # nor capitalization should affect the comparison of spoken text.
    normalized1 = ''.join(filter(str.isalnum, text1)).casefold()
    normalized2 = ''.join(filter(str.isalnum, text2)).casefold()

    if not normalized1 or not normalized2:
        return 0

    # autojunk=False: with the default heuristic, once the second sequence
    # has 200+ items every "popular" character is ignored during matching,
    # so long, nearly identical texts could score close to zero.
    matcher = SequenceMatcher(None, normalized1, normalized2, autojunk=False)
    return round(matcher.ratio() * 100, 2)


@app.route('/api/speech/recognize', methods=['POST'])
def recognize():
    """Speech recognition endpoint.

    Expects a multipart POST with an ``audio`` file part and an optional
    ``referenceText`` form field. The upload is written to a uniquely named
    temporary file, passed to the global ``asr_model``, and the transcript
    is scored against the reference text with ``calculate_similarity``.

    Returns:
        A JSON body of the form ``{'code', 'msg'[, 'data']}``:

        * HTTP 200 with ``data`` holding ``recognizedText``, ``score``,
          ``pronunciationScore``, ``fluencyScore`` and ``status``.
        * HTTP 400 if no ``audio`` file part was uploaded.
        * HTTP 500 if the model is not loaded, or if saving or recognizing
          the audio raised an exception.
    """
    # Set as soon as the path is chosen so the ``finally`` clause can remove
    # the file on every exit path, including a failed or partial save.
    temp_path = None
    try:
        # The model must be loaded before any request can be served.
        if not model_loaded:
            return jsonify({
                'code': 500,
                'msg': '模型未加载,请稍后重试'
            }), 500

        # Validate the upload.
        if 'audio' not in request.files:
            return jsonify({
                'code': 400,
                'msg': '未上传音频文件'
            }), 400

        audio_file = request.files['audio']
        reference_text = request.form.get('referenceText', '')

        # Save the upload to a temporary file.
        temp_dir = './temp_audio'
        # exist_ok avoids the check-then-create race between two requests.
        os.makedirs(temp_dir, exist_ok=True)

        # A millisecond timestamp alone can collide when requests arrive
        # concurrently, so a random suffix keeps every file name unique.
        timestamp = str(int(time.time() * 1000))
        temp_path = os.path.join(
            temp_dir, f'audio_{timestamp}_{uuid.uuid4().hex}.wav'
        )
        audio_file.save(temp_path)

        print(f"收到音频文件: {temp_path}")
        print(f"参考文本: {reference_text}")

        # Run recognition.
        try:
            result = asr_model(audio_input=temp_path, force_yes=True)
            recognized_text = result if isinstance(result, str) else str(result)
            print(f"识别结果: {recognized_text}")

            # Similarity between the transcript and the reference text.
            score = calculate_similarity(recognized_text, reference_text)

            # Placeholder pronunciation/fluency scores: fixed offsets from
            # the similarity score (to be refined later). Rounded so float
            # subtraction noise does not leak into the JSON response.
            pronunciation_score = round(max(0, score - 5), 2)
            fluency_score = round(max(0, score - 3), 2)

            return jsonify({
                'code': 200,
                'msg': '成功',
                'data': {
                    'recognizedText': recognized_text,
                    'score': score,
                    'pronunciationScore': pronunciation_score,
                    'fluencyScore': fluency_score,
                    'status': 'completed'
                }
            })

        except Exception as e:
            print(f"识别失败: {str(e)}")
            return jsonify({
                'code': 500,
                'msg': f'识别失败: {str(e)}'
            }), 500

    except Exception as e:
        print(f"处理错误: {str(e)}")
        return jsonify({
            'code': 500,
            'msg': f'处理失败: {str(e)}'
        }), 500

    finally:
        # Best-effort removal of the temporary file; a cleanup problem must
        # never replace the response that is already being returned.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except FileNotFoundError:
                # The file was never created (e.g. the save failed early).
                pass
            except OSError as e:
                print(f"临时文件删除失败: {temp_path} ({e})")


@app.route('/api/speech/health', methods=['GET'])
def health():
    """Health-check endpoint.

    Returns:
        A JSON response with ``code`` 200, a status message, and a ``data``
        object whose ``model_loaded`` entry mirrors the module-level flag.
    """
    model_state = {'model_loaded': model_loaded}
    payload = {
        'code': 200,
        'msg': '服务正常',
        'data': model_state,
    }
    return jsonify(payload)


if __name__ == '__main__':
    divider = "=" * 50

    # Startup banner.
    for banner_line in (divider, "PaddleSpeech 语音识别服务", divider, ""):
        print(banner_line)

    # Initialize the model before serving (per the original note, the first
    # run downloads the model and therefore takes a while).
    init_model()

    # Connection details for whoever is watching the console.
    startup_lines = (
        "",
        divider,
        "服务启动成功!",
        "访问地址: http://localhost:5000",
        "健康检查: http://localhost:5000/api/speech/health",
        divider,
        "",
    )
    for startup_line in startup_lines:
        print(startup_line)

    # Start the HTTP server.
    app.run(host='0.0.0.0', port=5000, debug=False)