guoyu/Test/python/speech_server.py

168 lines
4.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
PaddleSpeech speech recognition service.
Windows version.
"""
import os
import time
import uuid
from difflib import SequenceMatcher

from flask import Flask, request, jsonify
from flask_cors import CORS
app = Flask(__name__)
CORS(app) # Allow cross-origin requests to this app
# Module-level state shared by the request handlers:
#   asr_model    - the speech recognition executor; None until init_model() sets it
#   model_loaded - True only after init_model() has finished successfully
asr_model = None
model_loaded = False
def init_model():
    """Load the PaddleSpeech ASR executor into the module-level state.

    On success the executor is stored in the global ``asr_model`` and
    ``model_loaded`` is set to True. Any exception raised while importing
    or constructing the executor is reported on stdout instead of being
    propagated, and ``model_loaded`` is set to False.

    Returns:
        bool: True if the model was loaded, False otherwise.
    """
    global asr_model, model_loaded
    print("正在加载 PaddleSpeech 模型...")
    try:
        # Imported here rather than at module level so that an import
        # failure is handled by the except clause below.
        from paddlespeech.cli.asr.infer import ASRExecutor
        executor = ASRExecutor()
        asr_model = executor
        model_loaded = True
        print("✓ 模型加载成功!")
        return True
    except Exception as load_error:
        print(f"✗ 模型加载失败: {load_error}")
        print("提示:首次运行会自动下载模型,需要等待...")
        model_loaded = False
        return False
def calculate_similarity(text1, text2):
    """Score how closely two texts match on a 0-100 scale.

    Both texts are normalized before comparison: every character that is
    not alphanumeric (whitespace, punctuation) is dropped and the rest is
    case-folded, so differences that cannot be heard in speech
    (capitalization, punctuation) do not lower the score.

    Args:
        text1: First text, e.g. the recognized transcript.
        text2: Second text, e.g. the reference text.

    Returns:
        0 if either text is empty (before or after normalization),
        otherwise ``SequenceMatcher.ratio() * 100`` rounded to two decimals.
    """
    if not text1 or not text2:
        return 0

    # Keep letters and digits only, then fold case.
    normalized1 = ''.join(filter(str.isalnum, text1)).casefold()
    normalized2 = ''.join(filter(str.isalnum, text2)).casefold()
    if not normalized1 or not normalized2:
        return 0

    # autojunk=False: the default heuristic ignores "popular" characters once
    # the second sequence reaches 200 items, which can drive the ratio of
    # long, repetitive but nearly identical texts down to 0.
    matcher = SequenceMatcher(None, normalized1, normalized2, autojunk=False)
    return round(matcher.ratio() * 100, 2)
@app.route('/api/speech/recognize', methods=['POST'])
def recognize():
    """Transcribe an uploaded audio clip and score it against a reference text.

    Reads the ``audio`` file part and the optional ``referenceText`` form
    field from the current request, stores the upload in a temporary file
    under ``./temp_audio``, runs the global ASR model on that file and
    compares the transcript with the reference text via
    ``calculate_similarity``. The temporary file is removed afterwards,
    whether recognition succeeded or not.

    Returns:
        A JSON response whose body always carries ``code`` and ``msg``:

        * success - ``data`` holds ``recognizedText``, ``score``,
          ``pronunciationScore``, ``fluencyScore`` and ``status``;
        * HTTP 400 - the request has no ``audio`` file part;
        * HTTP 500 - the model is not loaded, recognition failed, or any
          other error occurred while handling the request.
    """
    try:
        # Refuse early while the model is unavailable.
        if not model_loaded:
            return jsonify({
                'code': 500,
                'msg': '模型未加载,请稍后重试'
            }), 500

        # The upload must be present under the "audio" key.
        if 'audio' not in request.files:
            return jsonify({
                'code': 400,
                'msg': '未上传音频文件'
            }), 400

        audio_file = request.files['audio']
        reference_text = request.form.get('referenceText', '')

        # exist_ok avoids the check-then-create race between concurrent requests.
        temp_dir = './temp_audio'
        os.makedirs(temp_dir, exist_ok=True)

        # A millisecond timestamp alone can repeat when two requests arrive
        # together; the random suffix keeps each request on its own file.
        timestamp = str(int(time.time() * 1000))
        temp_path = os.path.join(
            temp_dir, f'audio_{timestamp}_{uuid.uuid4().hex}.wav'
        )

        try:
            audio_file.save(temp_path)
            print(f"收到音频文件: {temp_path}")
            print(f"参考文本: {reference_text}")

            try:
                # Run recognition on the saved file.
                result = asr_model(audio_input=temp_path, force_yes=True)
                recognized_text = result if isinstance(result, str) else str(result)
                print(f"识别结果: {recognized_text}")

                score = calculate_similarity(recognized_text, reference_text)
                # Placeholder heuristics: fixed offsets from the similarity
                # score, clamped at 0 and rounded to hide float noise.
                pronunciation_score = round(max(0, score - 5), 2)
                fluency_score = round(max(0, score - 3), 2)

                return jsonify({
                    'code': 200,
                    'msg': '成功',
                    'data': {
                        'recognizedText': recognized_text,
                        'score': score,
                        'pronunciationScore': pronunciation_score,
                        'fluencyScore': fluency_score,
                        'status': 'completed'
                    }
                })
            except Exception as e:
                print(f"识别失败: {str(e)}")
                return jsonify({
                    'code': 500,
                    'msg': f'识别失败: {str(e)}'
                }), 500
        finally:
            # Best-effort cleanup on every exit path; a failure here must not
            # replace the response that is already being returned.
            try:
                os.remove(temp_path)
            except FileNotFoundError:
                pass  # nothing was written, e.g. the save itself failed
            except OSError as cleanup_error:
                print(f"临时文件删除失败: {cleanup_error}")
    except Exception as e:
        print(f"处理错误: {str(e)}")
        return jsonify({
            'code': 500,
            'msg': f'处理失败: {str(e)}'
        }), 500
@app.route('/api/speech/health', methods=['GET'])
def health():
    """Health check: report that the service is up and whether the model is loaded.

    Returns:
        A JSON response with ``code`` 200, a status message, and
        ``data.model_loaded`` mirroring the module-level ``model_loaded`` flag.
    """
    status = {'model_loaded': model_loaded}
    payload = {
        'code': 200,
        'msg': '服务正常',
        'data': status,
    }
    return jsonify(payload)
if __name__ == '__main__':
    banner = "=" * 50

    # Startup header
    print(banner)
    print("PaddleSpeech 语音识别服务")
    print(banner)
    print()

    # Load the model (the first run downloads it, which takes time).
    # The return value is not checked: the server starts either way and
    # the health endpoint reports whether the model is available.
    init_model()
    print()

    # Announce the endpoints before handing control to the server loop.
    for line in (
        banner,
        "服务启动成功!",
        "访问地址: http://localhost:5000",
        "健康检查: http://localhost:5000/api/speech/health",
        banner,
    ):
        print(line)
    print()

    # Start the server, listening on all interfaces.
    app.run(host='0.0.0.0', port=5000, debug=False)