557 lines
18 KiB
Python
557 lines
18 KiB
Python
# 配置whisper语音测评功能的脚本,必须要在jar包启动前启动,要不然不会被识别到
|
||
|
||
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
Whisper语音识别服务
|
||
支持语音识别和评测功能
|
||
多线程并发处理
|
||
作者: AI Assistant
|
||
版本: 2.0
|
||
"""
|
||
|
||
from flask import Flask, request, jsonify
|
||
from flask_cors import CORS
|
||
import whisper
|
||
import os
|
||
import tempfile
|
||
import logging
|
||
from logging.handlers import RotatingFileHandler
|
||
import traceback
|
||
from datetime import datetime
|
||
import sys
|
||
|
||
# ============================================
|
||
# 日志配置
|
||
# ============================================
|
||
def setup_logging():
    """Configure and return the ``whisper_server`` logger.

    Attaches an INFO-level console handler writing to stdout and, on a
    best-effort basis, a rotating file handler at ``logs/whisper_server.log``
    (10 MB per file, 5 backups, UTF-8). Both handlers share one format.

    The function is idempotent: if the logger already has handlers they are
    left untouched, so a repeated call does not duplicate every log line.

    Returns:
        logging.Logger: The configured logger.
    """
    logger = logging.getLogger('whisper_server')
    logger.setLevel(logging.INFO)

    # logging.getLogger returns the same object for the same name, so adding
    # handlers on a second call would emit each record twice.
    if logger.handlers:
        return logger

    formatter = logging.Formatter(
        '%(asctime)s [%(levelname)s] %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
    )

    # Console output.
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    # File output is optional: a failure here is logged and otherwise ignored
    # so that the service can still start with console logging only.
    try:
        log_dir = 'logs'
        # exist_ok avoids the check-then-create race of a separate exists().
        os.makedirs(log_dir, exist_ok=True)

        file_handler = RotatingFileHandler(
            os.path.join(log_dir, 'whisper_server.log'),
            maxBytes=10 * 1024 * 1024,  # 10MB
            backupCount=5,
            encoding='utf-8',
        )
        file_handler.setLevel(logging.INFO)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)
    except Exception as e:
        logger.warning(f"无法创建日志文件: {e}")

    return logger
|
||
|
||
# Initialise logging before anything else so later setup can log.
logger = setup_logging()

# ============================================
# Flask application setup
# ============================================
app = Flask(__name__)
CORS(app)  # allow cross-origin requests
app.config.update(MAX_CONTENT_LENGTH=16 * 1024 * 1024)  # 16 MB upload limit

# ============================================
# Module-level state
# ============================================
MODEL_NAME = "tiny"  # one of: tiny, base, small, medium, large
whisper_model = None  # populated lazily by load_whisper_model()
|
||
|
||
# ============================================
|
||
# 模型加载
|
||
# ============================================
|
||
def load_whisper_model():
    """Return the shared Whisper model, loading it on first use.

    The model named by ``MODEL_NAME`` is loaded once and stored in the
    module-level ``whisper_model``; later calls return that same object.

    Returns:
        The object returned by ``whisper.load_model``.

    Raises:
        Exception: Any error raised while loading is logged and re-raised.
    """
    global whisper_model

    # Already loaded: nothing to do.
    if whisper_model is not None:
        return whisper_model

    logger.info(f"正在加载Whisper模型 ({MODEL_NAME})...")
    try:
        whisper_model = whisper.load_model(MODEL_NAME)
        logger.info(f"✅ Whisper模型加载成功 ({MODEL_NAME})")
    except Exception as load_error:
        logger.error(f"❌ 模型加载失败: {load_error}")
        raise

    return whisper_model
|
||
|
||
# ============================================
|
||
# 工具函数
|
||
# ============================================
|
||
def convert_to_simplified(text):
    """Convert Traditional Chinese text to Simplified Chinese.

    Tries ``zhconv`` first and falls back to ``opencc``. If neither package
    can be imported, a warning is logged and the input is returned unchanged.
    Falsy input (``None`` or ``""``) is returned as-is.

    Args:
        text: The text to convert.

    Returns:
        The converted text, or ``text`` itself when no converter is available.
    """
    if not text:
        return text

    # Option 1: zhconv (preferred, pure Python).
    try:
        import zhconv
        simplified = zhconv.convert(text, 'zh-cn')
    except ImportError:
        pass
    else:
        if simplified != text:
            logger.info(f"繁简转换: {text} -> {simplified}")
        return simplified

    # Option 2: OpenCC.
    try:
        from opencc import OpenCC
        simplified = OpenCC('t2s').convert(text)
    except ImportError:
        pass
    else:
        if simplified != text:
            logger.info(f"繁简转换(OpenCC): {text} -> {simplified}")
        return simplified

    # Neither converter is installed: hand back the original text.
    logger.warning("繁简转换库未安装,请运行: pip install zhconv")
    return text
|
||
|
||
def clean_text_strict(text):
    """Normalise text for accuracy scoring.

    Applies Unicode NFKC normalisation, keeps only characters in the range
    U+4E00-U+9FFF plus ASCII letters and digits, and lower-cases the result.

    Args:
        text: The text to clean; falsy input yields ``""``.

    Returns:
        str: The cleaned, lower-cased text.
    """
    import re
    import unicodedata

    if not text:
        return ""

    # NFKC folds full-width letters/digits into their ASCII forms first.
    normalized = unicodedata.normalize('NFKC', text)

    # Collect the runs of allowed characters and glue them back together.
    kept_runs = re.findall(r'[\u4e00-\u9fffa-zA-Z0-9]+', normalized)
    return "".join(kept_runs).lower()
|
||
|
||
# ============================================
|
||
# API路由
|
||
# ============================================
|
||
|
||
@app.route('/health', methods=['GET'])
def health_check():
    """Health-check endpoint.

    Returns a JSON object with a fixed status, the service name, the
    configured model name, the version string and the current local time
    in ISO-8601 format.
    """
    now_iso = datetime.now().isoformat()
    payload = {
        "status": "ok",
        "service": "Whisper Speech Recognition",
        "model": MODEL_NAME,
        "version": "2.0",
        "timestamp": now_iso,
    }
    return jsonify(payload)
|
||
|
||
@app.route('/recognize', methods=['POST'])
def recognize():
    """Speech recognition endpoint (transcription only, no scoring).

    Request (multipart/form-data):
        file: The audio file to transcribe.

    Response JSON on success:
        {
            "code": 200,
            "msg": "识别成功",
            "data": {"text": "<recognized text>"}
        }

    A missing ``file`` part or an empty filename yields a 400 response.
    Any exception raised while processing yields a 500 response whose
    ``msg`` contains the exception text.
    """
    try:
        # Validate the upload.
        if 'file' not in request.files:
            logger.warning("请求缺少音频文件")
            return jsonify({"code": 400, "msg": "缺少音频文件", "data": None}), 400

        audio_file = request.files['file']
        if audio_file.filename == '':
            logger.warning("文件名为空")
            return jsonify({"code": 400, "msg": "文件名为空", "data": None}), 400

        # Reserve a temp path and close the handle straight away; the upload
        # is written to that path below.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
            temp_path = temp_file.name

        try:
            # Saving happens inside the try so the temp file is removed even
            # when writing the upload fails.
            audio_file.save(temp_path)

            # Measure the size on disk instead of reading the whole upload
            # into memory just for the log line.
            logger.info(f"收到识别请求: {audio_file.filename} ({os.path.getsize(temp_path)} bytes)")

            model = load_whisper_model()

            # NOTE(review): one model object is shared by all worker threads;
            # confirm that concurrent transcribe() calls are safe for the
            # installed Whisper version.
            logger.info(f"开始识别: {audio_file.filename}")
            result = model.transcribe(temp_path, language='zh', fp16=False)
            recognized_text = result['text'].strip()

            # Traditional -> Simplified Chinese.
            recognized_text = convert_to_simplified(recognized_text)

            logger.info(f"✅ 识别成功: {recognized_text}")

            return jsonify({
                "code": 200,
                "msg": "识别成功",
                "data": {
                    "text": recognized_text
                }
            })

        finally:
            # Best-effort cleanup: a failure here must not mask the response.
            try:
                if os.path.exists(temp_path):
                    os.remove(temp_path)
            except Exception as e:
                logger.warning(f"删除临时文件失败: {e}")

    except Exception as e:
        logger.error(f"识别失败: {str(e)}")
        logger.error(traceback.format_exc())
        return jsonify({
            "code": 500,
            "msg": f"识别失败: {str(e)}",
            "data": None
        }), 500
|
||
|
||
def _score_reading(recognized_text, standard_text):
    """Score a recognised utterance against the expected text.

    Both texts are normalised with ``clean_text_strict`` and compared with
    ``difflib.SequenceMatcher``. Intermediate values are logged.

    Args:
        recognized_text: Text produced by speech recognition.
        standard_text: The reference text the speaker was expected to read.

    Returns:
        dict: Unrounded values under the keys ``similarity`` (0-1 ratio) and
        ``accuracy``, ``completeness``, ``fluency``, ``pronunciation`` and
        ``total`` (0-100 scale).
    """
    from difflib import SequenceMatcher

    clean_recognized = clean_text_strict(recognized_text)
    clean_standard = clean_text_strict(standard_text)

    similarity = SequenceMatcher(None, clean_recognized, clean_standard).ratio()

    # Debug logging of the inputs and the raw ratio.
    logger.info(f"📝 标准文本: {standard_text}")
    logger.info(f"🎤 识别文本: {recognized_text}")
    logger.info(f"🧹 清理后标准: {clean_standard}")
    logger.info(f"🧹 清理后识别: {clean_recognized}")
    logger.info(f"📊 相似度: {similarity:.4f} ({similarity*100:.2f}%)")

    # Accuracy: near-perfect matches are snapped to 100 / 99, otherwise the
    # similarity percentage is used directly.
    if similarity >= 0.98:
        accuracy = 100
        logger.info(f"✅ 相似度>=98%,准确度给满分: {accuracy}")
    elif similarity >= 0.95:
        accuracy = 99
        logger.info(f"✅ 相似度>=95%,准确度给99分: {accuracy}")
    else:
        accuracy = similarity * 100
        logger.info(f"📊 准确度: {accuracy:.2f}")

    # Completeness: length of the recognised text relative to the reference,
    # capped at 100; max(..., 1) guards against an empty reference.
    completeness = min(len(clean_recognized) / max(len(clean_standard), 1) * 100, 100)

    # Fluency and pronunciation are derived from accuracy (they are not
    # measured from the audio): full marks when accuracy is 100, otherwise a
    # fixed fraction of it.
    fluency = 100 if accuracy == 100 else accuracy * 0.95
    pronunciation = 100 if accuracy == 100 else accuracy * 0.98

    # Weighted total.
    total_score = (accuracy * 0.3 + completeness * 0.25 + fluency * 0.3 + pronunciation * 0.15)

    return {
        "similarity": similarity,
        "accuracy": accuracy,
        "completeness": completeness,
        "fluency": fluency,
        "pronunciation": pronunciation,
        "total": total_score,
    }


@app.route('/evaluate', methods=['POST'])
def evaluate():
    """Speech evaluation endpoint (recognition + scoring).

    Request (multipart/form-data):
        file: The audio file to evaluate.
        text: The reference text the speaker was expected to read.

    Response JSON on success:
        {
            "code": 200,
            "msg": "评测成功",
            "data": {
                "text": "<recognized text>",
                "score": 95,
                "accuracy": 98,
                "fluency": 95,
                "completeness": 100,
                "pronunciation": 96,
                "similarity": 98.5
            }
        }

    A missing ``file`` part or missing/empty ``text`` yields a 400 response.
    Any exception raised while processing yields a 500 response whose
    ``msg`` contains the exception text.
    """
    try:
        # Validate the request.
        if 'file' not in request.files:
            logger.warning("请求缺少音频文件")
            return jsonify({"code": 400, "msg": "缺少音频文件", "data": None}), 400

        # NOTE(review): unlike /recognize, an empty filename is not rejected
        # here; confirm whether that difference is intentional.
        audio_file = request.files['file']
        standard_text = request.form.get('text', '')

        if not standard_text:
            logger.warning("请求缺少标准文本")
            return jsonify({"code": 400, "msg": "缺少标准文本", "data": None}), 400

        logger.info(f"收到评测请求: {audio_file.filename}, 标准文本: {standard_text}")

        # Reserve a temp path and close the handle straight away; the upload
        # is written to that path below.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
            temp_path = temp_file.name

        try:
            # Saving happens inside the try so the temp file is removed even
            # when writing the upload fails.
            audio_file.save(temp_path)

            # 1. Recognise the audio.
            model = load_whisper_model()
            logger.info("开始识别音频...")
            result = model.transcribe(temp_path, language='zh', fp16=False)
            recognized_text = result['text'].strip()
            recognized_text = convert_to_simplified(recognized_text)

            # 2. Score it against the reference text.
            scores = _score_reading(recognized_text, standard_text)
            similarity = scores["similarity"]
            accuracy = scores["accuracy"]
            completeness = scores["completeness"]
            fluency = scores["fluency"]
            pronunciation = scores["pronunciation"]
            total_score = scores["total"]

            logger.info(f"✅ 评测完成: 总分={total_score:.0f}, 准确度={accuracy:.0f}, 流畅度={fluency:.0f}, 完整度={completeness:.0f}, 发音={pronunciation:.0f}")

            return jsonify({
                "code": 200,
                "msg": "评测成功",
                "data": {
                    "text": recognized_text,
                    "score": round(total_score),
                    "accuracy": round(accuracy),
                    "fluency": round(fluency),
                    "completeness": round(completeness),
                    "pronunciation": round(pronunciation),
                    "similarity": round(similarity * 100, 2)
                }
            })

        finally:
            # Best-effort cleanup: a failure here must not mask the response.
            try:
                if os.path.exists(temp_path):
                    os.remove(temp_path)
            except Exception as e:
                logger.warning(f"删除临时文件失败: {e}")

    except Exception as e:
        logger.error(f"评测失败: {str(e)}")
        logger.error(traceback.format_exc())
        return jsonify({
            "code": 500,
            "msg": f"评测失败: {str(e)}",
            "data": None
        }), 500
|
||
|
||
# ============================================
|
||
# 错误处理
|
||
# ============================================
|
||
@app.errorhandler(413)
def request_entity_too_large(error):
    """Return the JSON error body for HTTP 413 (request body too large)."""
    logger.warning("请求文件过大")
    body = {"code": 413, "msg": "文件太大,最大支持16MB", "data": None}
    return jsonify(body), 413
|
||
|
||
@app.errorhandler(500)
def internal_error(error):
    """Log the error and return the JSON error body for HTTP 500."""
    logger.error(f"服务器内部错误: {error}")
    body = {"code": 500, "msg": "服务器内部错误", "data": None}
    return jsonify(body), 500
|
||
|
||
@app.errorhandler(404)
def not_found(error):
    """Return the JSON error body for HTTP 404 (unknown endpoint)."""
    body = {"code": 404, "msg": "接口不存在", "data": None}
    return jsonify(body), 404
|
||
|
||
# ============================================
|
||
# 环境检查
|
||
# ============================================
|
||
def check_environment():
    """Check the runtime environment and dependencies, logging a report.

    Required packages (whisper, flask, flask_cors, waitress) are imported one
    by one; if any import fails, the process exits with status 1 after the
    report is printed. The Traditional->Simplified converters (zhconv /
    opencc) and the ``ffmpeg`` executable are optional and only produce
    warnings.

    Returns:
        bool: ``True`` when every required dependency is importable. When one
        is missing the function does not return; it calls ``sys.exit(1)``.
    """
    import importlib
    import shutil

    rule = "=" * 70

    logger.info("")
    logger.info(rule)
    logger.info("🔍 环境检查")
    logger.info(rule)

    all_ok = True

    # 1. Python version (informational only).
    py_version = sys.version.split()[0]
    logger.info(f" Python版本: {py_version} ✅")

    # 2-5. Required packages: (module to import, label in the report,
    # name to pass to pip).
    required = (
        ('whisper', 'Whisper', 'openai-whisper'),
        ('flask', 'Flask', 'flask'),
        ('flask_cors', 'Flask-CORS', 'flask-cors'),
        ('waitress', 'Waitress', 'waitress'),
    )
    for module_name, label, pip_name in required:
        try:
            importlib.import_module(module_name)
        except ImportError:
            logger.error(f" {label}: 未安装 ❌ (pip install {pip_name})")
            all_ok = False
            continue

        status = "已安装"
        if module_name == 'flask':
            # Report the Flask version from package metadata rather than
            # flask.__version__, which is deprecated in Flask 3.x; an
            # AttributeError from it would abort startup.
            try:
                from importlib.metadata import version
                status = version('flask')
            except Exception:
                # The version is cosmetic; fall back to the generic label
                # (e.g. Python < 3.8 or missing distribution metadata).
                status = "已安装"
        logger.info(f" {label}: {status} ✅")

    # 6. Optional Traditional->Simplified converters; a missing one is fine
    # as long as the other is present.
    zhconv_ok = False
    opencc_ok = False

    try:
        import zhconv  # noqa: F401
        zhconv_ok = True
        logger.info(" zhconv: 已安装 ✅ (繁简转换)")
    except ImportError:
        pass

    try:
        from opencc import OpenCC  # noqa: F401
        opencc_ok = True
        logger.info(" OpenCC: 已安装 ✅ (繁简转换备用)")
    except ImportError:
        pass

    if not zhconv_ok and not opencc_ok:
        logger.warning(" 繁简转换: 未安装 ⚠️ (pip install zhconv)")
        logger.warning(" 识别结果可能包含繁体字!")

    # 7. ffmpeg on PATH (needed by Whisper according to the original note).
    if shutil.which('ffmpeg'):
        logger.info(" FFmpeg: 已安装 ✅")
    else:
        logger.warning(" FFmpeg: 未找到 ⚠️ (某些音频格式可能无法处理)")

    logger.info(rule)

    if all_ok:
        logger.info("✅ 环境检查通过!")
    else:
        logger.error("❌ 缺少必要依赖,请先安装后再启动!")
        sys.exit(1)

    logger.info("")
    return all_ok
|
||
|
||
# ============================================
|
||
# 启动服务(多线程模式)
|
||
# ============================================
|
||
if __name__ == '__main__':
    # Server settings are defined once and used both for the startup banner
    # and for waitress.serve(), so the two cannot drift apart.
    host = '0.0.0.0'
    port = 5001
    threads = 8
    channel_timeout = 300      # seconds
    connection_limit = 100
    max_upload_mb = app.config['MAX_CONTENT_LENGTH'] // (1024 * 1024)
    # NOTE(review): deployment-specific LAN address, used only in the banner
    # and the curl examples; adjust for the machine this runs on.
    advertised_host = '192.168.0.106'
    rule = "=" * 70

    try:
        # Environment check (exits the process when a dependency is missing).
        check_environment()

        from waitress import serve

        logger.info(rule)
        logger.info("🚀 正在启动Whisper语音识别服务...")
        logger.info(rule)

        # Load the model up front so it is ready before requests are served.
        load_whisper_model()

        # Startup banner.
        logger.info("")
        logger.info(rule)
        logger.info("✅ Whisper服务启动成功!")
        logger.info(rule)
        logger.info(f"📍 本地地址: http://127.0.0.1:{port}")
        logger.info(f"📍 局域网地址: http://{host}:{port}")
        logger.info(f"📍 访问地址: http://{advertised_host}:{port}")
        logger.info(rule)
        logger.info("⚙️ 运行模式: 多线程并发")
        logger.info(f"⚙️ Whisper模型: {MODEL_NAME}")
        logger.info(f"⚙️ 工作线程: {threads} 个")
        logger.info("⚙️ 并发能力: 40-60 人同时使用")
        logger.info(f"⚙️ 超时时间: {channel_timeout} 秒")
        logger.info(f"⚙️ 最大连接: {connection_limit} 个")
        logger.info(f"⚙️ 最大文件: {max_upload_mb} MB")
        logger.info(rule)
        logger.info("")
        logger.info("📌 API接口列表:")
        logger.info(" [GET] /health - 健康检查")
        logger.info(" [POST] /recognize - 语音识别(只识别)")
        logger.info(" [POST] /evaluate - 语音评测(识别+评分)")
        logger.info(rule)
        logger.info("")
        logger.info("💡 使用示例:")
        logger.info(f" 健康检查: curl http://{advertised_host}:{port}/health")
        logger.info(f" 语音识别: curl -F 'file=@audio.mp3' http://{advertised_host}:{port}/recognize")
        logger.info(f" 语音评测: curl -F 'file=@audio.mp3' -F 'text=你好' http://{advertised_host}:{port}/evaluate")
        logger.info(rule)
        logger.info("")
        logger.info("✨ 服务已就绪,等待请求...")
        logger.info("✨ 按 Ctrl+C 停止服务")
        logger.info("")

        # Serve with waitress (multi-threaded WSGI server).
        serve(
            app,
            host=host,
            port=port,
            threads=threads,                    # worker threads
            channel_timeout=channel_timeout,    # per-channel timeout, seconds
            connection_limit=connection_limit,  # max concurrent connections
            backlog=64,                         # listen queue length
            recv_bytes=65536,                   # receive buffer, 64KB
            send_bytes=65536,                   # send buffer, 64KB
            url_scheme='http'
        )

    except KeyboardInterrupt:
        logger.info("")
        logger.info(rule)
        logger.info("⏹️ 收到停止信号,正在关闭服务...")
        logger.info(rule)
        logger.info("👋 服务已停止")

    except Exception as e:
        logger.error(rule)
        logger.error(f"❌ 服务启动失败: {e}")
        logger.error(traceback.format_exc())
        logger.error(rule)
        sys.exit(1)