guoyu/log/whisper_server.py
2025-12-14 11:30:30 +08:00

557 lines
18 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

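# NOTE: This script provides the Whisper speech-evaluation feature. It must be started BEFORE the jar is launched; otherwise it will not be detected.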
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Whisper语音识别服务
支持语音识别和评测功能
多线程并发处理
作者: AI Assistant
版本: 2.0
"""
from flask import Flask, request, jsonify
from flask_cors import CORS
import whisper
import os
import tempfile
import logging
from logging.handlers import RotatingFileHandler
import traceback
from datetime import datetime
import sys
# ============================================
# 日志配置
# ============================================
def setup_logging():
    """Configure and return the ``whisper_server`` logger.

    A stdout handler is always attached. A rotating file handler writing to
    ``logs/whisper_server.log`` (10 MB per file, 5 backups, UTF-8) is attached
    on a best-effort basis: if it cannot be created, a warning is logged and
    console logging continues. Both handlers use the same format and INFO
    level.

    The function is idempotent. ``logging.getLogger`` returns the same logger
    object for the same name, so if handlers are already attached the logger
    is returned as-is instead of gaining a second set (which would emit every
    record twice).

    Returns:
        logging.Logger: the configured ``whisper_server`` logger.
    """
    logger = logging.getLogger('whisper_server')
    logger.setLevel(logging.INFO)

    # Already configured by an earlier call: do not stack duplicate handlers.
    if logger.handlers:
        return logger

    formatter = logging.Formatter(
        '%(asctime)s [%(levelname)s] %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )

    # Console output
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    # File output (optional)
    try:
        log_dir = 'logs'
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(log_dir, exist_ok=True)
        file_handler = RotatingFileHandler(
            os.path.join(log_dir, 'whisper_server.log'),
            maxBytes=10 * 1024 * 1024,  # 10 MB
            backupCount=5,
            encoding='utf-8'
        )
        file_handler.setLevel(logging.INFO)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)
    except Exception as e:
        # Deliberately broad: file logging is optional and must never stop
        # the service from starting.
        logger.warning(f"无法创建日志文件: {e}")
    return logger
# Initialise the module-wide logger
logger = setup_logging()
# ============================================
# Flask application configuration
# ============================================
app = Flask(__name__)
CORS(app) # Allow cross-origin requests
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 # Maximum request size: 16 MB
# ============================================
# Global state
# ============================================
# Shared model instance; stays None until load_whisper_model() populates it.
whisper_model = None
MODEL_NAME = "tiny" # Options: tiny, base, small, medium, large
# ============================================
# 模型加载
# ============================================
def load_whisper_model():
    """Return the shared Whisper model, loading it on first use.

    The model named by ``MODEL_NAME`` is loaded once via
    ``whisper.load_model`` and stored in the module-level ``whisper_model``;
    subsequent calls return that stored object.

    Returns:
        The loaded Whisper model.

    Raises:
        Exception: any error raised while loading is logged and re-raised.
    """
    global whisper_model

    # Fast path: the model has already been loaded.
    if whisper_model is not None:
        return whisper_model

    logger.info(f"正在加载Whisper模型 ({MODEL_NAME})...")
    try:
        whisper_model = whisper.load_model(MODEL_NAME)
        logger.info(f"✅ Whisper模型加载成功 ({MODEL_NAME})")
    except Exception as exc:
        logger.error(f"❌ 模型加载失败: {exc}")
        raise
    return whisper_model
# ============================================
# 工具函数
# ============================================
def convert_to_simplified(text):
    """Convert Traditional Chinese text to Simplified Chinese.

    Tries ``zhconv`` first, then ``opencc``. If neither library can be
    imported, a warning is logged and the input is returned unchanged.
    A conversion that actually changed the text is logged at INFO level.

    Args:
        text: the text to convert; falsy values are returned as-is.

    Returns:
        The converted text, or the original ``text`` when it is falsy or no
        conversion library is available.
    """
    from contextlib import suppress

    if not text:
        return text

    # Option 1: zhconv (preferred, pure Python)
    with suppress(ImportError):
        import zhconv
        simplified = zhconv.convert(text, 'zh-cn')
        if simplified != text:
            logger.info(f"繁简转换: {text} -> {simplified}")
        return simplified

    # Option 2: opencc
    with suppress(ImportError):
        from opencc import OpenCC
        converter = OpenCC('t2s')
        simplified = converter.convert(text)
        if simplified != text:
            logger.info(f"繁简转换(OpenCC): {text} -> {simplified}")
        return simplified

    # Neither library is installed: return the text untouched.
    logger.warning("繁简转换库未安装,请运行: pip install zhconv")
    return text
def clean_text_strict(text):
    """Reduce text to lower-cased Chinese characters, letters and digits.

    Used to normalise both sides before the accuracy comparison. The text is
    NFKC-normalised first, then every character outside U+4E00-U+9FFF,
    ``a-z``, ``A-Z`` and ``0-9`` is removed, and the result is lower-cased.

    Args:
        text: the text to clean; falsy values yield an empty string.

    Returns:
        str: the cleaned, lower-cased text.
    """
    import re
    import unicodedata

    if not text:
        return ""

    # Unicode normalisation, applied before filtering
    normalized = unicodedata.normalize('NFKC', text)

    # Drop everything that is not a Chinese character, letter or digit
    disallowed = re.compile(r'[^\u4e00-\u9fffa-zA-Z0-9]')
    stripped = disallowed.sub('', normalized)
    return stripped.lower()
# ============================================
# API路由
# ============================================
@app.route('/health', methods=['GET'])
def health_check():
    """Health-check endpoint.

    Returns a JSON object with a fixed ``status`` of ``"ok"``, the service
    name, the configured model name, the version string and the current
    local time in ISO 8601 format.
    """
    payload = {
        "status": "ok",
        "service": "Whisper Speech Recognition",
        "model": MODEL_NAME,
        "version": "2.0",
        "timestamp": datetime.now().isoformat(),
    }
    return jsonify(payload)
@app.route('/recognize', methods=['POST'])
def recognize():
    """Speech-recognition endpoint (transcription only, no scoring).

    Request (multipart/form-data):
        file: the audio file to transcribe.

    Responses (JSON of the form ``{"code", "msg", "data"}``):
        200: ``data`` is ``{"text": <recognised text>}``; the text has been
             passed through ``convert_to_simplified``.
        400: the ``file`` part is missing or its filename is empty.
        500: any exception raised while saving or transcribing; the
             exception text is included in ``msg``.
    """
    try:
        # Validate the upload
        if 'file' not in request.files:
            logger.warning("请求缺少音频文件")
            return jsonify({"code": 400, "msg": "缺少音频文件", "data": None}), 400
        audio_file = request.files['file']
        if not audio_file.filename:
            logger.warning("文件名为空")
            return jsonify({"code": 400, "msg": "文件名为空", "data": None}), 400

        # Measure the upload by seeking instead of read()-ing the whole body
        # into memory just to log its length.
        audio_file.stream.seek(0, os.SEEK_END)
        upload_size = audio_file.stream.tell()
        audio_file.stream.seek(0)  # rewind so save() writes the full content
        logger.info(f"收到识别请求: {audio_file.filename} ({upload_size} bytes)")

        temp_path = None
        try:
            # Reserve a temp path, then close our handle before writing to it
            # by name. Saving happens inside this try so that a failed save
            # still reaches the cleanup below.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
                temp_path = temp_file.name
            audio_file.save(temp_path)

            # Load (or reuse) the model
            model = load_whisper_model()

            # Transcribe
            logger.info(f"开始识别: {audio_file.filename}")
            result = model.transcribe(temp_path, language='zh', fp16=False)
            recognized_text = result['text'].strip()

            # Traditional -> Simplified Chinese
            recognized_text = convert_to_simplified(recognized_text)
            logger.info(f"✅ 识别成功: {recognized_text}")
            return jsonify({
                "code": 200,
                "msg": "识别成功",
                "data": {
                    "text": recognized_text
                }
            })
        finally:
            # Best-effort removal of the temp file; never mask the real error.
            if temp_path is not None:
                try:
                    os.remove(temp_path)
                except FileNotFoundError:
                    pass
                except OSError as e:
                    logger.warning(f"删除临时文件失败: {e}")
    except Exception as e:
        # Top-level request boundary: log with traceback and report a 500.
        logger.error(f"识别失败: {str(e)}")
        logger.error(traceback.format_exc())
        return jsonify({
            "code": 500,
            "msg": f"识别失败: {str(e)}",
            "data": None
        }), 500
@app.route('/evaluate', methods=['POST'])
def evaluate():
    """Speech-evaluation endpoint (transcription plus scoring).

    Request (multipart/form-data):
        file: the audio file to evaluate.
        text: the reference ("standard") text the speaker should have read.

    Scoring (all values on a 0-100 scale):
        similarity    ``difflib.SequenceMatcher`` ratio between the cleaned
                      recognised text and the cleaned reference text.
        accuracy      100 if similarity >= 0.98, 99 if >= 0.95, otherwise
                      similarity * 100.
        completeness  cleaned recognised length / cleaned reference length,
                      capped at 100.
        fluency       100 if accuracy is 100, otherwise accuracy * 0.95.
        pronunciation 100 if accuracy is 100, otherwise accuracy * 0.98.
        score         0.3*accuracy + 0.25*completeness + 0.3*fluency
                      + 0.15*pronunciation.

    Responses (JSON of the form ``{"code", "msg", "data"}``):
        200: ``data`` holds ``text``, ``score``, ``accuracy``, ``fluency``,
             ``completeness``, ``pronunciation`` (rounded to integers) and
             ``similarity`` (percentage, two decimals).
        400: the ``file`` part is missing, its filename is empty, or
             ``text`` is missing/empty.
        500: any exception raised while saving, transcribing or scoring; the
             exception text is included in ``msg``.
    """
    try:
        # Validate the upload
        if 'file' not in request.files:
            logger.warning("请求缺少音频文件")
            return jsonify({"code": 400, "msg": "缺少音频文件", "data": None}), 400
        audio_file = request.files['file']
        # Same empty-filename guard as /recognize.
        if not audio_file.filename:
            logger.warning("文件名为空")
            return jsonify({"code": 400, "msg": "文件名为空", "data": None}), 400
        standard_text = request.form.get('text', '')
        if not standard_text:
            logger.warning("请求缺少标准文本")
            return jsonify({"code": 400, "msg": "缺少标准文本", "data": None}), 400
        logger.info(f"收到评测请求: {audio_file.filename}, 标准文本: {standard_text}")

        temp_path = None
        try:
            # Reserve a temp path, then close our handle before writing to it
            # by name. Saving happens inside this try so that a failed save
            # still reaches the cleanup below.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
                temp_path = temp_file.name
            audio_file.save(temp_path)

            # 1. Transcribe the audio
            model = load_whisper_model()
            logger.info("开始识别音频...")
            result = model.transcribe(temp_path, language='zh', fp16=False)
            recognized_text = result['text'].strip()
            recognized_text = convert_to_simplified(recognized_text)

            # 2. Compute the scores
            from difflib import SequenceMatcher

            # Compare only Chinese characters, letters and digits
            clean_recognized = clean_text_strict(recognized_text)
            clean_standard = clean_text_strict(standard_text)

            # Similarity ratio in [0, 1]
            similarity = SequenceMatcher(None, clean_recognized, clean_standard).ratio()

            # Debug logging
            logger.info(f"📝 标准文本: {standard_text}")
            logger.info(f"🎤 识别文本: {recognized_text}")
            logger.info(f"🧹 清理后标准: {clean_standard}")
            logger.info(f"🧹 清理后识别: {clean_recognized}")
            logger.info(f"📊 相似度: {similarity:.4f} ({similarity*100:.2f}%)")

            # Accuracy: near-perfect matches are snapped up to 100 / 99
            if similarity >= 0.98:
                accuracy = 100
                logger.info(f"✅ 相似度>=98%,准确度给满分: {accuracy}")
            elif similarity >= 0.95:
                accuracy = 99
                logger.info(f"✅ 相似度>=95%准确度给99分: {accuracy}")
            else:
                accuracy = similarity * 100
                logger.info(f"📊 准确度: {accuracy:.2f}")

            # Completeness: length ratio capped at 100; max(..., 1) avoids
            # dividing by zero when the cleaned reference is empty.
            completeness = min(len(clean_recognized) / max(len(clean_standard), 1) * 100, 100)

            # Fluency and pronunciation are derived from accuracy, not
            # measured from the audio: full marks when accuracy is 100.
            fluency = 100 if accuracy == 100 else accuracy * 0.95
            pronunciation = 100 if accuracy == 100 else accuracy * 0.98

            # Weighted total
            total_score = (accuracy * 0.3 + completeness * 0.25 + fluency * 0.3 + pronunciation * 0.15)
            logger.info(f"✅ 评测完成: 总分={total_score:.0f}, 准确度={accuracy:.0f}, 流畅度={fluency:.0f}, 完整度={completeness:.0f}, 发音={pronunciation:.0f}")
            return jsonify({
                "code": 200,
                "msg": "评测成功",
                "data": {
                    "text": recognized_text,
                    "score": round(total_score),
                    "accuracy": round(accuracy),
                    "fluency": round(fluency),
                    "completeness": round(completeness),
                    "pronunciation": round(pronunciation),
                    "similarity": round(similarity * 100, 2)
                }
            })
        finally:
            # Best-effort removal of the temp file; never mask the real error.
            if temp_path is not None:
                try:
                    os.remove(temp_path)
                except FileNotFoundError:
                    pass
                except OSError as e:
                    logger.warning(f"删除临时文件失败: {e}")
    except Exception as e:
        # Top-level request boundary: log with traceback and report a 500.
        logger.error(f"评测失败: {str(e)}")
        logger.error(traceback.format_exc())
        return jsonify({
            "code": 500,
            "msg": f"评测失败: {str(e)}",
            "data": None
        }), 500
# ============================================
# 错误处理
# ============================================
@app.errorhandler(413)
def request_entity_too_large(error):
    """Handle HTTP 413: log a warning and return the standard JSON envelope."""
    logger.warning("请求文件过大")
    body = {
        "code": 413,
        "msg": "文件太大最大支持16MB",
        "data": None,
    }
    return jsonify(body), 413
@app.errorhandler(500)
def internal_error(error):
    """Handle HTTP 500: log the error and return the standard JSON envelope."""
    logger.error(f"服务器内部错误: {error}")
    body = {
        "code": 500,
        "msg": "服务器内部错误",
        "data": None,
    }
    return jsonify(body), 500
@app.errorhandler(404)
def not_found(error):
    """Handle HTTP 404: return the standard JSON envelope (nothing is logged)."""
    body = {
        "code": 404,
        "msg": "接口不存在",
        "data": None,
    }
    return jsonify(body), 404
# ============================================
# 环境检查
# ============================================
def check_environment():
    """Check the runtime environment and dependencies, logging each result.

    Required (a missing one is logged as an error and aborts startup):
    ``whisper``, ``flask``, ``flask_cors``, ``waitress``.
    Optional (a missing one only produces a warning): a Traditional ->
    Simplified converter (``zhconv`` or ``opencc``) and the ``ffmpeg``
    executable on ``PATH``.

    Returns:
        bool: ``True`` when every required dependency can be imported.

    Raises:
        SystemExit: with status 1 when a required dependency is missing, so
            in practice the function never returns ``False``.
    """
    logger.info("")
    logger.info("=" * 70)
    logger.info("🔍 环境检查")
    logger.info("=" * 70)
    all_ok = True

    # NOTE(review): whisper, flask and flask_cors are also imported at the
    # top of this file, so if one is missing the module fails to import
    # before this function can report it.

    # 1. Python version
    py_version = sys.version.split()[0]
    logger.info(f" Python版本: {py_version}")

    # 2. whisper
    try:
        import whisper
        logger.info(" Whisper: 已安装 ✅")
    except ImportError:
        logger.error(" Whisper: 未安装 ❌ (pip install openai-whisper)")
        all_ok = False

    # 3. Flask
    try:
        import flask
        # flask.__version__ is deprecated since Flask 3.0. Once it is
        # removed, reading it raises AttributeError, which the ImportError
        # handler below would not catch. Prefer the installed distribution
        # metadata and only fall back to the attribute.
        try:
            from importlib.metadata import version as _dist_version
            flask_version = _dist_version("flask")
        except ImportError:
            # Covers both a Python without importlib.metadata and
            # PackageNotFoundError (a ModuleNotFoundError subclass).
            flask_version = getattr(flask, "__version__", "unknown")
        logger.info(f" Flask: {flask_version}")
    except ImportError:
        logger.error(" Flask: 未安装 ❌ (pip install flask)")
        all_ok = False

    # 4. flask-cors
    try:
        import flask_cors
        logger.info(" Flask-CORS: 已安装 ✅")
    except ImportError:
        logger.error(" Flask-CORS: 未安装 ❌ (pip install flask-cors)")
        all_ok = False

    # 5. waitress
    try:
        import waitress
        logger.info(" Waitress: 已安装 ✅")
    except ImportError:
        logger.error(" Waitress: 未安装 ❌ (pip install waitress)")
        all_ok = False

    # 6. Traditional -> Simplified conversion libraries (optional)
    zhconv_ok = False
    opencc_ok = False
    try:
        import zhconv
        zhconv_ok = True
        logger.info(" zhconv: 已安装 ✅ (繁简转换)")
    except ImportError:
        pass
    try:
        from opencc import OpenCC
        opencc_ok = True
        logger.info(" OpenCC: 已安装 ✅ (繁简转换备用)")
    except ImportError:
        pass
    if not zhconv_ok and not opencc_ok:
        logger.warning(" 繁简转换: 未安装 ⚠️ (pip install zhconv)")
        logger.warning(" 识别结果可能包含繁体字!")

    # 7. ffmpeg executable (needed by Whisper; optional for this check)
    import shutil
    if shutil.which('ffmpeg'):
        logger.info(" FFmpeg: 已安装 ✅")
    else:
        logger.warning(" FFmpeg: 未找到 ⚠️ (某些音频格式可能无法处理)")

    logger.info("=" * 70)
    if all_ok:
        logger.info("✅ 环境检查通过!")
    else:
        logger.error("❌ 缺少必要依赖,请先安装后再启动!")
        sys.exit(1)
    logger.info("")
    return all_ok
# ============================================
# 启动服务(多线程模式)
# ============================================
if __name__ == '__main__':
    # Script entry point: check the environment, preload the model, print
    # the startup banner, then serve the app with waitress.
    _rule = "=" * 70
    try:
        # Environment check (exits the process if a required package is missing)
        check_environment()
        from waitress import serve

        # Startup notice
        for _line in (_rule, "🚀 正在启动Whisper语音识别服务...", _rule):
            logger.info(_line)

        # Preload the model before the banner is printed
        load_whisper_model()

        # Startup banner: addresses, settings, API list and usage examples
        _banner = (
            "",
            _rule,
            "✅ Whisper服务启动成功",
            _rule,
            "📍 本地地址: http://127.0.0.1:5001",
            "📍 局域网地址: http://0.0.0.0:5001",
            "📍 访问地址: http://192.168.0.106:5001",
            _rule,
            "⚙️ 运行模式: 多线程并发",
            f"⚙️ Whisper模型: {MODEL_NAME}",
            "⚙️ 工作线程: 8 个",
            "⚙️ 并发能力: 40-60 人同时使用",
            "⚙️ 超时时间: 300 秒",
            "⚙️ 最大连接: 100 个",
            "⚙️ 最大文件: 16 MB",
            _rule,
            "",
            "📌 API接口列表",
            " [GET] /health - 健康检查",
            " [POST] /recognize - 语音识别(只识别)",
            " [POST] /evaluate - 语音评测(识别+评分)",
            _rule,
            "",
            "💡 使用示例:",
            " 健康检查: curl http://192.168.0.106:5001/health",
            " 语音识别: curl -F 'file=@audio.mp3' http://192.168.0.106:5001/recognize",
            " 语音评测: curl -F 'file=@audio.mp3' -F 'text=你好' http://192.168.0.106:5001/evaluate",
            _rule,
            "",
            "✨ 服务已就绪,等待请求...",
            "✨ 按 Ctrl+C 停止服务",
            "",
        )
        for _line in _banner:
            logger.info(_line)

        # Serve with waitress (multi-threaded)
        _server_options = dict(
            host='0.0.0.0',
            port=5001,
            threads=8,             # 8 worker threads
            channel_timeout=300,   # 300-second channel timeout
            connection_limit=100,  # at most 100 connections
            backlog=64,            # listen queue length
            recv_bytes=65536,      # 64 KB receive buffer
            send_bytes=65536,      # 64 KB send buffer
            url_scheme='http',
        )
        serve(app, **_server_options)
    except KeyboardInterrupt:
        for _line in ("", _rule, "⏹️ 收到停止信号,正在关闭服务...", _rule, "👋 服务已停止"):
            logger.info(_line)
    except Exception as e:
        logger.error(_rule)
        logger.error(f"❌ 服务启动失败: {e}")
        logger.error(traceback.format_exc())
        logger.error(_rule)
        sys.exit(1)