guoyu/log/whisper_server (1).py

555 lines
18 KiB
Python
Raw Normal View History

2025-12-13 13:36:18 +08:00
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Whisper语音识别服务
支持语音识别和评测功能
多线程并发处理
作者: AI Assistant
版本: 2.0
"""
from flask import Flask, request, jsonify
from flask_cors import CORS
import whisper
import os
import tempfile
import logging
from logging.handlers import RotatingFileHandler
import traceback
from datetime import datetime
import sys
# ============================================
# 日志配置
# ============================================
def setup_logging():
    """Configure and return the ``whisper_server`` logger.

    Attaches a stdout handler and, when the ``logs`` directory can be
    created and opened, a rotating file handler (10 MB per file, 5 backups,
    UTF-8). Both handlers log at INFO level with the same format.

    The function is idempotent: ``logging.getLogger`` returns the same
    logger object for the same name, so if handlers are already attached
    the logger is returned unchanged instead of gaining a second set of
    handlers (which would duplicate every log line).

    Returns:
        logging.Logger: the configured ``whisper_server`` logger.
    """
    logger = logging.getLogger('whisper_server')
    logger.setLevel(logging.INFO)

    # Already configured by an earlier call: do not stack more handlers.
    if logger.handlers:
        return logger

    formatter = logging.Formatter(
        '%(asctime)s [%(levelname)s] %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )

    # Console output
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    # File output is optional: failure to create it must not stop the server.
    try:
        log_dir = 'logs'
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(log_dir, exist_ok=True)
        file_handler = RotatingFileHandler(
            os.path.join(log_dir, 'whisper_server.log'),
            maxBytes=10 * 1024 * 1024,  # 10 MB
            backupCount=5,
            encoding='utf-8'
        )
        file_handler.setLevel(logging.INFO)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)
    except OSError as e:
        logger.warning(f"无法创建日志文件: {e}")
    return logger
# Initialize logging (module-level logger shared by every function below)
logger = setup_logging()
# ============================================
# Flask application configuration
# ============================================
app = Flask(__name__)
CORS(app)  # Allow cross-origin requests
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # Maximum request size: 16 MB
# ============================================
# Global state
# ============================================
whisper_model = None  # Assigned by load_whisper_model() on first use
MODEL_NAME = "tiny"  # Options: tiny, base, small, medium, large
# ============================================
# 模型加载
# ============================================
def load_whisper_model():
    """Return the shared Whisper model, loading it on the first call.

    The loaded model is stored in the module-level ``whisper_model`` and
    reused by later calls. A load failure is logged and re-raised.
    """
    global whisper_model

    # Fast path: the model has already been loaded.
    if whisper_model is not None:
        return whisper_model

    logger.info(f"正在加载Whisper模型 ({MODEL_NAME})...")
    try:
        whisper_model = whisper.load_model(MODEL_NAME)
        logger.info(f"✅ Whisper模型加载成功 ({MODEL_NAME})")
    except Exception as load_error:
        logger.error(f"❌ 模型加载失败: {load_error}")
        raise
    return whisper_model
# ============================================
# 工具函数
# ============================================
def convert_to_simplified(text):
    """Convert Traditional Chinese text to Simplified Chinese.

    Backends are tried in order: ``zhconv`` first, then ``opencc``. A
    backend is skipped when it raises ``ImportError``. If neither is
    available a warning is logged and the input is returned unchanged.
    Falsy input (empty string, ``None``) is returned as-is.
    """
    if not text:
        return text

    def _with_zhconv(source):
        # Option 1: zhconv (pure Python)
        import zhconv
        converted = zhconv.convert(source, 'zh-cn')
        if converted != source:
            logger.info(f"繁简转换: {source} -> {converted}")
        return converted

    def _with_opencc(source):
        # Option 2: OpenCC, Traditional-to-Simplified profile
        from opencc import OpenCC
        converted = OpenCC('t2s').convert(source)
        if converted != source:
            logger.info(f"繁简转换(OpenCC): {source} -> {converted}")
        return converted

    for backend in (_with_zhconv, _with_opencc):
        try:
            return backend(text)
        except ImportError:
            continue

    # No conversion library is installed: return the original text.
    logger.warning("繁简转换库未安装,请运行: pip install zhconv")
    return text
def clean_text_strict(text):
    """Reduce text to lowercase CJK ideographs, ASCII letters and digits.

    The text is NFKC-normalized first (so full-width letters and digits
    become their ASCII forms), then every character outside
    U+4E00-U+9FFF, a-z, A-Z and 0-9 is dropped, and the result is
    lower-cased. Used to compare texts for the accuracy score.

    Returns an empty string for falsy input.
    """
    import re
    import unicodedata

    if not text:
        return ""

    normalized = unicodedata.normalize('NFKC', text)
    # Collect the characters to keep instead of deleting the complement.
    kept_chars = re.findall(r'[\u4e00-\u9fffa-zA-Z0-9]', normalized)
    return "".join(kept_chars).lower()
# ============================================
# API路由
# ============================================
@app.route('/health', methods=['GET'])
def health_check():
    """Health-check endpoint: report service name, model, version and server time."""
    status = {
        "status": "ok",
        "service": "Whisper Speech Recognition",
        "model": MODEL_NAME,
        "version": "2.0",
    }
    status["timestamp"] = datetime.now().isoformat()
    return jsonify(status)
@app.route('/recognize', methods=['POST'])
def recognize():
    """Speech recognition endpoint (transcription only, no scoring).

    Request (multipart/form-data):
        file: the audio file to transcribe.

    Response JSON:
        {
            "code": 200,
            "msg": "识别成功",
            "data": {"text": "<recognized text>"}
        }

    Returns HTTP 400 when the file part is missing or has an empty
    filename, and HTTP 500 (with the error text in ``msg``) when saving or
    transcribing fails.
    """
    # Parse and validate the upload OUTSIDE the broad try/except below.
    # Accessing request.files triggers form parsing, which may raise an HTTP
    # error such as 413 (body larger than MAX_CONTENT_LENGTH); it must reach
    # Flask's error handlers instead of being reported as a 500.
    if 'file' not in request.files:
        logger.warning("请求缺少音频文件")
        return jsonify({"code": 400, "msg": "缺少音频文件", "data": None}), 400
    audio_file = request.files['file']
    if audio_file.filename == '':
        logger.warning("文件名为空")
        return jsonify({"code": 400, "msg": "文件名为空", "data": None}), 400

    temp_path = None
    try:
        # Measure the upload by seeking rather than reading it all into memory.
        audio_file.stream.seek(0, os.SEEK_END)
        size_bytes = audio_file.stream.tell()
        audio_file.stream.seek(0)  # Rewind so save() writes the full content
        logger.info(f"收到识别请求: {audio_file.filename} ({size_bytes} bytes)")

        # Reserve a temp path; the handle is closed before the upload is
        # written to it. From here on the finally block owns the cleanup, so
        # a failing save() can no longer leak the file.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
            temp_path = temp_file.name
        audio_file.save(temp_path)

        model = load_whisper_model()
        logger.info(f"开始识别: {audio_file.filename}")
        result = model.transcribe(temp_path, language='zh', fp16=False)
        recognized_text = result['text'].strip()
        # Traditional -> Simplified Chinese
        recognized_text = convert_to_simplified(recognized_text)
        logger.info(f"✅ 识别成功: {recognized_text}")
        return jsonify({
            "code": 200,
            "msg": "识别成功",
            "data": {
                "text": recognized_text
            }
        })
    except Exception as e:
        # logger.exception records the message together with the traceback.
        logger.exception(f"识别失败: {str(e)}")
        return jsonify({
            "code": 500,
            "msg": f"识别失败: {str(e)}",
            "data": None
        }), 500
    finally:
        # Best-effort removal of the temp file; never masks the response.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except FileNotFoundError:
                pass
            except OSError as e:
                logger.warning(f"删除临时文件失败: {e}")
def _score_transcription(recognized_text, standard_text):
    """Score recognized text against the reference text.

    Both texts are reduced with ``clean_text_strict`` and compared with
    ``difflib.SequenceMatcher``. Only ``similarity`` and ``completeness``
    are measured from the texts; ``fluency`` and ``pronunciation`` are
    derived from ``accuracy`` by fixed factors, not from the audio.

    Returns:
        dict: the ``data`` payload of the /evaluate response (rounded
        scores plus the recognized text).
    """
    from difflib import SequenceMatcher

    clean_recognized = clean_text_strict(recognized_text)
    clean_standard = clean_text_strict(standard_text)
    similarity = SequenceMatcher(None, clean_recognized, clean_standard).ratio()

    # Debug trail of the comparison inputs
    logger.info(f"📝 标准文本: {standard_text}")
    logger.info(f"🎤 识别文本: {recognized_text}")
    logger.info(f"🧹 清理后标准: {clean_standard}")
    logger.info(f"🧹 清理后识别: {clean_recognized}")
    logger.info(f"📊 相似度: {similarity:.4f} ({similarity*100:.2f}%)")

    # Accuracy: near-perfect matches are rounded up to tolerate small
    # recognition noise; everything else is the raw similarity percentage.
    if similarity >= 0.98:
        accuracy = 100
        logger.info(f"✅ 相似度>=98%,准确度给满分: {accuracy}")
    elif similarity >= 0.95:
        accuracy = 99
        logger.info(f"✅ 相似度>=95%准确度给99分: {accuracy}")
    else:
        accuracy = similarity * 100
        logger.info(f"📊 准确度: {accuracy:.2f}")

    # Completeness: recognized length relative to the reference, capped at
    # 100; max(..., 1) guards against an empty cleaned reference.
    completeness = min(len(clean_recognized) / max(len(clean_standard), 1) * 100, 100)

    # Fluency and pronunciation follow accuracy (full marks when accuracy is 100).
    fluency = 100 if accuracy == 100 else accuracy * 0.95
    pronunciation = 100 if accuracy == 100 else accuracy * 0.98

    # Weighted total
    total_score = (accuracy * 0.3 + completeness * 0.25 + fluency * 0.3 + pronunciation * 0.15)
    logger.info(f"✅ 评测完成: 总分={total_score:.0f}, 准确度={accuracy:.0f}, 流畅度={fluency:.0f}, 完整度={completeness:.0f}, 发音={pronunciation:.0f}")

    return {
        "text": recognized_text,
        "score": round(total_score),
        "accuracy": round(accuracy),
        "fluency": round(fluency),
        "completeness": round(completeness),
        "pronunciation": round(pronunciation),
        "similarity": round(similarity * 100, 2)
    }


@app.route('/evaluate', methods=['POST'])
def evaluate():
    """Speech evaluation endpoint: transcribe the audio, then score it.

    Request (multipart/form-data):
        file: the audio file.
        text: the reference text the speaker was expected to read.

    Response JSON:
        {
            "code": 200,
            "msg": "评测成功",
            "data": {
                "text": "<recognized text>",
                "score": 95,
                "accuracy": 98,
                "fluency": 95,
                "completeness": 100,
                "pronunciation": 96,
                "similarity": 98.5
            }
        }

    Returns HTTP 400 when the file part is missing, its filename is empty,
    or the reference text is missing/blank, and HTTP 500 (with the error
    text in ``msg``) when saving, transcribing or scoring fails.
    """
    # Parse and validate the request OUTSIDE the broad try/except below.
    # Accessing request.files / request.form triggers form parsing, which may
    # raise an HTTP error such as 413 (body larger than MAX_CONTENT_LENGTH);
    # it must reach Flask's error handlers instead of being reported as a 500.
    if 'file' not in request.files:
        logger.warning("请求缺少音频文件")
        return jsonify({"code": 400, "msg": "缺少音频文件", "data": None}), 400
    audio_file = request.files['file']
    if audio_file.filename == '':
        logger.warning("文件名为空")
        return jsonify({"code": 400, "msg": "文件名为空", "data": None}), 400
    # strip() so a whitespace-only reference is rejected rather than scored.
    standard_text = request.form.get('text', '').strip()
    if not standard_text:
        logger.warning("请求缺少标准文本")
        return jsonify({"code": 400, "msg": "缺少标准文本", "data": None}), 400

    temp_path = None
    try:
        logger.info(f"收到评测请求: {audio_file.filename}, 标准文本: {standard_text}")

        # Reserve a temp path; the handle is closed before the upload is
        # written to it. From here on the finally block owns the cleanup, so
        # a failing save() can no longer leak the file.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
            temp_path = temp_file.name
        audio_file.save(temp_path)

        # 1. Transcribe the audio
        model = load_whisper_model()
        logger.info("开始识别音频...")
        result = model.transcribe(temp_path, language='zh', fp16=False)
        recognized_text = convert_to_simplified(result['text'].strip())

        # 2. Score against the reference text
        data = _score_transcription(recognized_text, standard_text)
        return jsonify({
            "code": 200,
            "msg": "评测成功",
            "data": data
        })
    except Exception as e:
        # logger.exception records the message together with the traceback.
        logger.exception(f"评测失败: {str(e)}")
        return jsonify({
            "code": 500,
            "msg": f"评测失败: {str(e)}",
            "data": None
        }), 500
    finally:
        # Best-effort removal of the temp file; never masks the response.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except FileNotFoundError:
                pass
            except OSError as e:
                logger.warning(f"删除临时文件失败: {e}")
# ============================================
# 错误处理
# ============================================
@app.errorhandler(413)
def request_entity_too_large(error):
    """Handle HTTP 413: log a warning and return the JSON error envelope."""
    logger.warning("请求文件过大")
    body = {"code": 413, "msg": "文件太大最大支持16MB", "data": None}
    return jsonify(body), 413
@app.errorhandler(500)
def internal_error(error):
    """Handle HTTP 500: log the error and return a generic JSON error envelope."""
    logger.error(f"服务器内部错误: {error}")
    body = {"code": 500, "msg": "服务器内部错误", "data": None}
    return jsonify(body), 500
@app.errorhandler(404)
def not_found(error):
    """Handle HTTP 404: return the JSON error envelope for unknown routes."""
    body = {"code": 404, "msg": "接口不存在", "data": None}
    return jsonify(body), 404
# ============================================
# 环境检查
# ============================================
def check_environment():
    """Log the status of runtime dependencies; exit if a required one is missing.

    Required packages (a missing one fails the check): whisper, flask,
    flask-cors, waitress. Optional components (a missing one only logs a
    warning): a Traditional-to-Simplified converter (zhconv or opencc) and
    the ``ffmpeg`` executable on PATH.

    Returns:
        bool: ``True`` when every required package could be imported.

    Raises:
        SystemExit: with exit code 1 when a required package is missing
        (the function does not return in that case).
    """
    logger.info("")
    logger.info("=" * 70)
    logger.info("🔍 环境检查")
    logger.info("=" * 70)
    all_ok = True

    # 1. Python version
    py_version = sys.version.split()[0]
    logger.info(f" Python版本: {py_version}")

    # 2. whisper
    try:
        import whisper  # noqa: F401  (import attempted only to test availability)
        logger.info(" Whisper: 已安装 ✅")
    except ImportError:
        logger.error(" Whisper: 未安装 ❌ (pip install openai-whisper)")
        all_ok = False

    # 3. Flask
    try:
        import flask
    except ImportError:
        logger.error(" Flask: 未安装 ❌ (pip install flask)")
        all_ok = False
    else:
        # flask.__version__ is deprecated (Flask 3.0+); once it is removed,
        # reading it raises AttributeError, which would abort startup.
        # Prefer the installed distribution metadata and fall back safely.
        try:
            from importlib.metadata import version as _dist_version
            flask_version = _dist_version("flask")
        except ImportError:
            # Covers both a missing importlib.metadata and
            # PackageNotFoundError (an ImportError subclass).
            flask_version = getattr(flask, "__version__", "unknown")
        logger.info(f" Flask: {flask_version}")

    # 4. flask-cors
    try:
        import flask_cors  # noqa: F401
        logger.info(" Flask-CORS: 已安装 ✅")
    except ImportError:
        logger.error(" Flask-CORS: 未安装 ❌ (pip install flask-cors)")
        all_ok = False

    # 5. waitress
    try:
        import waitress  # noqa: F401
        logger.info(" Waitress: 已安装 ✅")
    except ImportError:
        logger.error(" Waitress: 未安装 ❌ (pip install waitress)")
        all_ok = False

    # 6. Traditional/Simplified conversion libraries (optional; either one suffices)
    zhconv_ok = False
    opencc_ok = False
    try:
        import zhconv  # noqa: F401
        zhconv_ok = True
        logger.info(" zhconv: 已安装 ✅ (繁简转换)")
    except ImportError:
        pass
    try:
        from opencc import OpenCC  # noqa: F401
        opencc_ok = True
        logger.info(" OpenCC: 已安装 ✅ (繁简转换备用)")
    except ImportError:
        pass
    if not zhconv_ok and not opencc_ok:
        logger.warning(" 繁简转换: 未安装 ⚠️ (pip install zhconv)")
        logger.warning(" 识别结果可能包含繁体字!")

    # 7. ffmpeg executable (optional here: only a warning when absent)
    import shutil
    if shutil.which('ffmpeg'):
        logger.info(" FFmpeg: 已安装 ✅")
    else:
        logger.warning(" FFmpeg: 未找到 ⚠️ (某些音频格式可能无法处理)")

    logger.info("=" * 70)
    if all_ok:
        logger.info("✅ 环境检查通过!")
    else:
        logger.error("❌ 缺少必要依赖,请先安装后再启动!")
        sys.exit(1)
    logger.info("")
    return all_ok
# ============================================
# 启动服务(多线程模式)
# ============================================
if __name__ == '__main__':
    # Entry point: check dependencies, preload the model, print the startup
    # banner, then serve the Flask app with waitress until interrupted.
    rule = "=" * 70
    try:
        # Dependency check (exits the process when a required package is missing)
        check_environment()
        from waitress import serve

        for line in (rule, "🚀 正在启动Whisper语音识别服务...", rule):
            logger.info(line)

        # Preload the model so it is in memory before requests are accepted
        load_whisper_model()

        startup_banner = (
            "",
            rule,
            "✅ Whisper服务启动成功",
            rule,
            "📍 本地地址: http://127.0.0.1:5001",
            "📍 局域网地址: http://0.0.0.0:5001",
            "📍 访问地址: http://192.168.0.106:5001",
            rule,
            "⚙️ 运行模式: 多线程并发",
            f"⚙️ Whisper模型: {MODEL_NAME}",
            "⚙️ 工作线程: 8 个",
            "⚙️ 并发能力: 40-60 人同时使用",
            "⚙️ 超时时间: 300 秒",
            "⚙️ 最大连接: 100 个",
            "⚙️ 最大文件: 16 MB",
            rule,
            "",
            "📌 API接口列表",
            " [GET] /health - 健康检查",
            " [POST] /recognize - 语音识别(只识别)",
            " [POST] /evaluate - 语音评测(识别+评分)",
            rule,
            "",
            "💡 使用示例:",
            " 健康检查: curl http://192.168.0.106:5001/health",
            " 语音识别: curl -F 'file=@audio.mp3' http://192.168.0.106:5001/recognize",
            " 语音评测: curl -F 'file=@audio.mp3' -F 'text=你好' http://192.168.0.106:5001/evaluate",
            rule,
            "",
            "✨ 服务已就绪,等待请求...",
            "✨ 按 Ctrl+C 停止服务",
            "",
        )
        for line in startup_banner:
            logger.info(line)

        # Serve with waitress (multi-threaded WSGI server)
        serve_options = {
            "host": '0.0.0.0',
            "port": 5001,
            "threads": 8,             # worker threads
            "channel_timeout": 300,   # per-channel timeout, seconds
            "connection_limit": 100,  # maximum concurrent connections
            "backlog": 64,            # listen queue length
            "recv_bytes": 65536,      # receive buffer, 64 KB
            "send_bytes": 65536,      # send buffer, 64 KB
            "url_scheme": 'http',
        }
        serve(app, **serve_options)
    except KeyboardInterrupt:
        shutdown_lines = (
            "",
            rule,
            "⏹️ 收到停止信号,正在关闭服务...",
            rule,
            "👋 服务已停止",
        )
        for line in shutdown_lines:
            logger.info(line)
    except Exception as e:
        failure_lines = (
            rule,
            f"❌ 服务启动失败: {e}",
            traceback.format_exc(),
            rule,
        )
        for line in failure_lines:
            logger.error(line)
        sys.exit(1)