#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Whisper speech recognition service.

Exposes HTTP endpoints for speech recognition and for a simple
text-similarity based reading evaluation, served by a multi-threaded
WSGI server.

Author: AI Assistant
Version: 2.0
"""

import importlib
import importlib.metadata
import logging
import os
import re
import shutil
import sys
import tempfile
import threading
import traceback
import unicodedata
from datetime import datetime
from difflib import SequenceMatcher
from logging.handlers import RotatingFileHandler

import whisper
from flask import Flask, request, jsonify
from flask_cors import CORS


# ============================================
# Logging configuration
# ============================================
def setup_logging():
    """Configure and return the ``whisper_server`` logger.

    A console handler (stdout) is always attached.  A rotating file handler
    under ``logs/`` is attached on a best-effort basis; if the directory or
    file cannot be created a warning is logged and the console handler alone
    is used.

    Calling this function more than once returns the already configured
    logger instead of stacking duplicate handlers.
    """
    logger = logging.getLogger('whisper_server')
    if logger.handlers:
        # Already configured; adding handlers again would duplicate every
        # log record.
        return logger

    logger.setLevel(logging.INFO)

    formatter = logging.Formatter(
        '%(asctime)s [%(levelname)s] %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )

    # Console output
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    # File output (optional)
    try:
        log_dir = 'logs'
        os.makedirs(log_dir, exist_ok=True)
        file_handler = RotatingFileHandler(
            os.path.join(log_dir, 'whisper_server.log'),
            maxBytes=10 * 1024 * 1024,  # 10MB
            backupCount=5,
            encoding='utf-8'
        )
        file_handler.setLevel(logging.INFO)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)
    except OSError as e:
        logger.warning(f"无法创建日志文件: {e}")

    return logger


# Initialise logging
logger = setup_logging()

# ============================================
# Server settings (shared by the startup banner and the serve() call)
# ============================================
SERVER_HOST = '0.0.0.0'
SERVER_PORT = 5001
SERVER_THREADS = 8
CHANNEL_TIMEOUT_SECONDS = 300
CONNECTION_LIMIT = 100
MAX_UPLOAD_MB = 16
# Address shown in the startup banner and curl examples only.
ADVERTISED_ADDRESS = '192.168.0.106'

# ============================================
# Flask application
# ============================================
app = Flask(__name__)
CORS(app)  # allow cross-origin requests
app.config['MAX_CONTENT_LENGTH'] = MAX_UPLOAD_MB * 1024 * 1024

# ============================================
# Globals
# ============================================
whisper_model = None
MODEL_NAME = "tiny"  # options: tiny, base, small, medium, large

# Guards the lazy initialisation of ``whisper_model`` across worker threads.
_model_lock = threading.Lock()

# Everything except CJK unified ideographs (U+4E00-U+9FFF), ASCII letters
# and ASCII digits.
_NON_SCORING_CHARS = re.compile(r'[^\u4e00-\u9fffa-zA-Z0-9]')


# ============================================
# Model loading
# ============================================
def load_whisper_model():
    """Return the shared Whisper model, loading it on first use.

    The model is stored in the module-level ``whisper_model``.  A lock makes
    sure that concurrent first calls load the model only once.

    Raises:
        Exception: whatever ``whisper.load_model`` raises; it is logged and
            re-raised unchanged.
    """
    global whisper_model
    if whisper_model is None:
        with _model_lock:
            # Re-check: another thread may have finished loading while this
            # one was waiting for the lock.
            if whisper_model is None:
                logger.info(f"正在加载Whisper模型 ({MODEL_NAME})...")
                try:
                    whisper_model = whisper.load_model(MODEL_NAME)
                    logger.info(f"✅ Whisper模型加载成功 ({MODEL_NAME})")
                except Exception as e:
                    logger.error(f"❌ 模型加载失败: {e}")
                    raise
    return whisper_model


# ============================================
# Helpers
# ============================================
def convert_to_simplified(text):
    """Convert Traditional Chinese in *text* to Simplified Chinese.

    ``zhconv`` is tried first, then ``opencc``.  If neither library can be
    imported a warning is logged and *text* is returned unchanged.  Falsy
    input (``None`` or ``""``) is returned as-is.
    """
    if not text:
        return text

    # Option 1: zhconv
    try:
        import zhconv
    except ImportError:
        pass
    else:
        result = zhconv.convert(text, 'zh-cn')
        if result != text:
            logger.info(f"繁简转换: {text} -> {result}")
        return result

    # Option 2: opencc
    try:
        from opencc import OpenCC
    except ImportError:
        pass
    else:
        result = OpenCC('t2s').convert(text)
        if result != text:
            logger.info(f"繁简转换(OpenCC): {text} -> {result}")
        return result

    # Neither library is available: return the original text.
    logger.warning("繁简转换库未安装,请运行: pip install zhconv")
    return text


def clean_text_strict(text):
    """Reduce *text* to the characters used for similarity scoring.

    The text is NFKC-normalised, every character that is not a CJK ideograph
    in U+4E00-U+9FFF, an ASCII letter or an ASCII digit is dropped, and the
    result is lower-cased.  Falsy input yields ``""``.
    """
    if not text:
        return ""
    normalized = unicodedata.normalize('NFKC', text)
    return _NON_SCORING_CHARS.sub('', normalized).lower()


def _json_response(code, msg, data=None):
    """Build the service's ``{"code", "msg", "data"}`` reply with status *code*."""
    return jsonify({"code": code, "msg": msg, "data": data}), code


def _upload_size(audio_file):
    """Return the size in bytes of an uploaded file without reading it into memory.

    The stream position is left at the start of the file.
    """
    audio_file.seek(0, os.SEEK_END)
    size = audio_file.tell()
    audio_file.seek(0)
    return size


def _transcribe_upload(audio_file):
    """Transcribe an uploaded audio file and return Simplified Chinese text.

    The upload is written to a temporary ``.mp3`` file, transcribed with the
    shared Whisper model (``language='zh'``, ``fp16=False``) and the temporary
    file is removed afterwards, whether or not any step failed.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
        temp_path = temp_file.name

    try:
        # Saving happens inside the try block so that a failed write does
        # not leave the temporary file behind.
        audio_file.save(temp_path)
        model = load_whisper_model()
        # NOTE(review): transcribe() is called on one shared model from
        # several worker threads; confirm the model is safe for concurrent use.
        result = model.transcribe(temp_path, language='zh', fp16=False)
        return convert_to_simplified(result['text'].strip())
    finally:
        try:
            os.remove(temp_path)
        except FileNotFoundError:
            pass
        except OSError as e:
            logger.warning(f"删除临时文件失败: {e}")


def _score_reading(recognized_text, standard_text):
    """Score *recognized_text* against *standard_text*.

    Returns the ``data`` payload of the ``/evaluate`` response.  All scores
    are derived from the character-level similarity of the cleaned texts:

    * accuracy: 100 when similarity >= 0.98, 99 when >= 0.95, otherwise
      ``similarity * 100``
    * completeness: cleaned recognised length relative to the cleaned
      standard length, capped at 100
    * fluency / pronunciation: 100 when accuracy is 100, otherwise
      ``accuracy * 0.95`` / ``accuracy * 0.98``
    * score: weighted sum (accuracy 0.3, completeness 0.25, fluency 0.3,
      pronunciation 0.15)
    """
    clean_recognized = clean_text_strict(recognized_text)
    clean_standard = clean_text_strict(standard_text)

    similarity = SequenceMatcher(None, clean_recognized, clean_standard).ratio()

    # Debug logging
    logger.info(f"📝 标准文本: {standard_text}")
    logger.info(f"🎤 识别文本: {recognized_text}")
    logger.info(f"🧹 清理后标准: {clean_standard}")
    logger.info(f"🧹 清理后识别: {clean_recognized}")
    logger.info(f"📊 相似度: {similarity:.4f} ({similarity*100:.2f}%)")

    # Accuracy, with thresholds that forgive tiny recognition differences
    if similarity >= 0.98:
        accuracy = 100
        logger.info(f"✅ 相似度>=98%,准确度给满分: {accuracy}")
    elif similarity >= 0.95:
        accuracy = 99
        logger.info(f"✅ 相似度>=95%,准确度给99分: {accuracy}")
    else:
        accuracy = similarity * 100
        logger.info(f"📊 准确度: {accuracy:.2f}")

    # max(..., 1) avoids dividing by zero when the standard text has no
    # scoring characters.
    completeness = min(
        len(clean_recognized) / max(len(clean_standard), 1) * 100, 100
    )

    fluency = 100 if accuracy == 100 else accuracy * 0.95
    pronunciation = 100 if accuracy == 100 else accuracy * 0.98

    total_score = (
        accuracy * 0.3
        + completeness * 0.25
        + fluency * 0.3
        + pronunciation * 0.15
    )

    logger.info(f"✅ 评测完成: 总分={total_score:.0f}, 准确度={accuracy:.0f}, 流畅度={fluency:.0f}, 完整度={completeness:.0f}, 发音={pronunciation:.0f}")

    return {
        "text": recognized_text,
        "score": round(total_score),
        "accuracy": round(accuracy),
        "fluency": round(fluency),
        "completeness": round(completeness),
        "pronunciation": round(pronunciation),
        "similarity": round(similarity * 100, 2)
    }


# ============================================
# API routes
# ============================================
@app.route('/health', methods=['GET'])
def health_check():
    """Health-check endpoint: report service name, model, version and time."""
    return jsonify({
        "status": "ok",
        "service": "Whisper Speech Recognition",
        "model": MODEL_NAME,
        "version": "2.0",
        "timestamp": datetime.now().isoformat()
    })


@app.route('/recognize', methods=['POST'])
def recognize():
    """
    Speech recognition endpoint (recognition only, no scoring).

    Parameters:
        file: audio file (multipart/form-data)

    Returns:
        {
            "code": 200,
            "msg": "识别成功",
            "data": {
                "text": "<recognised text>"
            }
        }
    """
    # The request body is parsed outside the try block below so that HTTP
    # errors raised while reading it (e.g. 413 for an oversized upload)
    # reach the registered error handlers instead of becoming a 500.
    if 'file' not in request.files:
        logger.warning("请求缺少音频文件")
        return _json_response(400, "缺少音频文件")

    audio_file = request.files['file']
    if audio_file.filename == '':
        logger.warning("文件名为空")
        return _json_response(400, "文件名为空")

    try:
        logger.info(f"收到识别请求: {audio_file.filename} ({_upload_size(audio_file)} bytes)")
        logger.info(f"开始识别: {audio_file.filename}")

        recognized_text = _transcribe_upload(audio_file)

        logger.info(f"✅ 识别成功: {recognized_text}")
        return _json_response(200, "识别成功", {"text": recognized_text})
    except Exception as e:
        logger.error(f"识别失败: {str(e)}")
        logger.error(traceback.format_exc())
        return _json_response(500, f"识别失败: {str(e)}")


@app.route('/evaluate', methods=['POST'])
def evaluate():
    """
    Speech evaluation endpoint (recognition + scoring).

    Parameters:
        file: audio file (multipart/form-data)
        text: reference text (form-data)

    Returns:
        {
            "code": 200,
            "msg": "评测成功",
            "data": {
                "text": "<recognised text>",
                "score": 95,
                "accuracy": 98,
                "fluency": 95,
                "completeness": 100,
                "pronunciation": 96,
                "similarity": 98.5
            }
        }
    """
    # Parsed outside the try block for the same reason as in recognize():
    # HTTP errors from request parsing must reach the error handlers.
    if 'file' not in request.files:
        logger.warning("请求缺少音频文件")
        return _json_response(400, "缺少音频文件")

    audio_file = request.files['file']
    if audio_file.filename == '':
        logger.warning("文件名为空")
        return _json_response(400, "文件名为空")

    standard_text = request.form.get('text', '')
    if not standard_text:
        logger.warning("请求缺少标准文本")
        return _json_response(400, "缺少标准文本")

    try:
        logger.info(f"收到评测请求: {audio_file.filename}, 标准文本: {standard_text}")

        # 1. Recognise the audio
        logger.info("开始识别音频...")
        recognized_text = _transcribe_upload(audio_file)

        # 2. Score it against the reference text
        data = _score_reading(recognized_text, standard_text)
        return _json_response(200, "评测成功", data)
    except Exception as e:
        logger.error(f"评测失败: {str(e)}")
        logger.error(traceback.format_exc())
        return _json_response(500, f"评测失败: {str(e)}")


# ============================================
# Error handlers
# ============================================
@app.errorhandler(413)
def request_entity_too_large(error):
    """Return a JSON 413 reply when the upload exceeds MAX_CONTENT_LENGTH."""
    logger.warning("请求文件过大")
    return _json_response(413, f"文件太大,最大支持{MAX_UPLOAD_MB}MB")


@app.errorhandler(500)
def internal_error(error):
    """Return a JSON 500 reply for unhandled server errors."""
    logger.error(f"服务器内部错误: {error}")
    return _json_response(500, "服务器内部错误")


@app.errorhandler(404)
def not_found(error):
    """Return a JSON 404 reply for unknown routes."""
    return _json_response(404, "接口不存在")


# ============================================
# Environment check
# ============================================
def _installed_version(dist_name):
    """Return the installed version of distribution *dist_name*, or None."""
    try:
        return importlib.metadata.version(dist_name)
    except importlib.metadata.PackageNotFoundError:
        return None


def check_environment():
    """Log the state of the runtime dependencies.

    Required packages (whisper, flask, flask-cors, waitress) that cannot be
    imported make the process exit with status 1.  Missing optional pieces
    (a Traditional-to-Simplified converter, the ffmpeg executable) only
    produce warnings.

    Returns:
        True when every required package is importable (otherwise the
        function does not return because ``sys.exit(1)`` is called).
    """
    logger.info("")
    logger.info("=" * 70)
    logger.info("🔍 环境检查")
    logger.info("=" * 70)

    # 1. Python version
    py_version = sys.version.split()[0]
    logger.info(f" Python版本: {py_version} ✅")

    # 2-5. Required packages: (module name, display label, pip package)
    required = (
        ("whisper", "Whisper", "openai-whisper"),
        ("flask", "Flask", "flask"),
        ("flask_cors", "Flask-CORS", "flask-cors"),
        ("waitress", "Waitress", "waitress"),
    )
    all_ok = True
    for module_name, label, pip_name in required:
        try:
            importlib.import_module(module_name)
        except ImportError:
            logger.error(f" {label}: 未安装 ❌ (pip install {pip_name})")
            all_ok = False
            continue

        status = "已安装"
        if module_name == "flask":
            # flask.__version__ is deprecated; query the package metadata.
            status = _installed_version("flask") or status
        logger.info(f" {label}: {status} ✅")

    # 6. Traditional -> Simplified converters (optional)
    zhconv_ok = False
    opencc_ok = False
    try:
        import zhconv  # noqa: F401
        zhconv_ok = True
        logger.info(" zhconv: 已安装 ✅ (繁简转换)")
    except ImportError:
        pass
    try:
        from opencc import OpenCC  # noqa: F401
        opencc_ok = True
        logger.info(" OpenCC: 已安装 ✅ (繁简转换备用)")
    except ImportError:
        pass
    if not zhconv_ok and not opencc_ok:
        logger.warning(" 繁简转换: 未安装 ⚠️ (pip install zhconv)")
        logger.warning(" 识别结果可能包含繁体字!")

    # 7. ffmpeg executable (used by Whisper)
    if shutil.which('ffmpeg'):
        logger.info(" FFmpeg: 已安装 ✅")
    else:
        logger.warning(" FFmpeg: 未找到 ⚠️ (某些音频格式可能无法处理)")

    logger.info("=" * 70)
    if all_ok:
        logger.info("✅ 环境检查通过!")
    else:
        logger.error("❌ 缺少必要依赖,请先安装后再启动!")
        sys.exit(1)
    logger.info("")
    return all_ok


# ============================================
# Service startup (multi-threaded)
# ============================================
def main():
    """Check the environment, preload the model and serve the app with waitress.

    Ctrl+C stops the service with a shutdown message; any other startup
    failure is logged with its traceback and exits with status 1.
    """
    try:
        # Environment check
        check_environment()

        # Imported here so that a missing waitress is reported by the
        # environment check above rather than as an import traceback.
        from waitress import serve

        base_url = f"http://{ADVERTISED_ADDRESS}:{SERVER_PORT}"

        # Startup banner
        logger.info("=" * 70)
        logger.info("🚀 正在启动Whisper语音识别服务...")
        logger.info("=" * 70)

        # Preload the model so the first request does not pay for it
        load_whisper_model()

        logger.info("")
        logger.info("=" * 70)
        logger.info("✅ Whisper服务启动成功!")
        logger.info("=" * 70)
        logger.info(f"📍 本地地址: http://127.0.0.1:{SERVER_PORT}")
        logger.info(f"📍 局域网地址: http://{SERVER_HOST}:{SERVER_PORT}")
        logger.info(f"📍 访问地址: {base_url}")
        logger.info("=" * 70)
        logger.info("⚙️ 运行模式: 多线程并发")
        logger.info(f"⚙️ Whisper模型: {MODEL_NAME}")
        logger.info(f"⚙️ 工作线程: {SERVER_THREADS} 个")
        logger.info("⚙️ 并发能力: 40-60 人同时使用")
        logger.info(f"⚙️ 超时时间: {CHANNEL_TIMEOUT_SECONDS} 秒")
        logger.info(f"⚙️ 最大连接: {CONNECTION_LIMIT} 个")
        logger.info(f"⚙️ 最大文件: {MAX_UPLOAD_MB} MB")
        logger.info("=" * 70)
        logger.info("")
        logger.info("📌 API接口列表:")
        logger.info(" [GET] /health - 健康检查")
        logger.info(" [POST] /recognize - 语音识别(只识别)")
        logger.info(" [POST] /evaluate - 语音评测(识别+评分)")
        logger.info("=" * 70)
        logger.info("")
        logger.info("💡 使用示例:")
        logger.info(f" 健康检查: curl {base_url}/health")
        logger.info(f" 语音识别: curl -F 'file=@audio.mp3' {base_url}/recognize")
        logger.info(f" 语音评测: curl -F 'file=@audio.mp3' -F 'text=你好' {base_url}/evaluate")
        logger.info("=" * 70)
        logger.info("")
        logger.info("✨ 服务已就绪,等待请求...")
        logger.info("✨ 按 Ctrl+C 停止服务")
        logger.info("")

        # Serve with waitress (multi-threaded)
        serve(
            app,
            host=SERVER_HOST,
            port=SERVER_PORT,
            threads=SERVER_THREADS,                   # worker threads
            channel_timeout=CHANNEL_TIMEOUT_SECONDS,  # seconds before an inactive connection is closed
            connection_limit=CONNECTION_LIMIT,        # maximum simultaneous connections
            backlog=64,                               # listen queue length
            recv_bytes=65536,                         # receive buffer, 64KB
            send_bytes=65536,                         # send buffer, 64KB
            url_scheme='http'
        )
    except KeyboardInterrupt:
        logger.info("")
        logger.info("=" * 70)
        logger.info("⏹️ 收到停止信号,正在关闭服务...")
        logger.info("=" * 70)
        logger.info("👋 服务已停止")
    except Exception as e:
        logger.error("=" * 70)
        logger.error(f"❌ 服务启动失败: {e}")
        logger.error(traceback.format_exc())
        logger.error("=" * 70)
        sys.exit(1)


if __name__ == '__main__':
    main()