#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Local Whisper speech-recognition service.

A drop-in, fully offline replacement for the Baidu speech API.

Install:
    pip install openai-whisper flask flask-cors

Run:
    python whisper_server.py

Environment variables (all optional):
    WHISPER_MODEL  Whisper model name to load (default: "base").
    WHISPER_DEBUG  Set to 1/true/yes to enable Flask debug mode.

Advantages:
    1. Free, with no call limits.
    2. Runs offline; no network required.
    3. High recognition accuracy.
    4. Supports Chinese, English and other languages.
"""

import logging
import os
import re
import tempfile
from contextlib import contextmanager
from difflib import SequenceMatcher

from flask import Flask, request, jsonify
from flask_cors import CORS
import whisper

# Optional Traditional -> Simplified Chinese conversion.
try:
    import zhconv
    HAS_ZHCONV = True
except ImportError:
    HAS_ZHCONV = False
    print("⚠️ 未安装zhconv库,无法进行繁简体转换")
    print(" 安装命令: pip install zhconv")

app = Flask(__name__)
CORS(app)

# Logging configuration.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Model to load. Options: tiny, base, small, medium, large.
#   tiny:         fastest, mediocre accuracy
#   base:         fast, good accuracy (recommended default)
#   small:        slower, high accuracy
#   medium/large: very slow, highest accuracy
WHISPER_MODEL_NAME = os.environ.get("WHISPER_MODEL", "base")

# Module-level cache for the lazily loaded Whisper model.
whisper_model = None

# Matches every character that is not a letter, digit, underscore or CJK
# character; used to strip punctuation and whitespace before scoring.
_NON_WORD_RE = re.compile(r'[^\w]', flags=re.UNICODE)


def load_whisper_model():
    """Return the shared Whisper model, loading it on first use."""
    global whisper_model
    if whisper_model is None:
        logger.info("正在加载Whisper模型...")
        whisper_model = whisper.load_model(WHISPER_MODEL_NAME)
        logger.info("✅ Whisper模型加载成功")
    return whisper_model


def convert_to_simplified(text):
    """Convert Traditional Chinese text to Simplified Chinese.

    Args:
        text: The text to convert.

    Returns:
        The converted text. The input is returned unchanged when it is
        empty, when ``zhconv`` is not installed, or when conversion fails.
    """
    if not text or not HAS_ZHCONV:
        return text

    try:
        simplified = zhconv.convert(text, 'zh-cn')
    except Exception as e:
        # Best effort: a failed conversion must not fail the whole request.
        logger.warning(f"繁简转换失败: {e}")
        return text

    logger.info(f"繁简转换: {text} -> {simplified}")
    return simplified


@contextmanager
def _saved_upload(upload, suffix='.mp3'):
    """Save an uploaded file to a temporary path and always clean it up.

    The temporary file is removed even when saving the upload fails, so a
    bad request cannot leave files behind in the temp directory.

    Args:
        upload: The uploaded file object from ``request.files``.
        suffix: File-name suffix for the temporary file.

    Yields:
        The path of the temporary file containing the upload.
    """
    fd, path = tempfile.mkstemp(suffix=suffix)
    # Only the path is needed; the upload is written through its own handle.
    os.close(fd)
    try:
        upload.save(path)
        yield path
    finally:
        try:
            os.remove(path)
        except FileNotFoundError:
            pass


def _audio_duration(result):
    """Return the audio length in seconds from a transcription result.

    Uses an explicit ``duration`` key when one is present; otherwise falls
    back to the end timestamp of the last segment, or 0 without segments.
    """
    duration = result.get('duration')
    if duration is not None:
        return duration
    segments = result.get('segments') or []
    if not segments:
        return 0
    return float(segments[-1].get('end', 0))


def _compute_scores(recognized_text, standard_text):
    """Score recognized text against the reference text.

    Both texts are stripped of punctuation/whitespace and lower-cased
    before comparison.

    Returns:
        A dict of unrounded values: ``similarity`` (0-1) and ``accuracy``,
        ``completeness``, ``fluency``, ``pronunciation``, ``total`` (0-100).
    """
    clean_recognized = _NON_WORD_RE.sub('', recognized_text).lower()
    clean_standard = _NON_WORD_RE.sub('', standard_text).lower()

    similarity = SequenceMatcher(None, clean_recognized, clean_standard).ratio()

    accuracy = similarity * 100
    # Length ratio capped at 100; max(..., 1) avoids division by zero when
    # the reference text is empty.
    completeness = min(
        len(clean_recognized) / max(len(clean_standard), 1) * 100, 100
    )
    # Fluency and pronunciation are derived from accuracy, not measured.
    fluency = accuracy * 0.95
    pronunciation = accuracy * 0.98

    total = (accuracy * 0.3 + completeness * 0.25
             + fluency * 0.3 + pronunciation * 0.15)

    return {
        "similarity": similarity,
        "accuracy": accuracy,
        "completeness": completeness,
        "fluency": fluency,
        "pronunciation": pronunciation,
        "total": total,
    }


@app.route('/health', methods=['GET'])
def health():
    """Health check endpoint."""
    return jsonify({
        "status": "ok",
        "service": "Whisper语音识别服务",
        "model": WHISPER_MODEL_NAME
    })


@app.route('/recognize', methods=['POST'])
def recognize():
    """Speech recognition endpoint.

    Form parameters:
        file: Audio file (MP3, WAV, M4A, ...).
        language: Language code (optional, defaults to ``zh``; send an
            empty value to let Whisper auto-detect the language).

    Returns:
        JSON of the form::

            {
                "code": 200,
                "msg": "识别成功",
                "data": {
                    "text": "<recognized text>",
                    "language": "zh",
                    "segments": 3,
                    "duration": 4.2
                }
            }

        A missing file yields HTTP 400; any other failure yields HTTP 500.
    """
    try:
        if 'file' not in request.files:
            return jsonify({
                "code": 400,
                "msg": "未找到音频文件",
                "data": None
            }), 400

        audio_file = request.files['file']
        # An empty value becomes None, which makes Whisper auto-detect.
        language = request.form.get('language', 'zh') or None

        with _saved_upload(audio_file) as temp_path:
            model = load_whisper_model()

            logger.info(f"开始识别音频: {audio_file.filename}")
            result = model.transcribe(
                temp_path,
                language=language,
                task='transcribe',  # transcribe = recognize; translate = to English
                fp16=False  # CPU mode
            )

            recognized_text = result['text'].strip()
            detected_language = result.get('language', language)

            # Normalize Traditional Chinese output to Simplified.
            if detected_language == 'zh':
                recognized_text = convert_to_simplified(recognized_text)

            logger.info(f"✅ 识别成功: {recognized_text}")

            return jsonify({
                "code": 200,
                "msg": "识别成功",
                "data": {
                    "text": recognized_text,
                    "language": detected_language,
                    "segments": len(result.get('segments', [])),
                    "duration": _audio_duration(result)
                }
            })

    except Exception as e:
        # Top-level boundary: log the traceback and report a JSON error.
        logger.error(f"识别失败: {str(e)}", exc_info=True)
        return jsonify({
            "code": 500,
            "msg": f"识别失败: {str(e)}",
            "data": None
        }), 500


@app.route('/evaluate', methods=['POST'])
def evaluate():
    """Speech evaluation endpoint.

    Transcribes the audio as Chinese and scores it against a reference text.

    Form parameters:
        file: Audio file.
        text: Reference text to compare against (defaults to empty).

    Returns:
        JSON of the form::

            {
                "code": 200,
                "msg": "评测成功",
                "data": {
                    "text": "<recognized text>",
                    "score": 95,
                    "accuracy": 98,
                    "fluency": 92,
                    "completeness": 95,
                    "pronunciation": 94,
                    "similarity": 98.0
                }
            }

        A missing file yields HTTP 400; any other failure yields HTTP 500.
    """
    try:
        if 'file' not in request.files:
            return jsonify({"code": 400, "msg": "未找到音频文件", "data": None}), 400

        audio_file = request.files['file']
        standard_text = request.form.get('text', '')

        with _saved_upload(audio_file) as temp_path:
            # 1. Transcribe the audio.
            model = load_whisper_model()
            result = model.transcribe(temp_path, language='zh', fp16=False)
            recognized_text = convert_to_simplified(result['text'].strip())

            # 2. Score the transcription against the reference text.
            scores = _compute_scores(recognized_text, standard_text)
            total_score = scores["total"]

            logger.info(f"✅ 评测完成: {recognized_text} | 得分: {total_score:.0f}")

            return jsonify({
                "code": 200,
                "msg": "评测成功",
                "data": {
                    "text": recognized_text,
                    "score": round(total_score),
                    "accuracy": round(scores["accuracy"]),
                    "fluency": round(scores["fluency"]),
                    "completeness": round(scores["completeness"]),
                    "pronunciation": round(scores["pronunciation"]),
                    "similarity": round(scores["similarity"] * 100, 2)
                }
            })

    except Exception as e:
        # Top-level boundary: log the traceback and report a JSON error.
        logger.error(f"评测失败: {str(e)}", exc_info=True)
        return jsonify({"code": 500, "msg": f"评测失败: {str(e)}", "data": None}), 500


if __name__ == '__main__':
    print("=" * 60)
    print("🎤 本地Whisper语音识别服务")
    print("=" * 60)
    print("")
    print("✅ 优势:")
    print(" 1. 完全免费,无限次调用")
    print(" 2. 离线运行,不需要网络")
    print(" 3. 识别准确率高")
    print(" 4. 数据完全私有")
    print("")
    print("📦 安装依赖:")
    print(" pip install openai-whisper flask flask-cors")
    print("")
    print("🚀 启动服务:")
    print(" python whisper_server.py")
    print("")
    print("📌 API接口:")
    print(" 健康检查: GET http://localhost:5001/health")
    print(" 语音识别: POST http://localhost:5001/recognize")
    print(" 语音评测: POST http://localhost:5001/evaluate")
    print("")
    print("=" * 60)
    print("")

    # Debug mode enables the Werkzeug interactive debugger, which allows
    # arbitrary code execution. The server listens on all interfaces, so it
    # stays off unless explicitly requested.
    debug_enabled = os.environ.get("WHISPER_DEBUG", "").lower() in ("1", "true", "yes")

    # Start the service.
    app.run(host='0.0.0.0', port=5001, debug=debug_enabled)