guoyu/Test/python/whisper_server (1).py

293 lines
8.7 KiB
Python
Raw Normal View History

2025-12-11 23:28:07 +08:00
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
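Local Whisper speech recognition service.

A fully offline replacement for the Baidu API.

Install:
    pip install openai-whisper flask flask-cors

Run:
    python whisper_server.py

Advantages:
    1. Completely free, unlimited calls.
    2. Runs offline; no network connection required.
    3. High recognition accuracy, close to the Baidu API.
    4. Supports Chinese, English, and other languages.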
"""
from flask import Flask, request, jsonify
from flask_cors import CORS
import whisper
import os
import tempfile
import logging
# Optional dependency: zhconv performs Traditional -> Simplified Chinese
# conversion. HAS_ZHCONV records whether the import succeeded so callers can
# skip the conversion when the library is missing.
try:
    import zhconv
except ImportError:
    HAS_ZHCONV = False
    print("⚠️ 未安装zhconv库无法进行繁简体转换")
    print(" 安装命令: pip install zhconv")
else:
    HAS_ZHCONV = True

# Flask application with CORS enabled on it.
app = Flask(__name__)
CORS(app)

# Logging configuration: INFO level, module-named logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Module-level slot for the Whisper model; stays None until
# load_whisper_model() fills it on first use.
whisper_model = None
def load_whisper_model():
    """Return the shared Whisper model, loading it on the first call.

    The model is stored in the module-level ``whisper_model`` variable so
    later calls reuse the already-loaded instance.

    Returns:
        The object returned by ``whisper.load_model("base")``.
    """
    global whisper_model

    # Already loaded: hand back the cached instance.
    if whisper_model is not None:
        return whisper_model

    logger.info("正在加载Whisper模型...")
    # The "base" model is used as a balance between speed and accuracy.
    # Other options noted by the original author:
    #   tiny          - fastest, mediocre accuracy
    #   base          - fast, good accuracy (recommended)
    #   small         - slower, high accuracy
    #   medium/large  - very slow, highest accuracy
    whisper_model = whisper.load_model("base")
    logger.info("✅ Whisper模型加载成功")
    return whisper_model
def convert_to_simplified(text):
    """Convert Traditional Chinese text to Simplified Chinese.

    Args:
        text: The text to convert.

    Returns:
        The converted text. The input is returned unchanged when it is
        empty/None, when zhconv is not installed, or when the conversion
        raises.
    """
    # Nothing to do for empty input or when the converter is unavailable.
    if not text or not HAS_ZHCONV:
        return text

    try:
        converted = zhconv.convert(text, 'zh-cn')
        logger.info(f"繁简转换: {text} -> {converted}")
    except Exception as e:
        # Best effort: fall back to the original text on any failure.
        logger.warning(f"繁简转换失败: {e}")
        return text
    return converted
@app.route('/health', methods=['GET'])
def health():
    """Health-check endpoint: return a fixed status payload as JSON."""
    payload = dict(
        status="ok",
        service="Whisper语音识别服务",
        model="base",
    )
    return jsonify(payload)
@app.route('/recognize', methods=['POST'])
def recognize():
    """
    Speech recognition endpoint.

    Form fields:
        - file: the uploaded audio file (the original notes list MP3, WAV,
          M4A, etc.; decoding is delegated to Whisper).
        - language: optional Whisper language code. Defaults to 'zh'; a
          blank value is passed to Whisper as None so it auto-detects.

    Returns (JSON):
        {
            "code": 200,
            "msg": "识别成功",
            "data": {
                "text": "recognized text",
                "language": "zh",
                "segments": 3,
                "duration": 4.2
            }
        }
        ``segments`` is the number of segments Whisper produced and
        ``duration`` is the end time (seconds) of the last segment, or 0
        when there are none. A 400 response is returned when no audio file
        is supplied, and a 500 response when recognition fails.
    """
    try:
        # Reject requests without an upload, or with an empty filename
        # (what a browser sends when no file was selected).
        audio_file = request.files.get('file')
        if audio_file is None or not audio_file.filename:
            return jsonify({
                "code": 400,
                "msg": "未找到音频文件",
                "data": None
            }), 400

        # Default to Chinese; a blank value means "let Whisper detect".
        language = (request.form.get('language', 'zh') or '').strip() or None

        temp_path = None
        try:
            # Create the temp file inside the try/finally so it is removed
            # even if saving the upload fails part-way.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
                temp_path = temp_file.name
                audio_file.save(temp_file)

            model = load_whisper_model()

            logger.info(f"开始识别音频: {audio_file.filename}")
            result = model.transcribe(
                temp_path,
                language=language,
                task='transcribe',  # 'transcribe' = recognize; 'translate' = translate to English
                fp16=False  # CPU mode
            )

            recognized_text = result['text'].strip()
            detected_language = result.get('language', language)

            # Convert Traditional to Simplified Chinese for Chinese audio.
            if detected_language == 'zh':
                recognized_text = convert_to_simplified(recognized_text)

            # Whisper's result has no 'duration' key, so fall back to the
            # end timestamp of the last segment.
            segments = result.get('segments') or []
            duration = result.get('duration')
            if duration is None:
                duration = float(segments[-1]['end']) if segments else 0

            logger.info(f"✅ 识别成功: {recognized_text}")
            return jsonify({
                "code": 200,
                "msg": "识别成功",
                "data": {
                    "text": recognized_text,
                    "language": detected_language,
                    "segments": len(segments),
                    "duration": duration
                }
            })
        finally:
            # Always remove the temporary file.
            if temp_path is not None and os.path.exists(temp_path):
                os.remove(temp_path)
    except Exception as e:
        # Top-level boundary: log with traceback and report a JSON 500.
        logger.error(f"识别失败: {str(e)}", exc_info=True)
        return jsonify({
            "code": 500,
            "msg": f"识别失败: {str(e)}",
            "data": None
        }), 500
def _score_against_standard(recognized_text, standard_text):
    """Score recognized text against the reference text.

    Both strings have all whitespace removed and are lower-cased before
    comparison. Fluency and pronunciation are derived from the accuracy
    value by fixed factors; they are not measured from the audio.

    Args:
        recognized_text: Text produced by speech recognition.
        standard_text: Reference text the speaker was expected to read.

    Returns:
        dict with rounded ``score``, ``accuracy``, ``fluency``,
        ``completeness``, ``pronunciation`` (0-100) and ``similarity``
        (percentage rounded to two decimals).
    """
    from difflib import SequenceMatcher

    clean_recognized = ''.join(recognized_text.split()).lower()
    clean_standard = ''.join(standard_text.split()).lower()

    similarity = SequenceMatcher(None, clean_recognized, clean_standard).ratio()

    accuracy = similarity * 100
    # Length ratio capped at 100; max(..., 1) guards against division by zero.
    completeness = min(len(clean_recognized) / max(len(clean_standard), 1) * 100, 100)
    fluency = accuracy * 0.95
    pronunciation = accuracy * 0.98

    # Weighted total of the four components.
    total_score = (accuracy * 0.3 + completeness * 0.25 + fluency * 0.3 + pronunciation * 0.15)

    return {
        "score": round(total_score),
        "accuracy": round(accuracy),
        "fluency": round(fluency),
        "completeness": round(completeness),
        "pronunciation": round(pronunciation),
        "similarity": round(similarity * 100, 2)
    }


@app.route('/evaluate', methods=['POST'])
def evaluate():
    """
    Speech evaluation endpoint.

    Form fields:
        - file: the uploaded audio file.
        - text: the reference text to compare the recognition against.

    Returns (JSON):
        {
            "code": 200,
            "msg": "评测成功",
            "data": {
                "text": "recognized text",
                "score": 95,
                "accuracy": 98,
                "fluency": 92,
                "completeness": 95,
                "pronunciation": 94,
                "similarity": 97.5
            }
        }
        A 400 response is returned when the audio file or the reference
        text is missing, and a 500 response when evaluation fails.
    """
    try:
        audio_file = request.files.get('file')
        if audio_file is None or not audio_file.filename:
            return jsonify({"code": 400, "msg": "未找到音频文件", "data": None}), 400

        # Without a reference text the scores are meaningless, so reject
        # the request instead of returning an arbitrary number.
        standard_text = request.form.get('text', '')
        if not standard_text.strip():
            return jsonify({"code": 400, "msg": "缺少标准文本", "data": None}), 400

        temp_path = None
        try:
            # Create the temp file inside the try/finally so it is removed
            # even if saving the upload fails part-way.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
                temp_path = temp_file.name
                audio_file.save(temp_file)

            # 1. Recognize the audio (always as Chinese, CPU mode).
            model = load_whisper_model()
            result = model.transcribe(temp_path, language='zh', fp16=False)
            recognized_text = result['text'].strip()

            # Convert Traditional to Simplified Chinese.
            recognized_text = convert_to_simplified(recognized_text)

            # 2. Compute the scores.
            scores = _score_against_standard(recognized_text, standard_text)

            logger.info(f"✅ 评测完成: {recognized_text} | 得分: {scores['score']}")

            data = {"text": recognized_text}
            data.update(scores)
            return jsonify({
                "code": 200,
                "msg": "评测成功",
                "data": data
            })
        finally:
            # Always remove the temporary file.
            if temp_path is not None and os.path.exists(temp_path):
                os.remove(temp_path)
    except Exception as e:
        # Top-level boundary: log with traceback and report a JSON 500.
        logger.error(f"评测失败: {str(e)}", exc_info=True)
        return jsonify({"code": 500, "msg": f"评测失败: {str(e)}", "data": None}), 500
if __name__ == '__main__':
    # Startup banner: advantages, install/run instructions and the API
    # endpoints, printed one line at a time.
    banner_lines = (
        "=" * 60,
        "🎤 本地Whisper语音识别服务",
        "=" * 60,
        "",
        "✅ 优势:",
        " 1. 完全免费,无限次调用",
        " 2. 离线运行,不需要网络",
        " 3. 识别准确率高",
        " 4. 数据完全私有",
        "",
        "📦 安装依赖:",
        " pip install openai-whisper flask flask-cors",
        "",
        "🚀 启动服务:",
        " python whisper_server.py",
        "",
        "📌 API接口",
        " 健康检查: GET http://localhost:5001/health",
        " 语音识别: POST http://localhost:5001/recognize",
        " 语音评测: POST http://localhost:5001/evaluate",
        "",
        "=" * 60,
        "",
    )
    for banner_line in banner_lines:
        print(banner_line)

    # The server listens on all interfaces, so the interactive debugger
    # must not be enabled by default: anyone who can reach the port could
    # use it to run code. Debug mode is opt-in via WHISPER_DEBUG=1.
    debug_enabled = os.environ.get('WHISPER_DEBUG', '').strip().lower() in (
        '1', 'true', 'yes', 'on'
    )

    # Start the service.
    app.run(host='0.0.0.0', port=5001, debug=debug_enabled)