guoyu/log/whisper_server.py
2025-12-12 11:42:50 +08:00

295 lines
8.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
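Local Whisper speech-recognition service.

Replaces the Baidu API and runs fully offline.

Install:
    pip install openai-whisper flask flask-cors
    pip install zhconv  # optional: Traditional -> Simplified Chinese conversion

Run:
    python whisper_server.py

Advantages:
    1. Completely free, unlimited calls.
    2. Runs offline, no network required.
    3. High recognition accuracy, close to the Baidu API.
    4. Supports Chinese, English, and other languages.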
"""
from flask import Flask, request, jsonify
from flask_cors import CORS
import whisper
import os
import tempfile
import logging
# Optional dependency: zhconv performs Traditional -> Simplified Chinese
# conversion. HAS_ZHCONV records whether the import succeeded so callers can
# check it before touching the module.
try:
    import zhconv
except ImportError:
    HAS_ZHCONV = False
    print("⚠️ 未安装zhconv库无法进行繁简体转换")
    print(" 安装命令: pip install zhconv")
else:
    HAS_ZHCONV = True
# Flask application object; the route decorators below register on it.
app = Flask(__name__)
# Enable CORS for the app (no options are passed, so flask-cors defaults apply).
CORS(app)
# Logging configuration: INFO level and a module-scoped logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Module-level Whisper model; stays None until load_whisper_model() sets it.
whisper_model = None
def load_whisper_model():
    """Return the shared Whisper model, loading it on the first call.

    The model is stored in the module-level ``whisper_model`` variable, so
    later calls return the already-loaded instance without reloading.

    Returns:
        The object returned by ``whisper.load_model("base")``.
    """
    global whisper_model

    # Already loaded: nothing to do.
    if whisper_model is not None:
        return whisper_model

    logger.info("正在加载Whisper模型...")
    # "base" is used as the balance between speed and accuracy.
    # Author's notes on the available sizes:
    #   tiny         - fastest, mediocre accuracy
    #   base         - fast, good accuracy (recommended)
    #   small        - slower, high accuracy
    #   medium/large - very slow, highest accuracy
    whisper_model = whisper.load_model("base")
    logger.info("✅ Whisper模型加载成功")
    return whisper_model
def convert_to_simplified(text):
    """Convert Traditional Chinese text to Simplified Chinese.

    Args:
        text: The text to convert.

    Returns:
        The result of ``zhconv.convert(text, 'zh-cn')``. The input is
        returned unchanged when it is empty/falsy, when zhconv is not
        installed, or when the conversion raises.
    """
    # Nothing to convert, or no converter available: hand the input back.
    if not text or not HAS_ZHCONV:
        return text

    try:
        converted = zhconv.convert(text, 'zh-cn')
        logger.info(f"繁简转换: {text} -> {converted}")
        return converted
    except Exception as exc:
        # Best effort: a failed conversion must not fail the request.
        logger.warning(f"繁简转换失败: {exc}")
    return text
@app.route('/health', methods=['GET'])
def health():
    """Health check: return a static JSON payload describing the service."""
    payload = {
        "status": "ok",
        "service": "Whisper语音识别服务",
        "model": "base",
    }
    return jsonify(payload)
@app.route('/recognize', methods=['POST'])
def recognize():
    """Transcribe an uploaded audio file with the local Whisper model.

    Form fields (multipart/form-data):
        file: The audio file to transcribe (required). The original notes
            list MP3, WAV, M4A, etc. as supported.
        language: Language code handed to Whisper (optional, default 'zh').
            An empty value or 'auto' lets Whisper detect the language.

    Returns:
        A JSON envelope. On success (HTTP 200):

            {
                "code": 200,
                "msg": "识别成功",
                "data": {
                    "text": "<recognized text>",
                    "language": "zh",
                    "segments": 3,
                    "duration": 4.2
                }
            }

        A missing ``file`` field yields HTTP 400, and any exception raised
        while handling the request yields HTTP 500 with the error text in
        ``msg``; ``data`` is null in both cases.
    """
    try:
        # Reject requests that carry no upload at all.
        if 'file' not in request.files:
            return jsonify({
                "code": 400,
                "msg": "未找到音频文件",
                "data": None
            }), 400

        audio_file = request.files['file']

        # Chinese is the default. An empty value or "auto" is mapped to None,
        # which makes Whisper run its own language detection.
        language = request.form.get('language', 'zh')
        if (language or '').strip().lower() in ('', 'auto'):
            language = None

        # Reserve the temp path first and close the handle, so the cleanup
        # below also covers a failing save() and the file is never leaked.
        # The '.mp3' suffix is used regardless of the upload's real format.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
            temp_path = temp_file.name

        try:
            audio_file.save(temp_path)

            model = load_whisper_model()

            logger.info(f"开始识别音频: {audio_file.filename}")
            result = model.transcribe(
                temp_path,
                language=language,
                task='transcribe',  # 'transcribe' keeps the source language; 'translate' outputs English
                fp16=False  # CPU mode
            )

            recognized_text = result['text'].strip()
            detected_language = result.get('language', language)

            # Whisper may emit Traditional characters for Chinese audio.
            if detected_language == 'zh':
                recognized_text = convert_to_simplified(recognized_text)

            segments = result.get('segments', [])
            # Whisper's transcribe() result has "text", "segments" and
            # "language" but no "duration" key, so the old lookup always
            # reported 0. Use the end timestamp of the last segment as the
            # (approximate) spoken duration instead.
            duration = result.get('duration')
            if duration is None:
                duration = float(segments[-1].get('end', 0)) if segments else 0

            logger.info(f"✅ 识别成功: {recognized_text}")
            return jsonify({
                "code": 200,
                "msg": "识别成功",
                "data": {
                    "text": recognized_text,
                    "language": detected_language,
                    "segments": len(segments),
                    "duration": duration
                }
            })
        finally:
            # Always remove the temp file, on success and on failure.
            if os.path.exists(temp_path):
                os.remove(temp_path)
    except Exception as e:
        # Top-level boundary: log the traceback and answer with a JSON error.
        logger.error(f"识别失败: {str(e)}", exc_info=True)
        return jsonify({
            "code": 500,
            "msg": f"识别失败: {str(e)}",
            "data": None
        }), 500
def _compute_evaluation_scores(recognized_text, standard_text):
    """Score recognized text against the reference text.

    Both texts are reduced to lower-cased characters/letters/digits before
    comparison, so punctuation, whitespace and underscores do not affect the
    result.

    Args:
        recognized_text: Text produced by speech recognition.
        standard_text: Reference text the speaker was supposed to read.

    Returns:
        A dict with rounded ``score``, ``accuracy``, ``fluency``,
        ``completeness`` and ``pronunciation`` (0-100) plus ``similarity``
        (percentage, two decimals).
    """
    # Kept function-local, as in the original, so the file's top-level
    # import block does not need to change.
    import re
    from difflib import SequenceMatcher

    # [\W_] also removes underscores, which a bare \w class would keep.
    clean_recognized = re.sub(r'[\W_]', '', recognized_text, flags=re.UNICODE).lower()
    clean_standard = re.sub(r'[\W_]', '', standard_text, flags=re.UNICODE).lower()

    similarity = SequenceMatcher(None, clean_recognized, clean_standard).ratio()

    accuracy = similarity * 100
    # Share of the reference length that was spoken, capped at 100.
    completeness = min(len(clean_recognized) / max(len(clean_standard), 1) * 100, 100)
    # Fluency and pronunciation are not measured from the audio; they are
    # fixed fractions of the text accuracy.
    fluency = accuracy * 0.95
    pronunciation = accuracy * 0.98

    # Weighted total: accuracy 30%, completeness 25%, fluency 30%, pronunciation 15%.
    total_score = (accuracy * 0.3 + completeness * 0.25 + fluency * 0.3 + pronunciation * 0.15)

    return {
        "score": round(total_score),
        "accuracy": round(accuracy),
        "fluency": round(fluency),
        "completeness": round(completeness),
        "pronunciation": round(pronunciation),
        "similarity": round(similarity * 100, 2),
    }


@app.route('/evaluate', methods=['POST'])
def evaluate():
    """Transcribe an upload and score it against a reference text.

    Form fields (multipart/form-data):
        file: The audio file to evaluate (required).
        text: The reference text to compare against (required).

    Returns:
        A JSON envelope. On success (HTTP 200):

            {
                "code": 200,
                "msg": "评测成功",
                "data": {
                    "text": "<recognized text>",
                    "score": 95,
                    "accuracy": 98,
                    "fluency": 92,
                    "completeness": 95,
                    "pronunciation": 94,
                    "similarity": 97.56
                }
            }

        A missing ``file`` field or an empty ``text`` field yields HTTP 400,
        and any exception raised while handling the request yields HTTP 500
        with the error text in ``msg``; ``data`` is null in those cases.
    """
    try:
        if 'file' not in request.files:
            return jsonify({"code": 400, "msg": "未找到音频文件", "data": None}), 400

        audio_file = request.files['file']
        standard_text = request.form.get('text', '')

        # Without a reference text the scores are meaningless (two empty
        # strings compare as 100% similar), so reject the request before
        # running the expensive transcription.
        if not standard_text.strip():
            return jsonify({"code": 400, "msg": "缺少标准文本", "data": None}), 400

        # Reserve the temp path first and close the handle, so the cleanup
        # below also covers a failing save() and the file is never leaked.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
            temp_path = temp_file.name

        try:
            audio_file.save(temp_path)

            # 1. Recognize the audio (always decoded as Chinese).
            model = load_whisper_model()
            result = model.transcribe(temp_path, language='zh', fp16=False)
            recognized_text = convert_to_simplified(result['text'].strip())

            # 2. Score it against the reference text.
            scores = _compute_evaluation_scores(recognized_text, standard_text)
            total_score = scores["score"]

            logger.info(f"✅ 评测完成: {recognized_text} | 得分: {total_score:.0f}")
            return jsonify({
                "code": 200,
                "msg": "评测成功",
                "data": {"text": recognized_text, **scores}
            })
        finally:
            # Always remove the temp file, on success and on failure.
            if os.path.exists(temp_path):
                os.remove(temp_path)
    except Exception as e:
        # Top-level boundary: log the traceback and answer with a JSON error.
        logger.error(f"评测失败: {str(e)}", exc_info=True)
        return jsonify({"code": 500, "msg": f"评测失败: {str(e)}", "data": None}), 500
if __name__ == '__main__':
    # Startup banner, printed line by line exactly as before.
    banner_lines = (
        "=" * 60,
        "🎤 本地Whisper语音识别服务",
        "=" * 60,
        "",
        "✅ 优势:",
        " 1. 完全免费,无限次调用",
        " 2. 离线运行,不需要网络",
        " 3. 识别准确率高",
        " 4. 数据完全私有",
        "",
        "📦 安装依赖:",
        " pip install openai-whisper flask flask-cors",
        "",
        "🚀 启动服务:",
        " python whisper_server.py",
        "",
        "📌 API接口",
        " 健康检查: GET http://localhost:5001/health",
        " 语音识别: POST http://localhost:5001/recognize",
        " 语音评测: POST http://localhost:5001/evaluate",
        "",
        "=" * 60,
        "",
    )
    for banner_line in banner_lines:
        print(banner_line)

    # Security: the server listens on all interfaces (0.0.0.0). With
    # debug=True Flask enables the Werkzeug interactive debugger, which lets
    # anyone who can reach the port execute arbitrary Python. Debug mode is
    # therefore off unless explicitly enabled via WHISPER_DEBUG=1/true/yes/on.
    debug_enabled = os.environ.get('WHISPER_DEBUG', '').strip().lower() in (
        '1', 'true', 'yes', 'on'
    )

    # Start the development server.
    app.run(host='0.0.0.0', port=5001, debug=debug_enabled)