293 lines
8.7 KiB
Python
293 lines
8.7 KiB
Python
#!/usr/bin/env python
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
本地Whisper语音识别服务
|
||
替代百度API,完全离线运行
|
||
|
||
安装:
|
||
pip install openai-whisper flask flask-cors zhconv
|
||
|
||
运行:
|
||
python whisper_server.py
|
||
|
||
优点:
|
||
1. 完全免费,无限次调用
|
||
2. 离线运行,不需要网络
|
||
3. 识别准确率高(接近百度API)
|
||
4. 支持中文、英文等多语言
|
||
"""
|
||
|
||
from flask import Flask, request, jsonify
|
||
from flask_cors import CORS
|
||
import whisper
|
||
import os
|
||
import tempfile
|
||
import logging
|
||
|
||
# Optional dependency: zhconv performs Traditional -> Simplified Chinese
# conversion. HAS_ZHCONV records whether the import succeeded so callers can
# skip the conversion when the package is not installed.
try:
    import zhconv
except ImportError:
    HAS_ZHCONV = False
    print("⚠️ 未安装zhconv库,无法进行繁简体转换")
    print(" 安装命令: pip install zhconv")
else:
    HAS_ZHCONV = True

# Flask application with CORS enabled on it.
app = Flask(__name__)
CORS(app)

# Logging: INFO level on the root logger, plus a module-level logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Process-wide Whisper model; stays None until load_whisper_model() fills it.
whisper_model = None
|
||
|
||
def load_whisper_model():
    """Return the shared Whisper model, loading it on first use.

    The loaded model is kept in the module-level ``whisper_model`` global;
    once it is set, later calls return it without loading again.

    Returns:
        The object returned by ``whisper.load_model("base")``.
    """
    global whisper_model

    # Fast path: the model has already been loaded.
    if whisper_model is not None:
        return whisper_model

    logger.info("正在加载Whisper模型...")
    # Model size trade-offs, as noted by the original author:
    #   tiny         - fastest, mediocre accuracy
    #   base         - fast, good accuracy (recommended; used here)
    #   small        - slower, high accuracy
    #   medium/large - very slow, best accuracy
    whisper_model = whisper.load_model("base")
    logger.info("✅ Whisper模型加载成功")
    return whisper_model
|
||
|
||
def convert_to_simplified(text):
    """Convert Traditional Chinese text to Simplified Chinese.

    Args:
        text: The text to convert.

    Returns:
        The converted text. ``text`` is returned unchanged when it is falsy
        (empty string or ``None``), when zhconv is not installed, or when
        the conversion raises.
    """
    # Nothing to convert, or no converter available: hand the input back.
    if not text or not HAS_ZHCONV:
        return text

    try:
        converted = zhconv.convert(text, 'zh-cn')
        logger.info(f"繁简转换: {text} -> {converted}")
    except Exception as e:
        # Best effort: a failed conversion must not fail the request.
        logger.warning(f"繁简转换失败: {e}")
        converted = text
    return converted
|
||
|
||
@app.route('/health', methods=['GET'])
def health():
    """Health check: return a fixed JSON payload describing the service."""
    payload = {
        "status": "ok",
        "service": "Whisper语音识别服务",
        "model": "base",
    }
    return jsonify(payload)
|
||
|
||
@app.route('/recognize', methods=['POST'])
def recognize():
    """Transcribe an uploaded audio file with Whisper.

    Form fields:
        file: The audio upload (required).
        language: Whisper language code (optional, defaults to ``'zh'``).
            An empty value or ``'auto'`` lets Whisper detect the language.

    Returns:
        A JSON body of the form::

            {
                "code": 200,
                "msg": "识别成功",
                "data": {
                    "text": "<recognized text>",
                    "language": "zh",
                    "segments": <number of segments>,
                    "duration": <seconds>
                }
            }

        ``duration`` is the end time of the last recognized segment (0 when
        there are no segments), unless the result itself carries a
        ``duration`` value. The response is HTTP 400 when no ``file`` part
        was uploaded and HTTP 500 (with the error text in ``msg``) when
        anything else fails.
    """
    # Set as soon as the temp file exists so the finally-clause can always
    # clean it up, even when saving the upload itself fails.
    temp_path = None
    try:
        if 'file' not in request.files:
            return jsonify({
                "code": 400,
                "msg": "未找到音频文件",
                "data": None
            }), 400

        audio_file = request.files['file']
        language = request.form.get('language', 'zh')
        # language=None asks Whisper to detect the language itself.
        if language.strip().lower() in ('', 'auto'):
            language = None

        # Write the upload through the already-open handle instead of
        # reopening the file by name while it is still open.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
            temp_path = temp_file.name
            audio_file.save(temp_file)

        model = load_whisper_model()

        logger.info(f"开始识别音频: {audio_file.filename}")
        result = model.transcribe(
            temp_path,
            language=language,
            task='transcribe',  # 'transcribe' keeps the spoken language; 'translate' outputs English
            fp16=False  # half precision disabled (original comment: CPU mode)
        )

        recognized_text = result['text'].strip()
        detected_language = result.get('language', language)

        # Normalize Chinese output to Simplified characters.
        if detected_language == 'zh':
            recognized_text = convert_to_simplified(recognized_text)

        segments = result.get('segments') or []
        # Fall back to the end time of the last segment when the result has
        # no explicit 'duration' entry.
        duration = result.get('duration')
        if duration is None:
            duration = float(segments[-1].get('end', 0)) if segments else 0

        logger.info(f"✅ 识别成功: {recognized_text}")

        return jsonify({
            "code": 200,
            "msg": "识别成功",
            "data": {
                "text": recognized_text,
                "language": detected_language,
                "segments": len(segments),
                "duration": duration
            }
        })

    except Exception as e:
        # Top-level boundary of the endpoint: log the traceback and report
        # the failure as JSON rather than letting Flask render an error page.
        logger.error(f"识别失败: {str(e)}", exc_info=True)
        return jsonify({
            "code": 500,
            "msg": f"识别失败: {str(e)}",
            "data": None
        }), 500

    finally:
        # Remove the temporary audio file on every exit path.
        if temp_path is not None and os.path.exists(temp_path):
            os.remove(temp_path)
|
||
|
||
@app.route('/evaluate', methods=['POST'])
def evaluate():
    """Score an uploaded recording against a reference text.

    The audio is transcribed with Whisper (language fixed to ``'zh'``),
    converted to Simplified Chinese, and compared with the reference text.

    Form fields:
        file: The audio upload (required).
        text: The reference text to compare against (required, non-blank).

    Returns:
        A JSON body of the form::

            {
                "code": 200,
                "msg": "评测成功",
                "data": {
                    "text": "<recognized text>",
                    "score": 95,
                    "accuracy": 98,
                    "fluency": 92,
                    "completeness": 95,
                    "pronunciation": 94,
                    "similarity": 97.5
                }
            }

        The response is HTTP 400 when the ``file`` part or the reference
        ``text`` is missing, and HTTP 500 (with the error text in ``msg``)
        when anything else fails.
    """
    # Set as soon as the temp file exists so the finally-clause can always
    # clean it up, even when saving the upload itself fails.
    temp_path = None
    try:
        if 'file' not in request.files:
            return jsonify({"code": 400, "msg": "未找到音频文件", "data": None}), 400

        audio_file = request.files['file']
        standard_text = request.form.get('text', '')

        # Without a reference text there is nothing to compare against and
        # every score would be meaningless, so reject the request.
        if not standard_text.strip():
            return jsonify({"code": 400, "msg": "缺少标准文本", "data": None}), 400

        # Write the upload through the already-open handle instead of
        # reopening the file by name while it is still open.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
            temp_path = temp_file.name
            audio_file.save(temp_file)

        # 1. Transcribe the audio and normalize to Simplified Chinese.
        model = load_whisper_model()
        result = model.transcribe(temp_path, language='zh', fp16=False)
        recognized_text = convert_to_simplified(result['text'].strip())

        # 2. Compute the scores.
        scores = _score_against_reference(recognized_text, standard_text)

        logger.info(f"✅ 评测完成: {recognized_text} | 得分: {scores['score']:.0f}")

        return jsonify({
            "code": 200,
            "msg": "评测成功",
            "data": {
                "text": recognized_text,
                "score": round(scores['score']),
                "accuracy": round(scores['accuracy']),
                "fluency": round(scores['fluency']),
                "completeness": round(scores['completeness']),
                "pronunciation": round(scores['pronunciation']),
                "similarity": round(scores['similarity'] * 100, 2)
            }
        })

    except Exception as e:
        # Top-level boundary of the endpoint: log the traceback and report
        # the failure as JSON rather than letting Flask render an error page.
        logger.error(f"评测失败: {str(e)}", exc_info=True)
        return jsonify({"code": 500, "msg": f"评测失败: {str(e)}", "data": None}), 500

    finally:
        # Remove the temporary audio file on every exit path.
        if temp_path is not None and os.path.exists(temp_path):
            os.remove(temp_path)


def _score_against_reference(recognized_text, standard_text):
    """Compute unrounded scores for a transcript against a reference text.

    Both strings are compared with all whitespace removed and lower-cased.

    Returns:
        A dict with ``similarity`` (0-1 ``SequenceMatcher`` ratio) and the
        0-100 values ``accuracy``, ``completeness``, ``fluency``,
        ``pronunciation`` and ``score`` (weighted total).
    """
    from difflib import SequenceMatcher

    clean_recognized = ''.join(recognized_text.split()).lower()
    clean_standard = ''.join(standard_text.split()).lower()

    similarity = SequenceMatcher(None, clean_recognized, clean_standard).ratio()

    accuracy = similarity * 100
    # Length ratio capped at 100; max(..., 1) avoids dividing by zero.
    completeness = min(len(clean_recognized) / max(len(clean_standard), 1) * 100, 100)
    # Fluency and pronunciation are not measured from the audio; they are
    # fixed fractions of the accuracy score.
    fluency = accuracy * 0.95
    pronunciation = accuracy * 0.98

    total_score = (accuracy * 0.3 + completeness * 0.25 + fluency * 0.3 + pronunciation * 0.15)

    return {
        "similarity": similarity,
        "accuracy": accuracy,
        "completeness": completeness,
        "fluency": fluency,
        "pronunciation": pronunciation,
        "score": total_score,
    }
|
||
|
||
if __name__ == '__main__':
    # Startup banner: advantages, install/run hints and the API endpoints.
    print("=" * 60)
    print("🎤 本地Whisper语音识别服务")
    print("=" * 60)
    print("")
    print("✅ 优势:")
    print(" 1. 完全免费,无限次调用")
    print(" 2. 离线运行,不需要网络")
    print(" 3. 识别准确率高")
    print(" 4. 数据完全私有")
    print("")
    print("📦 安装依赖:")
    print(" pip install openai-whisper flask flask-cors")
    print("")
    print("🚀 启动服务:")
    print(" python whisper_server.py")
    print("")
    print("📌 API接口:")
    print(" 健康检查: GET http://localhost:5001/health")
    print(" 语音识别: POST http://localhost:5001/recognize")
    print(" 语音评测: POST http://localhost:5001/evaluate")
    print("")
    print("=" * 60)
    print("")

    # SECURITY: the server listens on all interfaces (0.0.0.0). Flask's debug
    # mode enables the Werkzeug interactive debugger, which allows arbitrary
    # code execution, so it must not be on by default. Set WHISPER_DEBUG=1
    # to enable it explicitly for local development.
    debug_enabled = os.environ.get('WHISPER_DEBUG', '').strip().lower() in (
        '1', 'true', 'yes', 'on'
    )

    # Start the server.
    app.run(host='0.0.0.0', port=5001, debug=debug_enabled)
|