293 lines
8.7 KiB
Python
293 lines
8.7 KiB
Python
|
|
#!/usr/bin/env python
|
|||
|
|
# -*- coding: utf-8 -*-
|
|||
|
|
"""
|
|||
|
|
本地Whisper语音识别服务
|
|||
|
|
替代百度API,完全离线运行
|
|||
|
|
|
|||
|
|
安装:
|
|||
|
|
pip install openai-whisper flask flask-cors
|
|||
|
|
|
|||
|
|
运行:
|
|||
|
|
python whisper_server.py
|
|||
|
|
|
|||
|
|
优点:
|
|||
|
|
1. 完全免费,无限次调用
|
|||
|
|
2. 离线运行,不需要网络
|
|||
|
|
3. 识别准确率高(接近百度API)
|
|||
|
|
4. 支持中文、英文等多语言
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
from flask import Flask, request, jsonify
|
|||
|
|
from flask_cors import CORS
|
|||
|
|
import whisper
|
|||
|
|
import os
|
|||
|
|
import tempfile
|
|||
|
|
import logging
|
|||
|
|
|
|||
|
|
# Optional dependency: zhconv is used for Traditional -> Simplified Chinese
# conversion. HAS_ZHCONV records whether the import succeeded.
try:
    import zhconv
except ImportError:
    HAS_ZHCONV = False
    print("⚠️ 未安装zhconv库,无法进行繁简体转换")
    print(" 安装命令: pip install zhconv")
else:
    HAS_ZHCONV = True
|
|||
|
|
|
|||
|
|
# Logging setup: INFO level on the root logger, plus a module-level logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Flask application; CORS is applied with flask-cors' default options.
app = Flask(__name__)
CORS(app)

# Module-level cache for the Whisper model; stays None until it is loaded.
whisper_model = None
|
|||
|
|
|
|||
|
|
def load_whisper_model():
    """Return the shared Whisper model, loading it on first use.

    The model is cached in the module-level ``whisper_model`` variable, so
    only the first call runs ``whisper.load_model``.

    Returns:
        The object returned by ``whisper.load_model("base")``.
    """
    global whisper_model

    # Already loaded: reuse the cached instance.
    if whisper_model is not None:
        return whisper_model

    logger.info("正在加载Whisper模型...")
    # Model size trade-offs, as noted by the original author:
    #   tiny         - fastest, mediocre accuracy
    #   base         - fast, good accuracy (recommended)
    #   small        - slower, high accuracy
    #   medium/large - very slow, highest accuracy
    whisper_model = whisper.load_model("base")
    logger.info("✅ Whisper模型加载成功")
    return whisper_model
|
|||
|
|
|
|||
|
|
def convert_to_simplified(text):
    """Convert Traditional Chinese text to Simplified Chinese.

    Args:
        text: The text to convert.

    Returns:
        The converted text. ``text`` is returned unchanged when it is falsy,
        when zhconv is not installed, or when the conversion raises.
    """
    # Nothing to convert, or no converter available: hand the input back.
    if not text or not HAS_ZHCONV:
        return text

    try:
        converted = zhconv.convert(text, 'zh-cn')
        logger.info(f"繁简转换: {text} -> {converted}")
    except Exception as e:
        # Best effort: a failed conversion must not break the request.
        logger.warning(f"繁简转换失败: {e}")
        return text
    return converted
|
|||
|
|
|
|||
|
|
@app.route('/health', methods=['GET'])
def health():
    """Health check endpoint: report a fixed status, service name and model."""
    payload = {
        "status": "ok",
        "service": "Whisper语音识别服务",
        "model": "base",
    }
    return jsonify(payload)
|
|||
|
|
|
|||
|
|
@app.route('/recognize', methods=['POST'])
def recognize():
    """Transcribe an uploaded audio file with Whisper.

    Form parameters:
        file: The audio file to transcribe (multipart upload).
        language: Optional language passed to Whisper. Defaults to ``zh``.
            Send ``auto`` (or an empty value) to pass ``language=None`` so
            that Whisper picks the language itself.

    Returns:
        On success, a JSON body of the form::

            {
                "code": 200,
                "msg": "识别成功",
                "data": {
                    "text": "<recognized text>",
                    "language": "zh",
                    "segments": 3,
                    "duration": 12.5
                }
            }

        ``segments`` is the number of segments in the Whisper result.
        ``duration`` is the result's ``duration`` value when present,
        otherwise the ``end`` value of the last segment (0 when there are
        no segments).

        A 400 response is returned when no file was uploaded, and a 500
        response carrying the error message when anything else fails.
    """
    try:
        # An empty filename means the form was submitted without a file.
        audio_file = request.files.get('file')
        if audio_file is None or not audio_file.filename:
            return jsonify({
                "code": 400,
                "msg": "未找到音频文件",
                "data": None
            }), 400

        # Default to Chinese; "auto"/empty maps to None (no forced language).
        language = request.form.get('language', 'zh').strip()
        if language.lower() in ('', 'auto'):
            language = None

        # Keep the upload's own extension when it looks sane, so the temp
        # file name matches the real format; otherwise fall back to ".mp3".
        suffix = os.path.splitext(audio_file.filename)[1].lower()
        extension = suffix[1:]
        if not (0 < len(extension) <= 10
                and extension.isascii() and extension.isalnum()):
            suffix = '.mp3'

        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
        temp_path = temp_file.name
        try:
            # Write through the already-open handle instead of reopening the
            # file by name: reopening an open NamedTemporaryFile fails on
            # Windows. The handle is closed before Whisper reads the file.
            with temp_file:
                audio_file.save(temp_file)

            model = load_whisper_model()

            logger.info(f"开始识别音频: {audio_file.filename}")
            result = model.transcribe(
                temp_path,
                language=language,
                task='transcribe',  # transcribe = recognize, translate = to English
                fp16=False  # original author's note: CPU mode
            )

            recognized_text = result['text'].strip()
            detected_language = result.get('language', language)

            # Normalize Chinese output to Simplified Chinese.
            if detected_language == 'zh':
                recognized_text = convert_to_simplified(recognized_text)

            logger.info(f"✅ 识别成功: {recognized_text}")

            segments = result.get('segments') or []
            # NOTE(review): the transcribe() result normally carries no
            # 'duration' key, so fall back to the last segment's end time.
            duration = result.get('duration')
            if duration is None:
                duration = float(segments[-1].get('end', 0)) if segments else 0

            return jsonify({
                "code": 200,
                "msg": "识别成功",
                "data": {
                    "text": recognized_text,
                    "language": detected_language,
                    "segments": len(segments),
                    "duration": duration
                }
            })

        finally:
            # Always remove the temp file, including when saving the upload
            # or the transcription itself failed.
            try:
                os.remove(temp_path)
            except OSError as cleanup_error:
                logger.warning(f"删除临时文件失败: {cleanup_error}")

    except Exception as e:
        logger.error(f"识别失败: {str(e)}", exc_info=True)
        return jsonify({
            "code": 500,
            "msg": f"识别失败: {str(e)}",
            "data": None
        }), 500
|
|||
|
|
|
|||
|
|
@app.route('/evaluate', methods=['POST'])
def evaluate():
    """Score an uploaded recording against a reference text.

    The audio is transcribed with Whisper (language fixed to ``zh``),
    converted to Simplified Chinese, and compared with the reference text.

    Form parameters:
        file: The audio file to evaluate (multipart upload).
        text: The reference text the speaker was expected to read.

    Returns:
        On success, a JSON body of the form::

            {
                "code": 200,
                "msg": "评测成功",
                "data": {
                    "text": "<recognized text>",
                    "score": 95,
                    "accuracy": 98,
                    "fluency": 92,
                    "completeness": 95,
                    "pronunciation": 94,
                    "similarity": 97.5
                }
            }

        ``accuracy`` is the character-sequence similarity (0-100) and
        ``completeness`` is the recognized/reference length ratio capped at
        100. ``fluency`` and ``pronunciation`` are not measured from the
        audio: they are fixed fractions of ``accuracy`` (0.95 and 0.98).
        ``score`` is the weighted sum of the four values.

        A 400 response is returned when the file or the reference text is
        missing, and a 500 response carrying the error message when
        anything else fails.
    """
    try:
        # An empty filename means the form was submitted without a file.
        audio_file = request.files.get('file')
        if audio_file is None or not audio_file.filename:
            return jsonify({"code": 400, "msg": "未找到音频文件", "data": None}), 400

        # Without a reference there is nothing to score against; the old
        # behaviour produced misleading scores (e.g. 100 for silence).
        standard_text = request.form.get('text', '')
        if not standard_text.strip():
            return jsonify({"code": 400, "msg": "未提供标准文本", "data": None}), 400

        # Keep the upload's own extension when it looks sane, so the temp
        # file name matches the real format; otherwise fall back to ".mp3".
        suffix = os.path.splitext(audio_file.filename)[1].lower()
        extension = suffix[1:]
        if not (0 < len(extension) <= 10
                and extension.isascii() and extension.isalnum()):
            suffix = '.mp3'

        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
        temp_path = temp_file.name
        try:
            # Write through the already-open handle instead of reopening the
            # file by name: reopening an open NamedTemporaryFile fails on
            # Windows. The handle is closed before Whisper reads the file.
            with temp_file:
                audio_file.save(temp_file)

            # 1. Transcribe the audio.
            model = load_whisper_model()
            result = model.transcribe(temp_path, language='zh', fp16=False)
            recognized_text = result['text'].strip()

            # Normalize both sides to Simplified Chinese so a Traditional
            # reference text is not penalized as a mismatch.
            recognized_text = convert_to_simplified(recognized_text)
            reference_text = convert_to_simplified(standard_text)

            # 2. Compute the scores.
            from difflib import SequenceMatcher

            # Compare without whitespace and case differences.
            clean_recognized = ''.join(recognized_text.split()).lower()
            clean_standard = ''.join(reference_text.split()).lower()

            # autojunk=False: the default heuristic ignores frequently
            # occurring characters once a sequence reaches 200 items, which
            # would skew the ratio for longer passages.
            similarity = SequenceMatcher(
                None, clean_recognized, clean_standard, autojunk=False
            ).ratio()

            accuracy = similarity * 100
            completeness = min(
                len(clean_recognized) / max(len(clean_standard), 1) * 100, 100
            )
            fluency = accuracy * 0.95  # derived from accuracy, not measured
            pronunciation = accuracy * 0.98  # derived from accuracy, not measured

            # Weighted total of the four sub-scores.
            total_score = (
                accuracy * 0.3
                + completeness * 0.25
                + fluency * 0.3
                + pronunciation * 0.15
            )

            logger.info(f"✅ 评测完成: {recognized_text} | 得分: {total_score:.0f}")

            return jsonify({
                "code": 200,
                "msg": "评测成功",
                "data": {
                    "text": recognized_text,
                    "score": round(total_score),
                    "accuracy": round(accuracy),
                    "fluency": round(fluency),
                    "completeness": round(completeness),
                    "pronunciation": round(pronunciation),
                    "similarity": round(similarity * 100, 2)
                }
            })

        finally:
            # Always remove the temp file, including when saving the upload
            # or the transcription itself failed.
            try:
                os.remove(temp_path)
            except OSError as cleanup_error:
                logger.warning(f"删除临时文件失败: {cleanup_error}")

    except Exception as e:
        logger.error(f"评测失败: {str(e)}", exc_info=True)
        return jsonify({"code": 500, "msg": f"评测失败: {str(e)}", "data": None}), 500
|
|||
|
|
|
|||
|
|
if __name__ == '__main__':
    # Startup banner: advantages, install/run hints and the API endpoints.
    banner_lines = (
        "=" * 60,
        "🎤 本地Whisper语音识别服务",
        "=" * 60,
        "",
        "✅ 优势:",
        " 1. 完全免费,无限次调用",
        " 2. 离线运行,不需要网络",
        " 3. 识别准确率高",
        " 4. 数据完全私有",
        "",
        "📦 安装依赖:",
        " pip install openai-whisper flask flask-cors",
        "",
        "🚀 启动服务:",
        " python whisper_server.py",
        "",
        "📌 API接口:",
        " 健康检查: GET http://localhost:5001/health",
        " 语音识别: POST http://localhost:5001/recognize",
        " 语音评测: POST http://localhost:5001/evaluate",
        "",
        "=" * 60,
        "",
    )
    print("\n".join(banner_lines))

    # Debug mode turns on Werkzeug's interactive debugger, which lets anyone
    # who can reach the port run arbitrary code. The server listens on all
    # interfaces, so debug stays off unless FLASK_DEBUG explicitly enables it.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in (
        '1', 'true', 'yes', 'on'
    )

    # Start the service.
    app.run(host='0.0.0.0', port=5001, debug=debug_enabled)
|