guoyu/Test/python/whisper_server_faster.py
2025-12-11 23:28:07 +08:00

204 lines
6.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Faster-Whisper speech recognition service.

Uses Faster-Whisper (faster and more broadly compatible than the original Whisper).

Install:
    pip install faster-whisper flask flask-cors -i https://pypi.tuna.tsinghua.edu.cn/simple

Run:
    python whisper_server_faster.py
"""
from flask import Flask, request, jsonify
from flask_cors import CORS
import os
import tempfile
import logging
# Logging: INFO level via basicConfig, plus a logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Flask application, with flask-cors applied to it using no extra options.
app = Flask(__name__)
CORS(app)

# Module-level slot for the Whisper model; it stays None until
# load_whisper_model() populates it on first use.
whisper_model = None
def load_whisper_model():
    """Return the shared Faster-Whisper model, creating it on first call.

    The model is stored in the module-level ``whisper_model`` variable, so
    later calls return the same instance without loading again.

    Returns:
        The ``WhisperModel`` instance held in ``whisper_model``.

    Raises:
        Exception: whatever importing ``faster_whisper`` or constructing the
            model raises; the error is logged and then re-raised unchanged.
    """
    global whisper_model

    # Already loaded: nothing to do.
    if whisper_model is not None:
        return whisper_model

    logger.info("正在加载Faster-Whisper模型...")
    try:
        # Imported here so the dependency is only needed once a model is requested.
        from faster_whisper import WhisperModel

        # "base" model on CPU with int8 compute type.
        whisper_model = WhisperModel("base", device="cpu", compute_type="int8")
        logger.info("✅ Faster-Whisper模型加载成功")
    except Exception as exc:
        logger.error(f"模型加载失败: {exc}")
        raise

    return whisper_model
@app.route('/health', methods=['GET'])
def health():
    """Health-check endpoint.

    Returns:
        A JSON response with a fixed ``status`` of ``ok``, the service name
        and the model name ``base``.
    """
    payload = {
        "status": "ok",
        "service": "Faster-Whisper语音识别服务",
        "model": "base",
    }
    return jsonify(payload)
@app.route('/recognize', methods=['POST'])
def recognize():
    """Transcribe an uploaded audio file.

    Expects a multipart/form-data POST with:
        file: the audio upload (required).
        language: language code passed to ``model.transcribe``
            (optional, defaults to ``zh``).

    Returns:
        A JSON envelope ``{"code", "msg", "data"}``:
        * HTTP 200 - ``data`` holds ``text`` (segment texts joined with a
          space), plus ``language`` and ``duration`` taken from the info
          object returned by ``transcribe``.
        * HTTP 400 - no audio file was uploaded (missing part or empty
          filename).
        * HTTP 500 - any exception raised while saving, loading the model
          or transcribing; the exception text is included in ``msg``.
    """
    try:
        audio_file = request.files.get('file')
        # A form submitted without choosing a file still carries a "file"
        # part, just with an empty filename; treat it as a missing upload
        # instead of letting transcription fail with a 500.
        if audio_file is None or not audio_file.filename:
            return jsonify({
                "code": 400,
                "msg": "未找到音频文件",
                "data": None
            }), 400

        language = request.form.get('language', 'zh')

        temp_path = None
        try:
            # Reserve a temp file name, close the handle, then write the
            # upload to that path. Everything after the name is known runs
            # under the ``finally`` below, so a failed save no longer leaves
            # the temp file behind.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
                temp_path = temp_file.name
            audio_file.save(temp_path)

            model = load_whisper_model()

            logger.info("开始识别音频: %s", audio_file.filename)
            segments, info = model.transcribe(temp_path, language=language)

            # Merge all segment texts into one string.
            recognized_text = " ".join(segment.text for segment in segments).strip()
            logger.info("✅ 识别成功: %s", recognized_text)

            return jsonify({
                "code": 200,
                "msg": "识别成功",
                "data": {
                    "text": recognized_text,
                    "language": info.language,
                    "duration": info.duration
                }
            })
        finally:
            if temp_path is not None and os.path.exists(temp_path):
                os.remove(temp_path)
    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback.
        logger.exception("识别失败: %s", e)
        return jsonify({
            "code": 500,
            "msg": f"识别失败: {str(e)}",
            "data": None
        }), 500
@app.route('/evaluate', methods=['POST'])
def evaluate():
    """Score an uploaded reading against a reference text.

    Expects a multipart/form-data POST with:
        file: the audio upload (required).
        text: the reference text the speaker was supposed to read (required).

    The audio is transcribed with ``language='zh'``; the transcript and the
    reference are compared after removing all whitespace and lower-casing.

    Returns:
        A JSON envelope ``{"code", "msg", "data"}``:
        * HTTP 200 - ``data`` holds the transcript ``text`` and the rounded
          ``score``, ``accuracy``, ``fluency``, ``completeness`` and
          ``pronunciation`` values (0-100), plus ``similarity`` as a
          percentage with two decimals.
        * HTTP 400 - the audio file is missing (or has an empty filename),
          or the reference text is missing/blank.
        * HTTP 500 - any exception raised while saving, loading the model,
          transcribing or scoring; the exception text is included in ``msg``.
    """
    try:
        audio_file = request.files.get('file')
        # A form submitted without choosing a file still carries a "file"
        # part with an empty filename; treat it as a missing upload.
        if audio_file is None or not audio_file.filename:
            return jsonify({"code": 400, "msg": "未找到音频文件", "data": None}), 400

        standard_text = request.form.get('text', '')
        # Normalise the reference up front: strip all whitespace, lower-case.
        clean_standard = ''.join(standard_text.split()).lower()
        # Without a reference there is nothing to score against (two empty
        # strings would even compare as 100% similar), so reject the request
        # before spending time on transcription.
        if not clean_standard:
            return jsonify({"code": 400, "msg": "未提供标准文本", "data": None}), 400

        temp_path = None
        try:
            # Reserve a temp file name, close the handle, then write the
            # upload to that path. The ``finally`` below removes the file
            # even when saving itself fails.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
                temp_path = temp_file.name
            audio_file.save(temp_path)

            # 1. Transcribe the audio.
            model = load_whisper_model()
            segments, info = model.transcribe(temp_path, language='zh')
            recognized_text = " ".join(segment.text for segment in segments).strip()

            # 2. Compute the scores.
            from difflib import SequenceMatcher

            clean_recognized = ''.join(recognized_text.split()).lower()

            # autojunk=False: the default heuristic treats frequently
            # occurring characters as junk once the second sequence reaches
            # 200 items, which would distort the ratio for long passages.
            similarity = SequenceMatcher(
                None, clean_recognized, clean_standard, autojunk=False
            ).ratio()

            accuracy = similarity * 100
            # Length ratio of transcript to reference, capped at 100.
            completeness = min(len(clean_recognized) / len(clean_standard) * 100, 100)
            # Fluency and pronunciation are fixed fractions of accuracy,
            # not independent measurements.
            fluency = accuracy * 0.95
            pronunciation = accuracy * 0.98

            # Weighted total: 30% accuracy, 25% completeness, 30% fluency,
            # 15% pronunciation.
            total_score = (
                accuracy * 0.3
                + completeness * 0.25
                + fluency * 0.3
                + pronunciation * 0.15
            )

            logger.info("✅ 评测完成: %s | 得分: %.0f", recognized_text, total_score)

            return jsonify({
                "code": 200,
                "msg": "评测成功",
                "data": {
                    "text": recognized_text,
                    "score": round(total_score),
                    "accuracy": round(accuracy),
                    "fluency": round(fluency),
                    "completeness": round(completeness),
                    "pronunciation": round(pronunciation),
                    "similarity": round(similarity * 100, 2)
                }
            })
        finally:
            if temp_path is not None and os.path.exists(temp_path):
                os.remove(temp_path)
    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback.
        logger.exception("评测失败: %s", e)
        return jsonify({"code": 500, "msg": f"评测失败: {str(e)}", "data": None}), 500
if __name__ == '__main__':
    # Startup banner: advantages, install/run hints and the API endpoints.
    print("=" * 60)
    print("🎤 Faster-Whisper 语音识别服务")
    print("=" * 60)
    print("")
    print("✅ 优势:")
    print(" 1. 速度更快(比原版快 4-5 倍)")
    print(" 2. 内存占用更小")
    print(" 3. 兼容性更好(支持 Python 3.8-3.14")
    print(" 4. 完全免费,离线运行")
    print("")
    print("📦 安装依赖:")
    print(" pip install faster-whisper flask flask-cors")
    print("")
    print("🚀 启动服务:")
    print(" python whisper_server_faster.py")
    print("")
    print("📌 API接口")
    print(" 健康检查: GET http://localhost:5001/health")
    print(" 语音识别: POST http://localhost:5001/recognize")
    print(" 语音评测: POST http://localhost:5001/evaluate")
    print("")
    print("=" * 60)
    print("")

    # The server listens on all interfaces, so the interactive debugger must
    # not be on by default: anyone who can reach the port could use it to run
    # code. Debug mode is opt-in through the WHISPER_SERVER_DEBUG environment
    # variable (1/true/yes/on) for local development.
    debug_enabled = os.environ.get('WHISPER_SERVER_DEBUG', '').strip().lower() in (
        '1', 'true', 'yes', 'on'
    )

    # Start the server.
    app.run(host='0.0.0.0', port=5001, debug=debug_enabled)