# guoyu/Test/python/whisper_server_faster.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Faster-Whisper 语音识别服务
使用 Faster-Whisper，速度更快，兼容性更好

安装：
pip install faster-whisper flask flask-cors -i https://pypi.tuna.tsinghua.edu.cn/simple

运行：
python whisper_server_faster.py
"""

from flask import Flask, request, jsonify
from flask_cors import CORS
import os
import tempfile
import logging

app = Flask(__name__)
# Apply flask-cors to the whole app (library defaults, no explicit options).
CORS(app)

# Configure logging: INFO level on the root logger, plus a module logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Module-level global holding the Whisper model instance.
# Stays None until load_whisper_model() creates the model on first use.
whisper_model = None

def load_whisper_model():
    """Return the shared Faster-Whisper model, creating it on first call.

    The model is cached in the module-level ``whisper_model`` global, so the
    ``faster_whisper`` import and the model construction only happen once.
    The "base" model is created for CPU execution with int8 compute type.

    Raises:
        Exception: whatever the import or the model construction raises; the
            error is logged and then re-raised unchanged.
    """
    global whisper_model

    # Fast path: the model has already been created by an earlier call.
    if whisper_model is not None:
        return whisper_model

    logger.info("正在加载Faster-Whisper模型...")
    try:
        # Imported here rather than at module level so the dependency is
        # only needed once a model is actually requested.
        from faster_whisper import WhisperModel

        model_instance = WhisperModel("base", device="cpu", compute_type="int8")
        whisper_model = model_instance
        logger.info("✅ Faster-Whisper模型加载成功")
    except Exception as e:
        logger.error(f"模型加载失败: {e}")
        raise

    return whisper_model

@app.route('/health', methods=['GET'])
def health():
    """Health check: return a static JSON payload with status, service name and model."""
    payload = dict(
        status="ok",
        service="Faster-Whisper语音识别服务",
        model="base",
    )
    return jsonify(payload)

@app.route('/recognize', methods=['POST'])
def recognize():
    """Speech recognition endpoint: transcribe an uploaded audio file.

    Expects a multipart/form-data POST with:
      * ``file``: the audio upload (required).
      * ``language``: language code forwarded to the model's ``transcribe``
        call (optional, defaults to ``'zh'``).

    Returns:
        A JSON envelope ``{"code", "msg", "data"}``:
          * HTTP 200 - ``data`` holds ``text`` (segment texts joined with a
            space and stripped), plus ``language`` and ``duration`` taken
            from the transcription info object.
          * HTTP 400 - no ``file`` part, or a ``file`` part with an empty
            filename.
          * HTTP 500 - any other failure; ``msg`` carries the exception text.
    """
    try:
        audio_file = request.files.get('file')
        # A form submitted without a selected file still carries a "file"
        # part, but with an empty filename; treat that as missing too.
        if audio_file is None or not audio_file.filename:
            return jsonify({
                "code": 400,
                "msg": "未找到音频文件",
                "data": None
            }), 400

        language = request.form.get('language', 'zh')

        # Only reserve the temp path here. The upload is written inside the
        # try/finally below so the file is removed even if saving fails.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
            temp_path = temp_file.name

        try:
            audio_file.save(temp_path)

            model = load_whisper_model()

            logger.info("开始识别音频: %s", audio_file.filename)
            segments, info = model.transcribe(temp_path, language=language)

            # The segments are consumed here, while the temp file still exists.
            recognized_text = " ".join(segment.text for segment in segments).strip()

            logger.info("✅ 识别成功: %s", recognized_text)

            return jsonify({
                "code": 200,
                "msg": "识别成功",
                "data": {
                    "text": recognized_text,
                    "language": info.language,
                    "duration": info.duration
                }
            })

        finally:
            if os.path.exists(temp_path):
                os.remove(temp_path)

    except Exception as e:
        # Top-level boundary: log with traceback and answer with a JSON error
        # instead of letting the exception propagate to Flask.
        logger.exception("识别失败: %s", e)
        return jsonify({
            "code": 500,
            "msg": f"识别失败: {str(e)}",
            "data": None
        }), 500

def _score_reading(recognized_text, standard_text):
    """Score a recognized transcript against the reference text.

    All whitespace is removed and both strings are lower-cased before they
    are compared.

    Returns:
        dict with unrounded values: ``similarity`` (0-1) and ``accuracy``,
        ``completeness``, ``fluency``, ``pronunciation``, ``total`` (0-100).
    """
    from difflib import SequenceMatcher

    clean_recognized = ''.join(recognized_text.split()).lower()
    clean_standard = ''.join(standard_text.split()).lower()

    # autojunk=False: the default heuristic starts treating frequently
    # occurring characters as junk once the second sequence reaches 200
    # items, which distorts the ratio for longer passages.
    similarity = SequenceMatcher(
        None, clean_recognized, clean_standard, autojunk=False
    ).ratio()

    accuracy = similarity * 100
    # Length ratio of recognized vs. reference text, capped at 100.
    completeness = min(len(clean_recognized) / max(len(clean_standard), 1) * 100, 100)
    # Fluency and pronunciation are not measured from the audio; they are
    # fixed fractions of the text accuracy.
    fluency = accuracy * 0.95
    pronunciation = accuracy * 0.98

    # Weighted total: 30% accuracy, 25% completeness, 30% fluency,
    # 15% pronunciation.
    total = accuracy * 0.3 + completeness * 0.25 + fluency * 0.3 + pronunciation * 0.15

    return {
        "similarity": similarity,
        "accuracy": accuracy,
        "completeness": completeness,
        "fluency": fluency,
        "pronunciation": pronunciation,
        "total": total,
    }


@app.route('/evaluate', methods=['POST'])
def evaluate():
    """Speech evaluation endpoint: transcribe the upload and score it against a reference text.

    Expects a multipart/form-data POST with:
      * ``file``: the audio upload (required).
      * ``text``: the reference text the speaker was supposed to read
        (required, must contain non-whitespace characters).

    The audio is always transcribed with ``language='zh'``.

    Returns:
        A JSON envelope ``{"code", "msg", "data"}``:
          * HTTP 200 - ``data`` holds the recognized ``text``, the rounded
            ``score``, ``accuracy``, ``fluency``, ``completeness`` and
            ``pronunciation`` values, and ``similarity`` as a percentage
            rounded to two decimals.
          * HTTP 400 - missing/empty upload, or missing/blank reference text.
          * HTTP 500 - any other failure; ``msg`` carries the exception text.
    """
    try:
        audio_file = request.files.get('file')
        # A form submitted without a selected file still carries a "file"
        # part, but with an empty filename; treat that as missing too.
        if audio_file is None or not audio_file.filename:
            return jsonify({"code": 400, "msg": "未找到音频文件", "data": None}), 400

        standard_text = request.form.get('text', '')
        # Without a reference text the scores are meaningless (an empty
        # reference would even yield a full score for silence), so reject
        # the request before doing any transcription work.
        if not standard_text.strip():
            return jsonify({"code": 400, "msg": "缺少标准文本", "data": None}), 400

        # Only reserve the temp path here. The upload is written inside the
        # try/finally below so the file is removed even if saving fails.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
            temp_path = temp_file.name

        try:
            audio_file.save(temp_path)

            # 1. Transcribe the audio.
            model = load_whisper_model()
            segments, info = model.transcribe(temp_path, language='zh')
            recognized_text = " ".join(segment.text for segment in segments).strip()

            # 2. Score the transcript against the reference text.
            scores = _score_reading(recognized_text, standard_text)

            logger.info("✅ 评测完成: %s | 得分: %.0f", recognized_text, scores["total"])

            return jsonify({
                "code": 200,
                "msg": "评测成功",
                "data": {
                    "text": recognized_text,
                    "score": round(scores["total"]),
                    "accuracy": round(scores["accuracy"]),
                    "fluency": round(scores["fluency"]),
                    "completeness": round(scores["completeness"]),
                    "pronunciation": round(scores["pronunciation"]),
                    "similarity": round(scores["similarity"] * 100, 2)
                }
            })

        finally:
            if os.path.exists(temp_path):
                os.remove(temp_path)

    except Exception as e:
        # Top-level boundary: log with traceback and answer with a JSON error
        # instead of letting the exception propagate to Flask.
        logger.exception("评测失败: %s", e)
        return jsonify({"code": 500, "msg": f"评测失败: {str(e)}", "data": None}), 500

if __name__ == '__main__':
    # Startup banner: advantages, install/run hints and the API endpoints.
    banner_lines = (
        "=" * 60,
        "🎤 Faster-Whisper 语音识别服务",
        "=" * 60,
        "",
        "✅ 优势：",
        "  1. 速度更快（比原版快 4-5 倍）",
        "  2. 内存占用更小",
        "  3. 兼容性更好（支持 Python 3.8-3.14）",
        "  4. 完全免费，离线运行",
        "",
        "📦 安装依赖：",
        "  pip install faster-whisper flask flask-cors",
        "",
        "🚀 启动服务：",
        "  python whisper_server_faster.py",
        "",
        "📌 API接口：",
        "  健康检查: GET  http://localhost:5001/health",
        "  语音识别: POST http://localhost:5001/recognize",
        "  语音评测: POST http://localhost:5001/evaluate",
        "",
        "=" * 60,
        "",
    )
    print("\n".join(banner_lines))

    # The server listens on all interfaces. Debug mode enables the Werkzeug
    # interactive debugger, which lets anyone who can reach the port execute
    # arbitrary code, so it is off unless explicitly requested through the
    # FLASK_DEBUG environment variable (any value other than 0/false/no).
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() not in (
        "", "0", "false", "no"
    )

    # Start the server.
    app.run(host='0.0.0.0', port=5001, debug=debug_enabled)