# guoyu/log/whisper_server.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
本地Whisper语音识别服务
替代百度API，完全离线运行

安装：
pip install openai-whisper flask flask-cors

运行：
python whisper_server.py

优点：
1. 完全免费，无限次调用
2. 离线运行，不需要网络
3. 识别准确率高（接近百度API）
4. 支持中文、英文等多语言
"""

from flask import Flask, request, jsonify
from flask_cors import CORS
import whisper
import os
import tempfile
import logging

# Optional Traditional/Simplified Chinese conversion support.
# zhconv is treated as an optional dependency: HAS_ZHCONV records whether the
# import succeeded so convert_to_simplified() can skip conversion without it.
try:
    import zhconv
    HAS_ZHCONV = True
except ImportError:
    HAS_ZHCONV = False
    # Tell the operator at startup that conversion is unavailable and how to
    # install the missing package.
    print("⚠️ 未安装zhconv库，无法进行繁简体转换")
    print("   安装命令: pip install zhconv")

# Flask application object; the route decorators below register on it.
app = Flask(__name__)
# Apply flask-cors to the whole app, using its default options.
CORS(app)

# Logging setup: root logger at INFO level, plus a module-level logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Global: the Whisper model instance. Stays None until load_whisper_model()
# loads it on first use.
whisper_model = None

def load_whisper_model():
    """Return the shared Whisper model, loading it on the first call.

    The loaded model is stored in the module-level ``whisper_model`` so later
    calls return the same instance without reloading.
    """
    global whisper_model

    # Already loaded by an earlier call: reuse it.
    if whisper_model is not None:
        return whisper_model

    logger.info("正在加载Whisper模型...")
    # The "base" model is used as a balance of speed and accuracy.
    # Available sizes: tiny, base, small, medium, large
    #   tiny          - fastest, mediocre accuracy
    #   base          - fast, good accuracy (recommended)
    #   small         - slower, high accuracy
    #   medium/large  - very slow, highest accuracy
    whisper_model = whisper.load_model("base")
    logger.info("✅ Whisper模型加载成功")
    return whisper_model

def convert_to_simplified(text):
    """
    Convert Traditional Chinese text to Simplified Chinese.

    Args:
        text: the text to convert.

    Returns:
        The text converted with zhconv's 'zh-cn' target. The input is
        returned unchanged when it is falsy, when zhconv is not installed
        (HAS_ZHCONV is False), or when the conversion raises.
    """
    # Nothing to convert, or no converter available: hand the input back.
    if not text or not HAS_ZHCONV:
        return text

    try:
        converted = zhconv.convert(text, 'zh-cn')
        logger.info(f"繁简转换: {text} -> {converted}")
    except Exception as err:
        # Best effort: a failed conversion falls back to the original text.
        logger.warning(f"繁简转换失败: {err}")
        converted = text
    return converted

@app.route('/health', methods=['GET'])
def health():
    """Health check: return a fixed JSON payload describing the service.

    The "model" field is the literal string "base"; it is not read from the
    loaded model.
    """
    payload = dict(
        status="ok",
        service="Whisper语音识别服务",
        model="base",
    )
    return jsonify(payload)

@app.route('/recognize', methods=['POST'])
def recognize():
    """
    Speech recognition endpoint.

    Form parameters:
    - file: uploaded audio file (e.g. MP3, WAV, M4A).
    - language: language passed to Whisper. Optional; defaults to 'zh'.
      An empty value or 'auto' lets Whisper detect the language itself.

    Returns JSON of the form:
    {
        "code": 200,
        "msg": "识别成功",
        "data": {
            "text": "recognized text",
            "language": "zh",
            "segments": 3,
            "duration": 4.2
        }
    }
    "segments" is the number of segments Whisper returned. "duration" is the
    end time in seconds of the last segment (0 when there are no segments).

    Responds with code 400 when no audio file was uploaded, and with code 500
    when saving or transcribing the audio fails.
    """
    try:
        audio_file = request.files.get('file')
        # A file field submitted without a chosen file has an empty filename.
        if audio_file is None or not audio_file.filename:
            return jsonify({
                "code": 400,
                "msg": "未找到音频文件",
                "data": None
            }), 400

        # Default to Chinese; an empty value or 'auto' means "let Whisper
        # detect", which Whisper does when language is None.
        language = (request.form.get('language', 'zh') or '').strip()
        if language.lower() in ('', 'auto'):
            language = None

        temp_path = None
        try:
            # Reserve a temp path, close the handle, then write the upload to
            # it. Saving inside the try guarantees cleanup if the save fails.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
                temp_path = temp_file.name
            audio_file.save(temp_path)

            model = load_whisper_model()

            logger.info(f"开始识别音频: {audio_file.filename}")
            result = model.transcribe(
                temp_path,
                language=language,
                task='transcribe',  # transcribe = same-language text; translate = English
                fp16=False  # full precision, for CPU inference
            )

            recognized_text = result['text'].strip()
            detected_language = result.get('language', language)

            # Normalise Chinese output to Simplified characters.
            if detected_language == 'zh':
                recognized_text = convert_to_simplified(recognized_text)

            # Whisper's result has no 'duration' key, so fall back to the end
            # timestamp of the last segment.
            segments = result.get('segments') or []
            duration = result.get('duration')
            if duration is None:
                duration = float(segments[-1].get('end', 0)) if segments else 0

            logger.info(f"✅ 识别成功: {recognized_text}")

            return jsonify({
                "code": 200,
                "msg": "识别成功",
                "data": {
                    "text": recognized_text,
                    "language": detected_language,
                    "segments": len(segments),
                    "duration": duration
                }
            })

        finally:
            # Always remove the temporary audio file.
            if temp_path and os.path.exists(temp_path):
                os.remove(temp_path)

    except Exception as e:
        # Top-level boundary: log with traceback and report a JSON 500.
        logger.exception("识别失败: %s", e)
        return jsonify({
            "code": 500,
            "msg": f"识别失败: {str(e)}",
            "data": None
        }), 500

def _normalize_for_scoring(text):
    """Lower-case text after removing everything but Unicode letters and digits."""
    import re

    # [\W_] also strips the underscore, which a bare [^\w] would keep.
    return re.sub(r'[\W_]', '', text, flags=re.UNICODE).lower()


def _score_against_standard(recognized_text, standard_text):
    """Compare recognized text with the reference text and return unrounded scores."""
    from difflib import SequenceMatcher

    clean_recognized = _normalize_for_scoring(recognized_text)
    clean_standard = _normalize_for_scoring(standard_text)

    # autojunk=False: the default heuristic treats frequent characters as
    # junk once a sequence reaches 200 items, which skews the ratio for
    # longer passages.
    similarity = SequenceMatcher(
        None, clean_recognized, clean_standard, autojunk=False
    ).ratio()

    accuracy = similarity * 100
    # Length of the recognized text relative to the reference, capped at 100.
    completeness = min(len(clean_recognized) / max(len(clean_standard), 1) * 100, 100)
    # Fluency and pronunciation are derived from accuracy, not measured
    # from the audio itself.
    fluency = accuracy * 0.95
    pronunciation = accuracy * 0.98

    total_score = (
        accuracy * 0.3
        + completeness * 0.25
        + fluency * 0.3
        + pronunciation * 0.15
    )
    return {
        "similarity": similarity,
        "accuracy": accuracy,
        "completeness": completeness,
        "fluency": fluency,
        "pronunciation": pronunciation,
        "total_score": total_score,
    }


@app.route('/evaluate', methods=['POST'])
def evaluate():
    """
    Speech evaluation endpoint.

    Form parameters:
    - file: uploaded audio file.
    - text: reference text the recognized speech is compared against.

    The audio is transcribed as Chinese, converted to Simplified characters,
    and compared with the reference after stripping punctuation, whitespace
    and case.

    Returns JSON of the form:
    {
        "code": 200,
        "msg": "评测成功",
        "data": {
            "text": "recognized text",
            "score": 95,
            "accuracy": 98,
            "fluency": 92,
            "completeness": 95,
            "pronunciation": 94,
            "similarity": 97.5
        }
    }

    Responds with code 400 when the audio file is missing or the reference
    text contains no letters or digits, and with code 500 when saving,
    transcribing or scoring fails.
    """
    try:
        audio_file = request.files.get('file')
        # A file field submitted without a chosen file has an empty filename.
        if audio_file is None or not audio_file.filename:
            return jsonify({"code": 400, "msg": "未找到音频文件", "data": None}), 400

        standard_text = request.form.get('text', '')
        # Without a usable reference there is nothing to score against;
        # reject before running the transcription.
        if not _normalize_for_scoring(standard_text):
            return jsonify({"code": 400, "msg": "缺少标准文本", "data": None}), 400

        temp_path = None
        try:
            # Reserve a temp path, close the handle, then write the upload to
            # it. Saving inside the try guarantees cleanup if the save fails.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
                temp_path = temp_file.name
            audio_file.save(temp_path)

            # 1. Transcribe the audio.
            model = load_whisper_model()
            result = model.transcribe(temp_path, language='zh', fp16=False)
            recognized_text = result['text'].strip()

            # Normalise the output to Simplified characters.
            recognized_text = convert_to_simplified(recognized_text)

            # 2. Score the transcription against the reference text.
            scores = _score_against_standard(recognized_text, standard_text)
            total_score = scores["total_score"]

            logger.info(f"✅ 评测完成: {recognized_text} | 得分: {total_score:.0f}")

            return jsonify({
                "code": 200,
                "msg": "评测成功",
                "data": {
                    "text": recognized_text,
                    "score": round(total_score),
                    "accuracy": round(scores["accuracy"]),
                    "fluency": round(scores["fluency"]),
                    "completeness": round(scores["completeness"]),
                    "pronunciation": round(scores["pronunciation"]),
                    "similarity": round(scores["similarity"] * 100, 2)
                }
            })

        finally:
            # Always remove the temporary audio file.
            if temp_path and os.path.exists(temp_path):
                os.remove(temp_path)

    except Exception as e:
        # Top-level boundary: log with traceback and report a JSON 500.
        logger.exception("评测失败: %s", e)
        return jsonify({"code": 500, "msg": f"评测失败: {str(e)}", "data": None}), 500

if __name__ == '__main__':
    # Startup banner: advantages, install/run instructions and endpoints.
    banner_lines = [
        "=" * 60,
        "🎤 本地Whisper语音识别服务",
        "=" * 60,
        "",
        "✅ 优势：",
        "  1. 完全免费，无限次调用",
        "  2. 离线运行，不需要网络",
        "  3. 识别准确率高",
        "  4. 数据完全私有",
        "",
        "📦 安装依赖：",
        "  pip install openai-whisper flask flask-cors",
        "",
        "🚀 启动服务：",
        "  python whisper_server.py",
        "",
        "📌 API接口：",
        "  健康检查: GET  http://localhost:5001/health",
        "  语音识别: POST http://localhost:5001/recognize",
        "  语音评测: POST http://localhost:5001/evaluate",
        "",
        "=" * 60,
        "",
    ]
    print("\n".join(banner_lines))

    # The server listens on all interfaces, so the interactive debugger must
    # not be on by default: it allows code execution from the browser.
    # Set FLASK_DEBUG=1 to opt in during local development.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in (
        '1', 'true', 'yes', 'on'
    )

    # Start the service.
    app.run(host='0.0.0.0', port=5001, debug=debug_enabled)