guoyu/speech_server.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
PaddleSpeech 语音识别服务
Windows 版本
"""

import os
import time
import uuid
from difflib import SequenceMatcher

from flask import Flask, request, jsonify
from flask_cors import CORS

# Flask application object; the route handlers below are registered on it.
app = Flask(__name__)
CORS(app)  # Allow cross-origin requests

# Module-level state shared by the request handlers.
# init_model() stores the ASR executor in `asr_model` and records in
# `model_loaded` whether loading succeeded.
asr_model = None
model_loaded = False

def init_model():
    """Create the PaddleSpeech ASR executor and publish it via module globals.

    ``ASRExecutor`` is imported lazily inside the function. On success the
    instance is stored in ``asr_model`` and ``model_loaded`` is set to True;
    if anything raises, the error is printed and ``model_loaded`` is set to
    False.

    Returns:
        True when the executor was created, False otherwise.
    """
    global asr_model, model_loaded

    print("正在加载 PaddleSpeech 模型...")
    try:
        # Everything up to the success message stays inside the try so that
        # any failure along the way is reported as a load failure.
        from paddlespeech.cli.asr.infer import ASRExecutor

        executor = ASRExecutor()
        asr_model = executor
        model_loaded = True
        print("✓ 模型加载成功！")
    except Exception as load_error:
        print(f"✗ 模型加载失败: {load_error!s}")
        print("提示：首次运行会自动下载模型，需要等待...")
        model_loaded = False

    return model_loaded

def calculate_similarity(text1, text2):
    """Score how closely two texts match on a 0-100 scale.

    Both inputs are reduced to their alphanumeric characters (whitespace and
    punctuation are dropped) and then compared with
    ``difflib.SequenceMatcher``.

    Args:
        text1: First text to compare.
        text2: Second text to compare.

    Returns:
        0 if either text is empty before or after stripping; otherwise the
        matcher ratio expressed as a percentage, rounded to two decimals.
    """
    if not (text1 and text2):
        return 0

    # Keep only letters and digits so spacing and punctuation cannot
    # influence the score.
    cleaned_a, cleaned_b = (
        ''.join(filter(str.isalnum, raw)) for raw in (text1, text2)
    )
    if not (cleaned_a and cleaned_b):
        return 0

    matcher = SequenceMatcher(None, cleaned_a, cleaned_b)
    return round(matcher.ratio() * 100, 2)

def _remove_temp_file(path):
    """Best-effort removal of a temporary audio file; never raises."""
    try:
        os.remove(path)
    except FileNotFoundError:
        # Nothing was written (e.g. the upload failed before saving).
        pass
    except OSError as cleanup_error:
        # Cleanup must not mask the real response; just report the leftover.
        print(f"临时文件删除失败: {path} ({cleanup_error})")


@app.route('/api/speech/recognize', methods=['POST'])
def recognize():
    """Recognize an uploaded audio clip and score it against a reference text.

    Expects a multipart POST with an ``audio`` file part and an optional
    ``referenceText`` form field. The upload is written to a uniquely named
    file under ``./temp_audio``, passed to the ASR model, and removed again
    whether or not recognition succeeds.

    Returns:
        A Flask JSON response with ``code`` and ``msg`` keys:

        * HTTP 200 plus a ``data`` object holding ``recognizedText``,
          ``score``, ``pronunciationScore``, ``fluencyScore`` and ``status``
          on success;
        * HTTP 400 when no ``audio`` part was uploaded;
        * HTTP 500 when the model is not loaded, recognition fails, or any
          other processing error occurs.
    """
    try:
        # Refuse requests until the model has been loaded.
        if not model_loaded:
            return jsonify({
                'code': 500,
                'msg': '模型未加载，请稍后重试'
            }), 500

        # The audio file part is mandatory.
        if 'audio' not in request.files:
            return jsonify({
                'code': 400,
                'msg': '未上传音频文件'
            }), 400

        audio_file = request.files['audio']
        reference_text = request.form.get('referenceText', '')

        # exist_ok avoids the check-then-create race between requests.
        temp_dir = './temp_audio'
        os.makedirs(temp_dir, exist_ok=True)

        # A millisecond timestamp alone can collide when two uploads arrive
        # together, so a random suffix makes each file name unique.
        timestamp = str(int(time.time() * 1000))
        unique_suffix = uuid.uuid4().hex
        temp_path = os.path.join(
            temp_dir, f'audio_{timestamp}_{unique_suffix}.wav'
        )

        try:
            audio_file.save(temp_path)

            print(f"收到音频文件: {temp_path}")
            print(f"参考文本: {reference_text}")

            try:
                # Run speech recognition on the saved file.
                result = asr_model(audio_input=temp_path, force_yes=True)
                recognized_text = result if isinstance(result, str) else str(result)
                print(f"识别结果: {recognized_text}")

                # Overall score is the text similarity to the reference.
                score = calculate_similarity(recognized_text, reference_text)

                # Simple placeholder pronunciation/fluency scores derived from
                # the overall score; rounded to avoid float artefacts such as
                # 0.09999999999999964 in the response.
                pronunciation_score = round(max(0, score - 5), 2)
                fluency_score = round(max(0, score - 3), 2)

                return jsonify({
                    'code': 200,
                    'msg': '成功',
                    'data': {
                        'recognizedText': recognized_text,
                        'score': score,
                        'pronunciationScore': pronunciation_score,
                        'fluencyScore': fluency_score,
                        'status': 'completed'
                    }
                })
            except Exception as e:
                print(f"识别失败: {e}")
                return jsonify({
                    'code': 500,
                    'msg': f'识别失败: {e}'
                }), 500
        finally:
            # Single cleanup point: runs on success, on recognition failure
            # and when saving the upload itself fails.
            _remove_temp_file(temp_path)

    except Exception as e:
        print(f"处理错误: {e}")
        return jsonify({
            'code': 500,
            'msg': f'处理失败: {e}'
        }), 500

@app.route('/api/speech/health', methods=['GET'])
def health():
    """Health check: report that the service is up and whether the model loaded."""
    model_status = {'model_loaded': model_loaded}
    payload = {
        'code': 200,
        'msg': '服务正常',
        'data': model_status,
    }
    return jsonify(payload)

if __name__ == '__main__':
    separator = "=" * 50

    # Opening banner.
    for banner_line in (separator, "PaddleSpeech 语音识别服务", separator, ""):
        print(banner_line)

    # Load the model before serving; the first run downloads it and may take
    # a while.
    init_model()

    # Startup summary with the URLs of the service.
    startup_lines = (
        "",
        separator,
        "服务启动成功！",
        "访问地址: http://localhost:5000",
        "健康检查: http://localhost:5000/api/speech/health",
        separator,
        "",
    )
    for summary_line in startup_lines:
        print(summary_line)

    # Start the HTTP server.
    app.run(host='0.0.0.0', port=5000, debug=False)