diff --git a/lover/config.py b/lover/config.py
index 19b8f4f..eb7867a 100644
--- a/lover/config.py
+++ b/lover/config.py
@@ -155,7 +155,7 @@ class Settings(BaseSettings):
     )
 
     model_config = SettingsConfigDict(
-        env_file=".env",
+        env_file=["../.env", ".env"],  # later files override earlier ones: ./.env wins, ../.env is the fallback
         env_file_encoding="utf-8",
         case_sensitive=True,
         extra="ignore",
diff --git a/lover/main_simple.py b/lover/main_simple.py
new file mode 100644
index 0000000..96fcbba
--- /dev/null
+++ b/lover/main_simple.py
@@ -0,0 +1,78 @@
+"""Minimal FastAPI entry point exposing only the voice-call router and TTS audio files."""
+
+import sys
+import os
+# Put the repository root on sys.path so the `lover.*` imports below resolve
+# when this file is run directly as a script.
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from fastapi import FastAPI, HTTPException, Request
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
+from fastapi.staticfiles import StaticFiles
+import logging
+import dashscope
+from pathlib import Path
+
+from lover.routers import voice_call as voice_call_router
+from lover.response import ApiResponse
+from lover.config import settings
+
+# Initialise the DashScope API key once at import time, if one is configured.
+if settings.DASHSCOPE_API_KEY:
+    dashscope.api_key = settings.DASHSCOPE_API_KEY
+
+# Configure logging.
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+
+app = FastAPI(title="LOVER API - Simple")
+
+# Create the TTS output directory (relative to the process working directory).
+tts_dir = Path("public/tts")
+tts_dir.mkdir(parents=True, exist_ok=True)
+
+# Mount static file serving so generated TTS audio can be fetched under /tts.
+app.mount("/tts", StaticFiles(directory=str(tts_dir)), name="tts")
+
+# Wildcard origins must not be combined with credentialed requests: that would
+# let any website call this API with the user's cookies. Clients authenticate
+# with a Bearer token in an explicit Authorization header, which does not need it.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # simplified CORS configuration
+    allow_credentials=False,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Register only the voice-call routes.
+app.include_router(voice_call_router.router)
+
+@app.exception_handler(HTTPException)
+async def http_exception_handler(request: Request, exc: HTTPException):
+    """Render an HTTPException as the {"code", "msg", "data"} JSON envelope."""
+    detail = exc.detail
+    msg = detail if isinstance(detail, str) else str(detail)
+    return JSONResponse(
+        status_code=exc.status_code,
+        content={"code": exc.status_code, "msg": msg, "data": None},
+    )
+
+@app.exception_handler(Exception)
+async def generic_exception_handler(request: Request, exc: Exception):
+    logging.exception("Unhandled error", exc_info=exc)
+    return JSONResponse(
+        status_code=500,
+        content={"code": 500, "msg": "服务器内部错误", "data": None},
+    )
+
+@app.get("/health", response_model=ApiResponse[dict])
+async def health():
+    return ApiResponse(code=1, msg="ok", data={"status": "ok"})
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=30102)
\ No newline at end of file
diff --git a/lover/routers/voice_call.py b/lover/routers/voice_call.py
index 75d34cb..d3d6779 100644
--- a/lover/routers/voice_call.py
+++ b/lover/routers/voice_call.py
@@ -712,6 +712,269 @@ class VoiceCallSession:
 
 
 @router.post("/call/asr")
+async def json_asr(
+    request: dict,
+    user: AuthedUser = Depends(get_current_user)
+):
+    """Recognise speech from a base64-encoded audio payload sent as JSON.
+
+    The audio is uploaded to OSS, transcribed by DashScope (``paraformer-v2``)
+    from the resulting file URL, and the temporary OSS object is deleted again.
+    Blocking SDK/HTTP calls run in worker threads so that waiting for the
+    transcription does not stall the event loop for other requests.
+
+    Args:
+        request: Parsed JSON body. ``audio_data`` (required) holds the base64
+            audio; ``format`` (optional, default ``"mp3"``) names its format.
+        user: Caller resolved by ``get_current_user``; not used in the body.
+
+    Returns:
+        ``success_response({"text": ...})``. Recoverable failures (ASR not
+        configured, OSS unreachable, timeout, failed task) are reported in
+        ``text`` as a user-facing message instead of an HTTP error.
+
+    Raises:
+        HTTPException: 400 for a missing, undecodable or empty ``audio_data``
+            or a non-string ``format``; 500 for any other unexpected error.
+    """
+    import asyncio
+    import base64
+    import functools
+
+    from ..response import success_response
+
+    async def run_blocking(func, *args, **kwargs):
+        """Run a blocking callable in the default thread pool and await it."""
+        loop = asyncio.get_running_loop()
+        return await loop.run_in_executor(None, functools.partial(func, *args, **kwargs))
+
+    try:
+        # Pull the audio payload out of the request body.
+        if 'audio_data' not in request:
+            logger.error("请求中缺少 audio_data 字段")
+            raise HTTPException(status_code=400, detail="缺少 audio_data 字段")
+
+        audio_base64 = request['audio_data']
+        audio_format = request.get('format', 'mp3')
+        if not isinstance(audio_format, str):
+            # A non-string format would crash on .lower()/.upper() below and
+            # surface as a 500 although the request itself is at fault.
+            logger.error("format 字段必须是字符串")
+            raise HTTPException(status_code=400, detail="format 字段必须是字符串")
+
+        logger.info(f"收到 JSON ASR 请求,格式: {audio_format}")
+
+        # Decode the base64 audio.
+        try:
+            audio_data = base64.b64decode(audio_base64)
+            logger.info(f"解码音频数据成功,大小: {len(audio_data)} 字节")
+        except Exception as decode_error:
+            logger.error(f"base64 解码失败: {decode_error}")
+            raise HTTPException(status_code=400, detail="音频数据解码失败")
+
+        # Reject an empty payload.
+        if not audio_data:
+            logger.error("解码后的音频数据为空")
+            raise HTTPException(status_code=400, detail="音频数据为空")
+
+        # Estimate the audio duration from the payload size.
+        if audio_format.lower() == 'mp3':
+            # Rough MP3 estimate: 16000 bytes/s corresponds to a 128 kbps stream.
+            expected_duration = len(audio_data) / 16000
+            logger.info(f"MP3 音频数据,预估时长: {expected_duration:.2f} 秒")
+        else:
+            # PCM: 16 kHz mono 16-bit is 32000 bytes per second.
+            expected_duration = len(audio_data) / 32000
+            logger.info(f"PCM 音频数据,预期时长: {expected_duration:.2f} 秒")
+
+        if expected_duration < 0.1:
+            logger.warning("音频时长太短,可能无法识别")
+            test_text = f"音频时长太短({expected_duration:.2f}秒),请说话时间长一些"
+            return success_response({"text": test_text})
+
+        # Without an API key there is nothing to call; answer with a placeholder.
+        if not settings.DASHSCOPE_API_KEY:
+            logger.error("未配置 DASHSCOPE_API_KEY")
+            test_text = f"ASR 未配置,收到 {expected_duration:.1f}秒 音频"
+            return success_response({"text": test_text})
+
+        dashscope.api_key = settings.DASHSCOPE_API_KEY
+
+        logger.info("开始调用 DashScope ASR...")
+
+        try:
+            from dashscope.audio.asr import Transcription
+            from ..oss_utils import upload_audio_file, delete_audio_file, test_oss_connection
+
+            # Check OSS first: DashScope reads the audio from a file URL.
+            logger.info("测试 OSS 连接...")
+            if not await run_blocking(test_oss_connection):
+                logger.warning("OSS 连接失败,使用临时测试方案")
+                test_text = f"OSS 暂不可用,但成功接收到 {expected_duration:.1f}秒 {audio_format.upper()} 音频数据({len(audio_data)} 字节)"
+                return success_response({"text": test_text})
+
+            logger.info("OSS 连接测试通过")
+
+            logger.info(f"上传 {audio_format.upper()} 音频到 OSS...")
+            file_url = await run_blocking(upload_audio_file, audio_data, audio_format)
+            logger.info(f"音频文件上传成功: {file_url}")
+
+            try:
+                logger.info("调用 DashScope Transcription API...")
+                logger.info(f"使用文件 URL: {file_url}")
+
+                task_response = await run_blocking(
+                    Transcription.async_call,
+                    model='paraformer-v2',
+                    file_urls=[file_url],
+                    parameters={
+                        'format': audio_format,
+                        'sample_rate': 16000,
+                        'enable_words': False
+                    }
+                )
+
+                logger.info(f"ASR 任务响应: status_code={task_response.status_code}")
+
+                if task_response.status_code != 200:
+                    error_msg = getattr(task_response, 'message', 'Unknown error')
+                    logger.error(f"ASR 任务创建失败: {error_msg}")
+                    raise Exception(f"ASR 任务创建失败: {error_msg}")
+
+                task_id = task_response.output.task_id
+                logger.info(f"ASR 任务已创建: {task_id}")
+
+                # Wait for the result with a hard deadline. The worker thread
+                # cannot be interrupted, so after a timeout it keeps running in
+                # the background until Transcription.wait returns.
+                logger.info("等待 ASR 识别完成...")
+                max_wait_time = 30
+                try:
+                    transcribe_response = await asyncio.wait_for(
+                        run_blocking(Transcription.wait, task=task_id),
+                        timeout=max_wait_time,
+                    )
+                    logger.info("ASR 任务完成")
+                except asyncio.TimeoutError:
+                    logger.error(f"ASR 任务超时({max_wait_time}秒)")
+                    return success_response({"text": f"语音识别处理时间较长,请稍后重试(音频时长: {expected_duration:.1f}秒)"})
+                except Exception as wait_error:
+                    logger.error(f"ASR 等待过程中出错: {wait_error}")
+                    return success_response({"text": "语音识别服务暂时不可用,请稍后重试"})
+
+                logger.info(f"ASR 识别响应: status_code={transcribe_response.status_code}")
+
+                if transcribe_response.status_code != 200:
+                    error_msg = getattr(transcribe_response, 'message', 'Unknown error')
+                    logger.error(f"ASR 识别失败: {error_msg}")
+                    raise Exception(f"ASR 识别失败: {error_msg}")
+
+                # Inspect the task status.
+                result = transcribe_response.output
+                logger.info(f"ASR 任务状态: {result.task_status}")
+
+                if result.task_status == "SUCCEEDED":
+                    logger.info("ASR 识别成功,开始解析结果...")
+                elif result.task_status == "FAILED":
+                    error_code = getattr(result, 'code', 'Unknown')
+                    error_message = getattr(result, 'message', 'Unknown error')
+
+                    logger.error(f"ASR 任务失败: {error_code} - {error_message}")
+
+                    if error_code == "SUCCESS_WITH_NO_VALID_FRAGMENT":
+                        user_message = "音频中未检测到有效语音,请确保录音时有说话内容"
+                    elif error_code == "DECODE_ERROR":
+                        user_message = "音频格式解码失败,请检查录音设置"
+                        logger.error("音频解码失败 - 可能的原因:")
+                        logger.error("1. 音频格式不正确或损坏")
+                        logger.error("2. 编码参数不匹配(建议:16kHz, 单声道, 64kbps)")
+                        logger.error("3. 文件头信息缺失或错误")
+                    elif error_code == "FILE_DOWNLOAD_FAILED":
+                        user_message = "无法下载音频文件,请检查网络连接"
+                    elif error_code == "AUDIO_FORMAT_UNSUPPORTED":
+                        user_message = "音频格式不支持,请使用标准格式录音"
+                    else:
+                        user_message = f"语音识别失败: {error_message}"
+
+                    return success_response({"text": user_message})
+                else:
+                    logger.warning(f"ASR 任务状态未知: {result.task_status}")
+                    return success_response({"text": f"语音识别状态异常: {result.task_status}"})
+
+                # Collect the transcript text; each result item links to a JSON
+                # document that is fetched separately.
+                text_result = ""
+
+                if hasattr(result, 'results') and result.results:
+                    logger.info(f"找到 results 字段,长度: {len(result.results)}")
+
+                    for item in result.results:
+                        if isinstance(item, dict) and 'transcription_url' in item and item['transcription_url']:
+                            transcription_url = item['transcription_url']
+                            logger.info(f"找到 transcription_url: {transcription_url}")
+
+                            try:
+                                import requests
+                                response = await run_blocking(requests.get, transcription_url, timeout=10)
+                                if response.status_code == 200:
+                                    transcription_data = response.json()
+                                    logger.info(f"转录数据: {transcription_data}")
+
+                                    if 'transcripts' in transcription_data:
+                                        for transcript in transcription_data['transcripts']:
+                                            if 'text' in transcript:
+                                                text_result += transcript['text'] + " "
+                                                logger.info(f"提取转录文本: {transcript['text']}")
+
+                                    # Stop at the first item that yielded text.
+                                    if text_result.strip():
+                                        break
+
+                            except Exception as e:
+                                logger.error(f"处理 transcription_url 失败: {e}")
+
+                text_result = text_result.strip()
+
+                if not text_result:
+                    logger.warning("ASR 未识别到文本内容")
+                    text_result = f"未识别到语音内容({expected_duration:.1f}秒音频)"
+
+                logger.info(f"最终 ASR 识别结果: {text_result}")
+
+                return success_response({"text": text_result})
+
+            finally:
+                # Always remove the temporary OSS object, whatever the outcome.
+                try:
+                    await run_blocking(delete_audio_file, file_url)
+                    logger.info("OSS 临时文件已清理")
+                except Exception as e:
+                    logger.warning(f"清理 OSS 文件失败: {e}")
+
+        except Exception as asr_error:
+            logger.error(f"DashScope ASR 调用失败: {asr_error}", exc_info=True)
+
+            # Map the failure to a coarse placeholder message for the client.
+            error_msg = str(asr_error)
+            if "OSS" in error_msg:
+                test_text = f"OSS 配置问题,收到 {expected_duration:.1f}秒 音频"
+            elif "Transcription" in error_msg:
+                test_text = f"ASR 服务异常,收到 {expected_duration:.1f}秒 音频"
+            else:
+                test_text = f"ASR 处理失败,收到 {expected_duration:.1f}秒 音频"
+
+            logger.info(f"返回备用文本: {test_text}")
+
+            return success_response({"text": test_text})
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"JSON ASR 处理错误: {e}", exc_info=True)
+        raise HTTPException(status_code=500, detail=f"ASR 处理失败: {str(e)}")
+
+
+@router.post("/call/batch_asr")
 async def batch_asr(
     audio: UploadFile = File(...),
     user: AuthedUser = Depends(get_current_user)
diff --git a/test_asr_fix.py b/test_asr_fix.py
new file mode 100644
index 0000000..b2896e1
--- /dev/null
+++ b/test_asr_fix.py
@@ -0,0 +1,147 @@
+#!/usr/bin/env python3
+"""
+测试 ASR 修复是否有效
+"""
+import sys
+import os
+sys.path.append('.')
+
+import requests
+import base64
+import wave
+import struct
+import math
+import logging
+
+# 设置日志
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+def create_speech_like_audio():
+    """创建类似语音的音频数据"""
+    sample_rate = 16000
+    duration = 3  # 3 秒
+
+    # 生成更复杂的音频,模拟语音特征
+    samples = []
+    for i in range(sample_rate * duration):
+        t = i / sample_rate
+
+        # 基频(模拟人声基频)
+        f0 = 150 + 50 * math.sin(2 * math.pi * 0.5 * t)  # 变化的基频
+
+        # 多个谐波分量
+        sample = 0
+        for harmonic in range(1, 6):  # 前5个谐波
+            amplitude = 1.0 / harmonic  # 谐波幅度递减
+            sample += amplitude * math.sin(2 * math.pi * f0 * harmonic * t)
+
+        # 添加包络(模拟语音的动态变化)
+        envelope = 0.5 * (1 + math.sin(2 * math.pi * 2 * t))  # 2Hz 的包络变化
+
+        # 添加一些噪声(模拟语音的复杂性)
+        noise = 0.1 * (math.sin(2 * math.pi * 1000 * t) + 0.5 * math.sin(2 * math.pi * 2000
* t)) + + # 组合所有分量 + final_sample = (sample + noise) * envelope * 0.3 # 控制总体音量 + + # 转换为 16-bit 整数 + sample_int = int(16000 * final_sample) + sample_int = max(-32767, min(32767, sample_int)) + samples.append(sample_int) + + # 转换为字节数据 + audio_bytes = bytearray() + for sample in samples: + audio_bytes.extend(struct.pack(' { - entry.file((file) => { - const reader = new plus.io.FileReader() - reader.onload = async (e) => { - const dataUrl = e.target.result - const base64 = dataUrl.split(',')[1] - const binaryString = atob(base64) - const bytes = new Uint8Array(binaryString.length) - for (let i = 0; i < binaryString.length; i++) { - bytes[i] = binaryString.charCodeAt(i) - } - - console.log('✅ 文件读取成功,开始通过WebSocket发送') - console.log('📊 音频数据大小:', bytes.length, 'bytes') - - // 检查WebSocket连接状态 - if (!that.socketTask || that.socketTask.readyState !== 1) { - console.error('❌ WebSocket未连接,无法发送音频') - uni.showToast({ - title: 'WebSocket未连接', - icon: 'none' - }) - return - } - - // 显示处理进度 - uni.showLoading({ - title: '识别中...', - mask: true - }) - - try { - // 通过WebSocket发送完整音频文件 - // 分块发送大文件,避免WebSocket消息过大 - const chunkSize = 8192 // 8KB per chunk - const totalChunks = Math.ceil(bytes.length / chunkSize) - - console.log(`📦 将音频分为 ${totalChunks} 个片段发送`) - - for (let i = 0; i < totalChunks; i++) { - const start = i * chunkSize - const end = Math.min(start + chunkSize, bytes.length) - const chunk = bytes.slice(start, end) - - console.log(`📤 发送第 ${i + 1}/${totalChunks} 片,大小: ${chunk.byteLength} bytes`) - - await new Promise((resolve, reject) => { - that.socketTask.send({ - data: chunk.buffer, - success: () => { - console.log(`✅ 第 ${i + 1} 片发送成功`) - resolve() - }, - fail: (err) => { - console.error(`❌ 第 ${i + 1} 片发送失败:`, err) - reject(err) - } - }) - }) - - // 小延迟避免发送过快 - if (i < totalChunks - 1) { - await new Promise(resolve => setTimeout(resolve, 10)) - } - } - - // 发送结束标记,触发ASR处理 - await new Promise((resolve, reject) => { - console.log('📤 发送结束标记 "end"') - that.socketTask.send({ - 
data: 'end', - success: () => { - console.log('✅ 结束标记发送成功') - resolve() - }, - fail: (err) => { - console.error('❌ 结束标记发送失败:', err) - reject(err) - } - }) - }) - - console.log('🎉 完整音频文件已通过WebSocket发送完成') - uni.hideLoading() - - } catch (error) { - console.error('❌ WebSocket发送失败:', error) - uni.hideLoading() - uni.showToast({ - title: '发送失败: ' + error.message, - icon: 'none' - }) - } - } - - reader.onerror = (error) => { - console.error('❌ 文件读取失败:', error) - } - - reader.readAsDataURL(file) - }, (error) => { - console.error('❌ 获取文件失败:', error) - }) - }, (error) => { - console.error('❌ 解析文件路径失败:', error) - }) - } else { - console.error('❌ plus.io 不可用') - } + // 新的处理方式:不通过WebSocket,而是在onStop回调中通过HTTP发送 + console.log('⚠️ 未收到音频帧,将在 onStop 回调中通过 HTTP 发送到 ASR 端点') + console.log('⚠️ 不发送 WebSocket 信号,避免触发旧的 finalize_asr 流程') } }) @@ -598,6 +472,63 @@ console.log('📊 是否收到过音频帧:', hasReceivedFrames) this.isRecording = false + + // 处理录音文件 + if (!res.tempFilePath) { + console.error('❌ 没有录音文件') + hasReceivedFrames = false // 重置标记 + frameCount = 0 // 重置计数 + return + } + + console.log('📁 开始处理录音文件:', res.tempFilePath) + + // 使用之前成功的文件读取方法 + let filePath = res.tempFilePath + if (!filePath.startsWith('/') && !filePath.includes('://')) { + if (typeof plus !== 'undefined' && plus.io) { + filePath = plus.io.convertLocalFileSystemURL(filePath) + } + } + + console.log('📁 转换后文件路径:', filePath) + const that = this + + if (typeof plus !== 'undefined' && plus.io) { + plus.io.resolveLocalFileSystemURL(filePath, (entry) => { + entry.file((file) => { + const reader = new plus.io.FileReader() + reader.onload = async (e) => { + const dataUrl = e.target.result + const base64 = dataUrl.split(',')[1] + const binaryString = atob(base64) + const bytes = new Uint8Array(binaryString.length) + for (let i = 0; i < binaryString.length; i++) { + bytes[i] = binaryString.charCodeAt(i) + } + + console.log('✅ 文件读取成功,开始发送到ASR') + console.log('📊 音频数据大小:', bytes.length, 'bytes') + + // 发送到ASR端点进行处理 + try { + const 
response = await that.sendAudioToASR(bytes)
+                            console.log('✅ ASR处理成功:', response)
+                        } catch (error) {
+                            console.error('❌ ASR处理失败:', error)
+                        }
+                    }
+                    reader.readAsDataURL(file)
+                }, (error) => {
+                    console.error('❌ 文件读取失败:', error)
+                })
+            }, (error) => {
+                console.error('❌ 文件路径解析失败:', error)
+            })
+        } else {
+            console.error('❌ plus.io 不可用')
+        }
+
         hasReceivedFrames = false // 重置标记
         frameCount = 0 // 重置计数
     })
@@ -763,8 +694,8 @@
         duration: 600000, // 10 分钟
         sampleRate: 16000, // 必须 16kHz,匹配服务器
         numberOfChannels: 1, // 单声道
-        encodeBitRate: 128000, // 128kbps,适合语音
-        format: 'mp3', // 改用 MP3 格式,可能更稳定
+        encodeBitRate: 64000, // 降低到 64kbps,更适合语音识别
+        format: 'mp3', // 使用 MP3 格式
         audioSource: 'mic' // 明确指定麦克风作为音频源
         // 完全移除 frameSize,避免任何实时处理
     }
@@ -1338,6 +1269,88 @@
             delta: 2,
         });
     },
+    // Send the recorded audio to the HTTP ASR endpoint and show the recognised text.
+    // audioBytes: Uint8Array with the recorded file contents (declared to the server as MP3).
+    // Resolves with the parsed response body; shows a toast and rethrows when the
+    // request fails or the server answers with a non-200 status.
+    async sendAudioToASR(audioBytes) {
+        console.log('📤 开始发送音频到ASR端点')
+        console.log('📊 音频数据大小:', audioBytes.length, 'bytes')
+
+        // Show a blocking loading indicator while recognition runs.
+        uni.showLoading({
+            title: '语音识别中...',
+            mask: true
+        })
+
+        try {
+            // Convert the bytes to a binary string, then to base64.
+            let base64Audio = ''
+            for (let i = 0; i < audioBytes.length; i++) {
+                base64Audio += String.fromCharCode(audioBytes[i])
+            }
+            base64Audio = btoa(base64Audio)
+
+            console.log('📤 发送ASR请求...')
+            // Use explicit callbacks instead of awaiting uni.request directly: the
+            // promisified form resolves with [err, res] in Vue 2 projects and with
+            // res in Vue 3, so reading statusCode off the awaited value is not portable.
+            const response = await new Promise((resolve, reject) => {
+                uni.request({
+                    url: this.baseURLPy + '/voice/call/asr',
+                    method: 'POST',
+                    header: {
+                        'Content-Type': 'application/json',
+                        'Authorization': 'Bearer ' + uni.getStorageSync("token")
+                    },
+                    data: {
+                        audio_data: base64Audio,
+                        format: 'mp3'
+                    },
+                    success: resolve,
+                    fail: reject
+                })
+            })
+
+            console.log('✅ ASR响应:', response)
+
+            // Hide the loading indicator.
+            uni.hideLoading()
+
+            if (response.statusCode === 200 && response.data) {
+                const result = response.data
+                // NOTE(review): the backend replies via success_response(...), which
+                // presumably wraps the payload as { code, msg, data } — confirm. Read
+                // the text from `data` when present and fall back to a flat body.
+                const payload = (result.data && typeof result.data === 'object') ? result.data : result
+                const text = payload.text
+                console.log('✅ ASR识别结果:', text)
+
+                // Show the recognised text.
+                uni.showToast({
+                    title: `识别: ${text}`,
+                    icon: 'none',
+                    duration: 3000
+                })
+
+                return result
+            } else {
+                throw new Error(`ASR请求失败: ${response.statusCode}`)
+            }
+        } catch (error) {
+            console.error('❌ ASR请求失败:', error)
+
+            // Hide the loading indicator.
+            uni.hideLoading()
+
+            uni.showToast({
+                title: 'ASR识别失败',
+                icon: 'none',
+                duration: 2000
+            })
+            throw error
+        }
+    },
     goRecharge() {
         uni.showToast({
             title: '充值功能开发中',
diff --git a/xuniYou/utils/request.js b/xuniYou/utils/request.js
index 3900195..57707e5 100644
--- a/xuniYou/utils/request.js
+++ b/xuniYou/utils/request.js
@@ -1,7 +1,7 @@
 // Windows 本地开发 - 混合架构
 export const baseURL = 'http://192.168.1.141:30100' // PHP 处理用户管理和界面
 // export const baseURL = 'http://1.15.149.240:30100' // PHP 处理用户管理和界面
-export const baseURLPy = 'http://192.168.1.141:30101' // FastAPI 处理 AI 功能
+export const baseURLPy = 'http://192.168.1.141:30102' // FastAPI 处理 AI 功能 (更新端口)
 // export const baseURLPy = 'http://1.15.149.240:30101' // FastAPI 处理 AI 功能
 
 // 远程服务器 - 需要时取消注释