尝试部署之前
This commit is contained in:
parent
1dc2883922
commit
32641ad519
|
|
@ -73,6 +73,13 @@
|
|||
<artifactId>spring-boot-starter-websocket</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- 百度语音识别SDK -->
|
||||
<dependency>
|
||||
<groupId>com.baidu.aip</groupId>
|
||||
<artifactId>java-sdk</artifactId>
|
||||
<version>4.16.18</version>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,212 @@
|
|||
package com.ddnai.web.controller.study;
|
||||
|
||||
import com.baidu.aip.speech.AipSpeech;
|
||||
import org.json.JSONObject;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
|
||||
/**
|
||||
* 百度语音识别服务
|
||||
* 主机访问百度云,内网设备通过主机使用语音识别
|
||||
*
|
||||
* @author ddnai
|
||||
*/
|
||||
@Service
|
||||
public class BaiduSpeechService
|
||||
{
|
||||
private static final Logger log = LoggerFactory.getLogger(BaiduSpeechService.class);
|
||||
|
||||
// 百度语音识别配置
|
||||
// 申请地址:https://console.bce.baidu.com/ai/#/ai/speech/overview/index
|
||||
private static final String APP_ID = "7307076";
|
||||
private static final String API_KEY = "RtL2IfV3FbLnVDDacRV6QDae";
|
||||
private static final String SECRET_KEY = "NobJaGFov7II95fnFUBNGBk0Wm3fcNIB";
|
||||
|
||||
private AipSpeech client;
|
||||
|
||||
/**
|
||||
* 初始化百度语音客户端
|
||||
*/
|
||||
public BaiduSpeechService()
|
||||
{
|
||||
try
|
||||
{
|
||||
client = new AipSpeech(APP_ID, API_KEY, SECRET_KEY);
|
||||
// 设置超时
|
||||
client.setConnectionTimeoutInMillis(5000);
|
||||
client.setSocketTimeoutInMillis(30000);
|
||||
log.info("百度语音客户端初始化成功");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
log.error("百度语音客户端初始化失败", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 识别音频文件
|
||||
*
|
||||
* @param audioFile 音频文件(支持MP3、WAV、PCM等)
|
||||
* @return 识别结果文本
|
||||
*/
|
||||
public String recognizeAudio(File audioFile) throws IOException
|
||||
{
|
||||
return recognizeAudio(audioFile, getAudioFormat(audioFile.getName()));
|
||||
}
|
||||
|
||||
/**
|
||||
* 识别音频文件(指定格式)
|
||||
*
|
||||
* @param audioFile 音频文件
|
||||
* @param format 音频格式(pcm/wav/mp3/m4a)
|
||||
* @return 识别结果文本
|
||||
*/
|
||||
public String recognizeAudio(File audioFile, String format) throws IOException
|
||||
{
|
||||
if (client == null)
|
||||
{
|
||||
throw new RuntimeException("百度语音客户端未初始化");
|
||||
}
|
||||
|
||||
// 读取音频文件
|
||||
byte[] audioData = readFileToBytes(audioFile);
|
||||
|
||||
// 计算音频时长(估算)
|
||||
double durationSeconds = 0;
|
||||
if ("wav".equals(format))
|
||||
{
|
||||
// WAV格式:16kHz, 单声道, 16位 = 32000 bytes/s
|
||||
// 减去44字节的WAV文件头
|
||||
durationSeconds = (audioData.length - 44) / 32000.0;
|
||||
}
|
||||
else if ("pcm".equals(format))
|
||||
{
|
||||
// PCM格式:无文件头
|
||||
durationSeconds = audioData.length / 32000.0;
|
||||
}
|
||||
|
||||
log.info("调用百度API识别音频 - 大小: {} bytes, 格式: {}, 估算时长: {}秒",
|
||||
audioData.length, format, String.format("%.2f", durationSeconds));
|
||||
|
||||
// 检查音频时长
|
||||
if (durationSeconds < 0.5)
|
||||
{
|
||||
log.warn("音频时长过短: {}秒,可能无法识别", String.format("%.2f", durationSeconds));
|
||||
}
|
||||
else if (durationSeconds > 10.0)
|
||||
{
|
||||
log.warn("音频时长过长: {}秒。百度短语音识别最佳时长为3-10秒,超过10秒可能只识别部分内容",
|
||||
String.format("%.2f", durationSeconds));
|
||||
}
|
||||
else if (durationSeconds >= 2.0 && durationSeconds <= 10.0)
|
||||
{
|
||||
log.info("音频时长合适: {}秒(推荐范围:2-10秒)", String.format("%.2f", durationSeconds));
|
||||
}
|
||||
|
||||
// 调用百度API
|
||||
// format: 音频格式,支持 pcm/wav/mp3/m4a
|
||||
// rate: 采样率,支持 8000/16000
|
||||
HashMap<String, Object> options = new HashMap<>();
|
||||
options.put("dev_pid", 1537); // 中文普通话识别
|
||||
|
||||
JSONObject result = client.asr(audioData, format, 16000, options);
|
||||
|
||||
log.info("百度API响应: {}", result.toString());
|
||||
|
||||
// 解析结果
|
||||
int errNo = result.getInt("err_no");
|
||||
if (errNo == 0)
|
||||
{
|
||||
// 成功
|
||||
StringBuilder text = new StringBuilder();
|
||||
if (result.has("result"))
|
||||
{
|
||||
org.json.JSONArray resultArray = result.getJSONArray("result");
|
||||
log.info("识别结果数组长度: {}", resultArray.length());
|
||||
for (int i = 0; i < resultArray.length(); i++)
|
||||
{
|
||||
String part = resultArray.getString(i);
|
||||
log.info("识别片段[{}]: '{}'", i, part);
|
||||
text.append(part);
|
||||
}
|
||||
}
|
||||
|
||||
String recognizedText = text.toString().trim();
|
||||
|
||||
if (recognizedText.isEmpty())
|
||||
{
|
||||
log.warn("百度API返回成功但识别结果为空 - 可能原因: 音频太短({}秒)、无声音或音量太小",
|
||||
String.format("%.2f", durationSeconds));
|
||||
}
|
||||
else
|
||||
{
|
||||
log.info("识别成功: '{}', 长度: {}", recognizedText, recognizedText.length());
|
||||
}
|
||||
|
||||
return recognizedText;
|
||||
}
|
||||
else
|
||||
{
|
||||
// 失败
|
||||
String errMsg = result.optString("err_msg", "未知错误");
|
||||
log.error("识别失败: err_no={}, err_msg={}", errNo, errMsg);
|
||||
throw new RuntimeException("百度API识别失败: " + errMsg);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 读取文件为字节数组
|
||||
*/
|
||||
private byte[] readFileToBytes(File file) throws IOException
|
||||
{
|
||||
try (FileInputStream fis = new FileInputStream(file))
|
||||
{
|
||||
byte[] data = new byte[(int) file.length()];
|
||||
fis.read(data);
|
||||
return data;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据文件名获取音频格式
|
||||
*/
|
||||
private String getAudioFormat(String filename)
|
||||
{
|
||||
if (filename == null)
|
||||
{
|
||||
return "mp3";
|
||||
}
|
||||
|
||||
String lowerName = filename.toLowerCase();
|
||||
if (lowerName.endsWith(".wav"))
|
||||
{
|
||||
return "wav";
|
||||
}
|
||||
else if (lowerName.endsWith(".pcm"))
|
||||
{
|
||||
return "pcm";
|
||||
}
|
||||
else if (lowerName.endsWith(".m4a"))
|
||||
{
|
||||
return "m4a";
|
||||
}
|
||||
else
|
||||
{
|
||||
return "mp3"; // 默认MP3
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 检查服务是否可用
|
||||
*/
|
||||
public boolean isAvailable()
|
||||
{
|
||||
return client != null && !APP_ID.equals("你的APP_ID");
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,222 @@
|
|||
package com.ddnai.web.controller.study;
|
||||
|
||||
import java.io.File;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
import com.ddnai.common.core.controller.BaseController;
|
||||
import com.ddnai.common.core.domain.AjaxResult;
|
||||
|
||||
/**
|
||||
* 语音识别服务(使用百度云API)
|
||||
* 主机访问百度云,内网设备通过主机使用语音识别
|
||||
*
|
||||
* @author ddnai
|
||||
*/
|
||||
@RestController
|
||||
@RequestMapping("/api/speech")
|
||||
public class VoiceSpeechController extends BaseController
|
||||
{
|
||||
private static final Logger log = LoggerFactory.getLogger(VoiceSpeechController.class);
|
||||
|
||||
@Autowired
|
||||
private BaiduSpeechService baiduSpeechService;
|
||||
|
||||
/**
|
||||
* 健康检查接口
|
||||
*/
|
||||
@GetMapping("/health")
|
||||
public AjaxResult health()
|
||||
{
|
||||
try
|
||||
{
|
||||
boolean available = baiduSpeechService.isAvailable();
|
||||
|
||||
if (available)
|
||||
{
|
||||
log.info("百度语音服务正常");
|
||||
return success("语音服务正常(百度云API)");
|
||||
}
|
||||
else
|
||||
{
|
||||
log.warn("百度语音服务未配置");
|
||||
return error("语音服务未配置,请填写百度API密钥");
|
||||
}
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
log.error("语音服务检查失败", e);
|
||||
return error("语音服务不可用: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 语音识别接口
|
||||
* 使用百度云API进行识别(主机访问百度云,内网设备通过主机使用)
|
||||
*/
|
||||
@PostMapping("/recognize")
|
||||
public AjaxResult recognize(
|
||||
@RequestParam("audio") MultipartFile audioFile,
|
||||
@RequestParam(value = "referenceText", required = false) String referenceText,
|
||||
@RequestParam(value = "format", required = false, defaultValue = "wav") String format)
|
||||
{
|
||||
File tempInputFile = null;
|
||||
|
||||
try
|
||||
{
|
||||
log.info("收到语音识别请求 - 文件名: {}, 大小: {} bytes, 格式: {}, 参考文本: {}",
|
||||
audioFile.getOriginalFilename(), audioFile.getSize(), format, referenceText);
|
||||
|
||||
// 保存上传的文件,使用指定的格式
|
||||
tempInputFile = File.createTempFile("voice_", "." + format);
|
||||
audioFile.transferTo(tempInputFile);
|
||||
|
||||
log.info("音频文件已保存: {}, 大小: {} bytes",
|
||||
tempInputFile.getAbsolutePath(), tempInputFile.length());
|
||||
|
||||
// 调用百度API识别,传入格式参数
|
||||
log.info("调用百度云API识别,格式: {}...", format);
|
||||
String recognizedText = baiduSpeechService.recognizeAudio(tempInputFile, format);
|
||||
|
||||
if (recognizedText == null || recognizedText.isEmpty())
|
||||
{
|
||||
log.warn("未识别到有效语音");
|
||||
return error("未识别到有效语音。请确保:1) 录音时长至少1秒;2) 说话声音清晰;3) 麦克风权限已授予");
|
||||
}
|
||||
|
||||
log.info("识别成功: {}", recognizedText);
|
||||
|
||||
// 计算相似度评分
|
||||
double score = calculateSimilarity(recognizedText, referenceText != null ? referenceText : "");
|
||||
double pronunciationScore = Math.max(0, score - 5);
|
||||
double fluencyScore = Math.max(0, score - 3);
|
||||
|
||||
log.info("相似度: {}分", score);
|
||||
|
||||
// 返回结果
|
||||
java.util.Map<String, Object> data = new java.util.HashMap<>();
|
||||
data.put("recognizedText", recognizedText);
|
||||
data.put("score", score);
|
||||
data.put("pronunciationScore", pronunciationScore);
|
||||
data.put("fluencyScore", fluencyScore);
|
||||
data.put("status", "completed");
|
||||
|
||||
return success(data);
|
||||
}
|
||||
catch (java.io.IOException e)
|
||||
{
|
||||
log.error("文件处理失败", e);
|
||||
return error("文件处理失败: " + e.getMessage());
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
log.error("语音识别请求失败", e);
|
||||
return error("语音识别失败: " + e.getMessage());
|
||||
}
|
||||
finally
|
||||
{
|
||||
// 清理临时文件
|
||||
if (tempInputFile != null && tempInputFile.exists())
|
||||
{
|
||||
if (tempInputFile.delete())
|
||||
{
|
||||
log.debug("临时文件已删除: {}", tempInputFile.getAbsolutePath());
|
||||
}
|
||||
else
|
||||
{
|
||||
log.warn("临时文件删除失败: {}", tempInputFile.getAbsolutePath());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取文件扩展名
|
||||
*/
|
||||
private String getFileExtension(String filename)
|
||||
{
|
||||
if (filename == null || filename.isEmpty())
|
||||
{
|
||||
return "mp3";
|
||||
}
|
||||
|
||||
int lastDotIndex = filename.lastIndexOf('.');
|
||||
if (lastDotIndex > 0 && lastDotIndex < filename.length() - 1)
|
||||
{
|
||||
return filename.substring(lastDotIndex + 1);
|
||||
}
|
||||
|
||||
return "mp3";
|
||||
}
|
||||
|
||||
/**
|
||||
* 计算文本相似度(0-100分)
|
||||
*/
|
||||
private double calculateSimilarity(String text1, String text2)
|
||||
{
|
||||
if (text1 == null || text1.isEmpty() || text2 == null || text2.isEmpty())
|
||||
{
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
// 去除空格
|
||||
text1 = text1.replaceAll("\\s+", "");
|
||||
text2 = text2.replaceAll("\\s+", "");
|
||||
|
||||
if (text1.isEmpty() || text2.isEmpty())
|
||||
{
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
// 使用编辑距离计算相似度
|
||||
int distance = levenshteinDistance(text1, text2);
|
||||
int maxLength = Math.max(text1.length(), text2.length());
|
||||
|
||||
double similarity = (1.0 - (double) distance / maxLength) * 100;
|
||||
return Math.round(similarity * 100.0) / 100.0; // 保留两位小数
|
||||
}
|
||||
|
||||
/**
|
||||
* 计算编辑距离(Levenshtein距离)
|
||||
*/
|
||||
private int levenshteinDistance(String s1, String s2)
|
||||
{
|
||||
int len1 = s1.length();
|
||||
int len2 = s2.length();
|
||||
|
||||
int[][] dp = new int[len1 + 1][len2 + 1];
|
||||
|
||||
for (int i = 0; i <= len1; i++)
|
||||
{
|
||||
dp[i][0] = i;
|
||||
}
|
||||
|
||||
for (int j = 0; j <= len2; j++)
|
||||
{
|
||||
dp[0][j] = j;
|
||||
}
|
||||
|
||||
for (int i = 1; i <= len1; i++)
|
||||
{
|
||||
for (int j = 1; j <= len2; j++)
|
||||
{
|
||||
if (s1.charAt(i - 1) == s2.charAt(j - 1))
|
||||
{
|
||||
dp[i][j] = dp[i - 1][j - 1];
|
||||
}
|
||||
else
|
||||
{
|
||||
dp[i][j] = Math.min(Math.min(dp[i - 1][j], dp[i][j - 1]), dp[i - 1][j - 1]) + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return dp[len1][len2];
|
||||
}
|
||||
}
|
||||
172
baidu_speech_server.py
Normal file
172
baidu_speech_server.py
Normal file
|
|
@ -0,0 +1,172 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
百度语音识别服务(超级简单,不需要ffmpeg)
|
||||
免费额度:每天50000次
|
||||
"""
|
||||
|
||||
from flask import Flask, request, jsonify
|
||||
from flask_cors import CORS
|
||||
import base64
|
||||
import json
|
||||
from difflib import SequenceMatcher
|
||||
|
||||
app = Flask(__name__)
|
||||
CORS(app)
|
||||
|
||||
# 百度语音识别配置(需要申请)
|
||||
# 免费申请地址:https://console.bce.baidu.com/ai/#/ai/speech/overview/index
|
||||
BAIDU_APP_ID = "你的APP_ID" # ← 需要替换
|
||||
BAIDU_API_KEY = "你的API_KEY" # ← 需要替换
|
||||
BAIDU_SECRET_KEY = "你的SECRET_KEY" # ← 需要替换
|
||||
|
||||
# 懒加载客户端
|
||||
asr_client = None
|
||||
|
||||
def get_asr_client():
    """Return the shared Baidu AipSpeech client, creating it on first use.

    The client is cached in the module-level ``asr_client``. Returns ``None``
    when the ``aip`` SDK is not installed or constructing the client fails;
    in that case nothing is cached, so the next call tries again.
    """
    global asr_client

    # Fast path: a client was already built by an earlier call.
    if asr_client is not None:
        return asr_client

    try:
        # Imported lazily so the module can be loaded without the SDK installed.
        from aip import AipSpeech
        asr_client = AipSpeech(BAIDU_APP_ID, BAIDU_API_KEY, BAIDU_SECRET_KEY)
        print("✓ 百度语音客户端初始化成功")
    except ImportError:
        print("✗ 未安装百度SDK,请运行: pip install baidu-aip")
        return None
    except Exception as e:
        print(f"✗ 初始化失败: {str(e)}")
        return None

    return asr_client
|
||||
|
||||
def recognize_audio_baidu(audio_data, format='mp3'):
    """Recognise audio bytes with the Baidu ASR API.

    Args:
        audio_data: raw bytes of the recording.
        format: audio format name passed to Baidu (default ``'mp3'``).

    Returns:
        A ``(text, error)`` tuple. On success ``text`` is the concatenated
        recognition result and ``error`` is ``None``; on failure ``text`` is
        ``None`` and ``error`` is a human-readable message. Never raises.
    """
    try:
        client = get_asr_client()
        if not client:
            return None, "百度语音客户端未初始化"

        # Do not spend an API call on an empty upload.
        if not audio_data:
            return None, "音频数据为空"

        # 16000 is the sample rate; dev_pid 1537 selects Mandarin Chinese.
        result = client.asr(audio_data, format, 16000, {
            'dev_pid': 1537,
        })

        # Read the response defensively: a missing key used to raise KeyError,
        # which the outer handler turned into the unhelpful message "'err_no'".
        if not isinstance(result, dict):
            return None, "识别失败: 未知错误"

        if result.get('err_no') == 0:
            return ''.join(result.get('result') or []), None

        return None, f"识别失败: {result.get('err_msg', '未知错误')}"

    except Exception as e:
        return None, str(e)
|
||||
|
||||
def calculate_similarity(text1, text2):
    """Score the similarity of two texts on a 0-100 scale.

    All whitespace (spaces, tabs, newlines, full-width spaces) is ignored, and
    the score is ``difflib.SequenceMatcher(...).ratio() * 100`` rounded to two
    decimals.

    Returns:
        0 when either text is empty or contains only whitespace, otherwise a
        float between 0 and 100.
    """
    if not text1 or not text2:
        return 0

    # split()/join drops every whitespace character, not only ASCII spaces,
    # so a recording transcribed with line breaks or tabs is not penalised.
    text1 = "".join(text1.split())
    text2 = "".join(text2.split())

    if not text1 or not text2:
        return 0

    # autojunk=False: the default heuristic ignores "popular" characters once a
    # sequence reaches 200 items, which distorts the ratio for long passages.
    similarity = SequenceMatcher(None, text1, text2, autojunk=False).ratio()
    return round(similarity * 100, 2)
|
||||
|
||||
@app.route('/api/speech/recognize', methods=['POST'])
def recognize():
    """Speech recognition endpoint.

    Expects a multipart form with:
        audio: the recording (required).
        referenceText: text the speaker was expected to read (optional).
        format: audio format, one of pcm/wav/mp3/m4a (optional, default mp3).

    Returns a JSON body with ``code``/``msg`` and, on success, ``data``
    holding the recognised text and the derived scores. Errors are reported
    as HTTP 400 (bad request) or 500 (recognition/processing failure).
    """
    try:
        if 'audio' not in request.files:
            return jsonify({
                'code': 400,
                'msg': '未上传音频文件'
            }), 400

        audio_file = request.files['audio']
        reference_text = request.form.get('referenceText', '')

        # Baidu needs the real format of the bytes; it does not detect it.
        # Honour the client's "format" field and keep 'mp3' as the default so
        # callers that send no format behave as before.
        audio_format = (request.form.get('format') or 'mp3').strip().lower()
        if audio_format not in ('pcm', 'wav', 'mp3', 'm4a'):
            return jsonify({
                'code': 400,
                'msg': f'不支持的音频格式: {audio_format}'
            }), 400

        audio_data = audio_file.read()

        print(f"收到音频: {len(audio_data)} bytes")
        print(f"参考文本: {reference_text}")

        recognized_text, error = recognize_audio_baidu(audio_data, format=audio_format)

        if error:
            return jsonify({
                'code': 500,
                'msg': f'识别失败: {error}'
            }), 500

        if not recognized_text:
            return jsonify({
                'code': 500,
                'msg': '未识别到有效语音'
            }), 500

        score = calculate_similarity(recognized_text, reference_text)
        # Pronunciation and fluency are not measured separately: both are the
        # similarity score minus a fixed offset, floored at 0.
        pronunciation_score = max(0, score - 5)
        fluency_score = max(0, score - 3)

        print(f"识别结果: {recognized_text}")
        print(f"相似度: {score}分")

        return jsonify({
            'code': 200,
            'msg': '成功',
            'data': {
                'recognizedText': recognized_text,
                'score': score,
                'pronunciationScore': pronunciation_score,
                'fluencyScore': fluency_score,
                'status': 'completed'
            }
        })

    except Exception as e:
        print(f"处理错误: {str(e)}")
        return jsonify({
            'code': 500,
            'msg': f'处理失败: {str(e)}'
        }), 500
|
||||
|
||||
@app.route('/api/speech/health', methods=['GET'])
def health():
    """Health check endpoint.

    Always answers with code 200; ``data.client_ready`` tells whether the
    Baidu client could be obtained.
    """
    client_ready = get_asr_client() is not None
    payload = {
        'code': 200,
        'msg': '服务正常',
        'data': {
            'engine': 'baidu',
            'client_ready': client_ready
        }
    }
    return jsonify(payload)
|
||||
|
||||
if __name__ == '__main__':
    # Start-up banner: setup instructions first, then the listening address.
    rule = "=" * 50

    setup_lines = (
        rule,
        "百度语音识别服务(超简单,无需ffmpeg)",
        rule,
        "",
        "1. 安装依赖: pip install baidu-aip",
        "2. 申请百度API: https://console.bce.baidu.com/ai/#/ai/speech/overview/index",
        "3. 填写 APP_ID, API_KEY, SECRET_KEY",
        "",
    )
    for line in setup_lines:
        print(line)

    # Warn when the credentials are still the template placeholder.
    if BAIDU_APP_ID == "你的APP_ID":
        print("⚠️ 请先配置百度API密钥!")
        print("")

    for line in (rule, "服务启动成功!", "访问地址: http://localhost:5000", rule, ""):
        print(line)

    # Listens on all interfaces so other devices on the network can reach it.
    app.run(host='0.0.0.0', port=5000, debug=False)
|
||||
|
|
@ -79,16 +79,6 @@
|
|||
|
||||
<!-- 语音识别操作区域 -->
|
||||
<view class="action-section" v-if="selectedContent">
|
||||
<!-- 未准备好时显示初始化按钮 -->
|
||||
<button
|
||||
v-if="!isReady && !isLoading"
|
||||
class="action-btn btn-init"
|
||||
@click="initSpeechModel"
|
||||
>
|
||||
<text class="btn-icon">🔄</text>
|
||||
<text class="btn-text">重新初始化</text>
|
||||
</button>
|
||||
|
||||
<!-- 准备好后显示开始按钮 -->
|
||||
<button
|
||||
v-if="isReady"
|
||||
|
|
@ -253,7 +243,9 @@ export default {
|
|||
isSubmitted: false,
|
||||
isSubmitting: false,
|
||||
isSaving: false,
|
||||
pageUnloaded: false // 页面卸载标记
|
||||
pageUnloaded: false, // 页面卸载标记
|
||||
recordStartTime: 0, // 录音开始时间
|
||||
recordingFailCount: 0 // 录音失败次数
|
||||
}
|
||||
},
|
||||
onLoad(options) {
|
||||
|
|
@ -277,18 +269,13 @@ export default {
|
|||
this.pageUnloaded = true
|
||||
|
||||
// #ifdef APP-PLUS
|
||||
// 安全地停止语音识别
|
||||
// 停止录音
|
||||
if (this.isRecording) {
|
||||
this.isRecording = false
|
||||
try {
|
||||
if (typeof stopSpeechVoice === 'function') {
|
||||
stopSpeechVoice()
|
||||
}
|
||||
} catch(e) {
|
||||
console.error('[Speech] 停止识别时出错:', e)
|
||||
}
|
||||
console.log('[Speech] 页面卸载时停止录音')
|
||||
this.handleStop()
|
||||
}
|
||||
// #endif
|
||||
|
||||
this.stopAutoScroll()
|
||||
// 清理定时器
|
||||
if (this.scrollTimer) {
|
||||
|
|
@ -304,6 +291,73 @@ export default {
|
|||
}
|
||||
},
|
||||
methods: {
|
||||
// 初始化语音服务
|
||||
async initSpeechService() {
|
||||
try {
|
||||
console.log('[Speech] 开始初始化语音服务')
|
||||
|
||||
// 请求录音权限
|
||||
const permissionResult = await this.requestRecordPermission()
|
||||
if (!permissionResult) {
|
||||
this.statusText = '需要录音权限'
|
||||
this.debugInfo = '请在设置中开启录音权限'
|
||||
return
|
||||
}
|
||||
|
||||
// 初始化录音器
|
||||
speechRecorder.init()
|
||||
this.statusText = '准备就绪'
|
||||
this.isReady = true
|
||||
console.log('[Speech] 语音服务初始化成功')
|
||||
} catch (error) {
|
||||
console.error('[Speech] 初始化失败', error)
|
||||
this.statusText = '初始化失败'
|
||||
this.debugInfo = '错误: ' + error.message
|
||||
|
||||
uni.showToast({
|
||||
title: '语音服务初始化失败',
|
||||
icon: 'none',
|
||||
duration: 2000
|
||||
})
|
||||
}
|
||||
},
|
||||
|
||||
// 请求录音权限
|
||||
requestRecordPermission() {
|
||||
return new Promise((resolve) => {
|
||||
// #ifdef APP-PLUS
|
||||
const permissions = ['android.permission.RECORD_AUDIO']
|
||||
|
||||
plus.android.requestPermissions(
|
||||
permissions,
|
||||
(result) => {
|
||||
console.log('[Speech] 权限请求结果', result)
|
||||
const granted = result.granted && result.granted.length > 0
|
||||
if (granted) {
|
||||
console.log('[Speech] 录音权限已授予')
|
||||
resolve(true)
|
||||
} else {
|
||||
console.log('[Speech] 录音权限被拒绝')
|
||||
uni.showModal({
|
||||
title: '需要录音权限',
|
||||
content: '语音评测需要使用您的麦克风,请在设置中开启录音权限',
|
||||
showCancel: false
|
||||
})
|
||||
resolve(false)
|
||||
}
|
||||
},
|
||||
(error) => {
|
||||
console.error('[Speech] 权限请求失败', error)
|
||||
resolve(false)
|
||||
}
|
||||
)
|
||||
// #endif
|
||||
// #ifndef APP-PLUS
|
||||
resolve(true)
|
||||
// #endif
|
||||
})
|
||||
},
|
||||
|
||||
async loadContentList() {
|
||||
this.loadingContent = true
|
||||
try {
|
||||
|
|
@ -361,102 +415,7 @@ export default {
|
|||
},
|
||||
|
||||
// #ifdef APP-PLUS
|
||||
initSpeechModel() {
|
||||
console.log('[Speech] ========== 开始初始化语音模型 ==========')
|
||||
this.isLoading = true
|
||||
this.statusText = '正在初始化模型...'
|
||||
this.debugInfo = '检查模型缓存...'
|
||||
|
||||
try {
|
||||
// 先检查是否有已保存的模型路径
|
||||
const savedModelPath = uni.getStorageSync('vosk_model_path')
|
||||
console.log('[Speech] 已保存的模型路径:', savedModelPath)
|
||||
|
||||
if (savedModelPath) {
|
||||
this.debugInfo = '加载已解压模型: ' + savedModelPath.substring(savedModelPath.length - 30)
|
||||
this.statusText = '正在加载已解压模型...'
|
||||
|
||||
initVoskModel({
|
||||
modelPath: savedModelPath,
|
||||
zipModelPath: ''
|
||||
}, (result) => {
|
||||
console.log('[Speech] 加载已保存模型结果:', JSON.stringify(result))
|
||||
if (result && result.data && result.data.modelPath) {
|
||||
this.modelPath = result.data.modelPath
|
||||
this.isReady = true
|
||||
this.isLoading = false
|
||||
this.statusText = '准备就绪,可以开始说话'
|
||||
this.debugInfo = '模型已加载: ' + this.modelPath.substring(this.modelPath.length - 20)
|
||||
console.log('[Speech] 模型加载成功:', this.modelPath)
|
||||
} else {
|
||||
console.log('[Speech] 已保存模型加载失败,尝试从静态资源加载')
|
||||
// 清除无效的缓存
|
||||
uni.removeStorageSync('vosk_model_path')
|
||||
this.initFromStatic()
|
||||
}
|
||||
})
|
||||
} else {
|
||||
this.initFromStatic()
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('[Speech] 初始化错误:', error)
|
||||
this.isLoading = false
|
||||
this.statusText = '初始化失败'
|
||||
this.debugInfo = '错误: ' + (error.message || JSON.stringify(error))
|
||||
uni.showToast({ title: '模型初始化失败', icon: 'none' })
|
||||
}
|
||||
},
|
||||
|
||||
initFromStatic() {
|
||||
console.log('[Speech] ========== 从静态资源加载模型 ==========')
|
||||
this.statusText = '正在解压模型文件...'
|
||||
this.debugInfo = '首次加载,正在解压模型(约需30秒)...'
|
||||
|
||||
try {
|
||||
// 模型文件路径
|
||||
const staticZipPath = '/static/vosk-model-small-cn-0.22.zip'
|
||||
let resolvedPath = staticZipPath
|
||||
|
||||
// 转换路径
|
||||
if (typeof plus !== 'undefined' && plus.io && typeof plus.io.convertLocalFileSystemURL === 'function') {
|
||||
resolvedPath = plus.io.convertLocalFileSystemURL(staticZipPath)
|
||||
console.log('[Speech] 转换后的路径:', resolvedPath)
|
||||
}
|
||||
|
||||
this.debugInfo = '模型路径: ' + resolvedPath
|
||||
|
||||
initVoskModel({
|
||||
zipModelPath: resolvedPath
|
||||
}, (result) => {
|
||||
console.log('[Speech] 静态资源加载结果:', JSON.stringify(result))
|
||||
|
||||
if (result && result.data && result.data.modelPath) {
|
||||
this.modelPath = result.data.modelPath
|
||||
// 保存模型路径,下次直接加载
|
||||
uni.setStorageSync('vosk_model_path', result.data.modelPath)
|
||||
this.isReady = true
|
||||
this.isLoading = false
|
||||
this.statusText = '准备就绪,可以开始说话'
|
||||
this.debugInfo = '模型解压成功'
|
||||
console.log('[Speech] 模型解压成功:', this.modelPath)
|
||||
uni.showToast({ title: '模型加载成功', icon: 'success', duration: 2000 })
|
||||
} else {
|
||||
console.error('[Speech] 模型加载失败,结果:', result)
|
||||
this.isLoading = false
|
||||
this.statusText = '模型加载失败'
|
||||
this.debugInfo = '加载失败: ' + JSON.stringify(result)
|
||||
uni.showToast({ title: '模型加载失败,请检查模型文件', icon: 'none', duration: 3000 })
|
||||
}
|
||||
})
|
||||
} catch (error) {
|
||||
console.error('[Speech] 加载静态资源失败:', error)
|
||||
this.isLoading = false
|
||||
this.statusText = '模型加载失败'
|
||||
this.debugInfo = '异常: ' + (error.message || JSON.stringify(error))
|
||||
}
|
||||
},
|
||||
|
||||
handleStart() {
|
||||
async handleStart() {
|
||||
if (!this.isReady) {
|
||||
uni.showToast({ title: '未准备好,请稍候', icon: 'none' })
|
||||
return
|
||||
|
|
@ -467,19 +426,46 @@ export default {
|
|||
return
|
||||
}
|
||||
|
||||
// 新方案:使用uni原生录音
|
||||
console.log('[Speech] 开始录音')
|
||||
if (!this.selectedContent) {
|
||||
uni.showToast({ title: '请先选择题目', icon: 'none' })
|
||||
return
|
||||
}
|
||||
|
||||
// 启动录音
|
||||
this.isRecording = true
|
||||
this.recordStartTime = Date.now() // 记录开始时间
|
||||
console.log('[Speech] 录音开始时间:', this.recordStartTime)
|
||||
this.statusText = '正在录音...'
|
||||
this.recognizedText = ''
|
||||
this.scoreResult = null
|
||||
this.hasFirstResult = false
|
||||
this.debugInfo = '录音中,请朗读...'
|
||||
this.debugInfo = '最佳时长:3-10秒'
|
||||
|
||||
speechRecorder.start({
|
||||
duration: 60000,
|
||||
sampleRate: 16000
|
||||
try {
|
||||
speechRecorder.start()
|
||||
uni.showToast({ title: '开始录音!请大声说话(推荐3-10秒)', icon: 'none', duration: 2500 })
|
||||
|
||||
// 3秒后提示可以停止
|
||||
setTimeout(() => {
|
||||
if (this.isRecording) {
|
||||
uni.showToast({ title: '可以停止了(最佳时长3-10秒)', icon: 'success', duration: 1500 })
|
||||
}
|
||||
}, 3000)
|
||||
|
||||
// 10秒后提示不要太长
|
||||
setTimeout(() => {
|
||||
if (this.isRecording) {
|
||||
uni.showToast({ title: '建议尽快停止,避免过长', icon: 'none', duration: 2000 })
|
||||
}
|
||||
}, 10000)
|
||||
} catch (error) {
|
||||
console.error('[Speech] 录音启动失败:', error)
|
||||
this.isRecording = false
|
||||
this.statusText = '录音启动失败'
|
||||
uni.showToast({
|
||||
title: '录音失败: ' + error.message,
|
||||
icon: 'none'
|
||||
})
|
||||
}
|
||||
},
|
||||
|
||||
async handleStop() {
|
||||
|
|
@ -490,11 +476,110 @@ export default {
|
|||
return
|
||||
}
|
||||
|
||||
// 检查录音时长(百度API最佳识别范围:2-10秒)
|
||||
const now = Date.now()
|
||||
const recordDuration = (now - this.recordStartTime) / 1000
|
||||
console.log('[Speech] 当前时间:', now)
|
||||
console.log('[Speech] 开始时间:', this.recordStartTime)
|
||||
console.log('[Speech] 录音时长:', recordDuration, '秒')
|
||||
|
||||
// 太短(<1.5秒)
|
||||
if (!this.recordStartTime || recordDuration < 1.5) {
|
||||
this.isRecording = true // 继续录音状态
|
||||
uni.showModal({
|
||||
title: '录音时长不够',
|
||||
content: `当前只录了${recordDuration.toFixed(1)}秒,请继续说话至少2秒!`,
|
||||
showCancel: false,
|
||||
confirmText: '继续录音'
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
// 太长(>15秒)- 提示但允许继续
|
||||
if (recordDuration > 15) {
|
||||
uni.showModal({
|
||||
title: '录音时长过长',
|
||||
content: `已录${recordDuration.toFixed(1)}秒。百度语音识别最佳时长为3-10秒,过长可能只识别部分内容。是否继续?`,
|
||||
cancelText: '继续录音',
|
||||
confirmText: '立即识别',
|
||||
success: (res) => {
|
||||
if (!res.confirm) {
|
||||
this.isRecording = true // 继续录音
|
||||
} else {
|
||||
// 用户选择识别,继续执行
|
||||
this.processSpeech()
|
||||
}
|
||||
}
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
// 通过检查,执行识别
|
||||
this.processSpeech()
|
||||
},
|
||||
|
||||
async processSpeech() {
|
||||
const actualDuration = (Date.now() - this.recordStartTime) / 1000
|
||||
console.log('[Speech] 实际录音时长:', actualDuration, '秒')
|
||||
|
||||
this.isRecording = false
|
||||
uni.showLoading({ title: '评测中...', mask: true })
|
||||
uni.showLoading({ title: `正在处理(${actualDuration.toFixed(1)}秒)...`, mask: true })
|
||||
|
||||
try {
|
||||
// 停止录音并上传评测
|
||||
// 1. 先停止录音,获取录音文件路径
|
||||
console.log('[Speech] 停止录音中...')
|
||||
const filePath = await speechRecorder.stop()
|
||||
console.log('[Speech] 录音文件路径:', filePath)
|
||||
console.log('[Speech] 点击停止时的时长:', actualDuration, '秒')
|
||||
|
||||
if (!filePath) {
|
||||
throw new Error('录音文件获取失败')
|
||||
}
|
||||
|
||||
// 验证文件大小(关键!检测录音是否完整)
|
||||
const fileInfo = await new Promise((resolve, reject) => {
|
||||
uni.getFileInfo({
|
||||
filePath: filePath,
|
||||
success: resolve,
|
||||
fail: reject
|
||||
})
|
||||
})
|
||||
|
||||
const fileSize = fileInfo.size
|
||||
const expectedSize = actualDuration * 32000 // 16kHz单声道16bit = 32000 bytes/s
|
||||
const sizeRatio = fileSize / expectedSize
|
||||
|
||||
console.log('[Speech] 文件大小:', fileSize, 'bytes')
|
||||
console.log('[Speech] 预期大小:', expectedSize.toFixed(0), 'bytes')
|
||||
console.log('[Speech] 完整度:', (sizeRatio * 100).toFixed(1), '%')
|
||||
|
||||
// 如果文件大小 < 预期的30%,说明严重丢失数据
|
||||
if (sizeRatio < 0.3) {
|
||||
uni.hideLoading()
|
||||
uni.showModal({
|
||||
title: '录音文件不完整',
|
||||
content: `录音${actualDuration.toFixed(1)}秒,但文件只有${(fileSize/32000).toFixed(1)}秒。您的设备可能不支持此录音方式。\n\n建议:\n1. 重试并说慢一点\n2. 录音时长控制在3-5秒\n3. 或使用手动输入`,
|
||||
showCancel: true,
|
||||
cancelText: '重试',
|
||||
confirmText: '手动输入',
|
||||
success: (res) => {
|
||||
if (res.confirm) {
|
||||
this.showManualInput()
|
||||
}
|
||||
}
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
// 如果文件大小30%-70%,警告但继续
|
||||
if (sizeRatio < 0.7) {
|
||||
console.warn('[Speech] 警告:录音文件可能不完整,完整度:', (sizeRatio * 100).toFixed(1), '%')
|
||||
}
|
||||
|
||||
// 2. 上传并评测
|
||||
uni.showLoading({ title: '评测中...', mask: true })
|
||||
console.log('[Speech] 开始评测...')
|
||||
|
||||
const result = await speechRecorder.evaluateAsync(
|
||||
this.selectedContent?.content || '测试文本',
|
||||
this.selectedContent?.id
|
||||
|
|
@ -512,18 +597,59 @@ export default {
|
|||
this.hasFirstResult = true
|
||||
this.statusText = '评测完成'
|
||||
this.debugInfo = `得分:${result.score}分`
|
||||
this.recordingFailCount = 0 // 成功后重置失败计数
|
||||
uni.showToast({ title: `得分:${result.score}分`, icon: 'success' })
|
||||
} else {
|
||||
this.statusText = '评测失败'
|
||||
this.debugInfo = result.error || '评测失败'
|
||||
this.recordingFailCount++
|
||||
|
||||
// 连续失败2次,建议手动输入
|
||||
if (this.recordingFailCount >= 2) {
|
||||
setTimeout(() => {
|
||||
uni.showModal({
|
||||
title: '录音功能异常',
|
||||
content: '您的设备录音功能可能不兼容,已连续失败' + this.recordingFailCount + '次。\n\n建议使用手动输入功能完成练习。',
|
||||
cancelText: '重试',
|
||||
confirmText: '手动输入',
|
||||
success: (res) => {
|
||||
if (res.confirm) {
|
||||
this.showManualInput()
|
||||
} else {
|
||||
this.recordingFailCount = 0 // 用户选择重试,重置计数
|
||||
}
|
||||
}
|
||||
})
|
||||
}, 500)
|
||||
} else {
|
||||
uni.showToast({ title: '评测失败', icon: 'none' })
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
uni.hideLoading()
|
||||
console.error('[Speech] 评测错误:', error)
|
||||
this.statusText = '评测失败'
|
||||
this.debugInfo = error.message
|
||||
uni.showToast({ title: '评测失败', icon: 'none' })
|
||||
this.debugInfo = error.message || '未知错误'
|
||||
this.recordingFailCount++
|
||||
|
||||
// 连续失败2次,建议手动输入
|
||||
if (this.recordingFailCount >= 2) {
|
||||
uni.showModal({
|
||||
title: '录音功能异常',
|
||||
content: '您的设备录音功能可能不兼容,已连续失败' + this.recordingFailCount + '次。\n\n建议使用手动输入功能完成练习。',
|
||||
cancelText: '重试',
|
||||
confirmText: '手动输入',
|
||||
success: (res) => {
|
||||
if (res.confirm) {
|
||||
this.showManualInput()
|
||||
} else {
|
||||
this.recordingFailCount = 0 // 用户选择重试,重置计数
|
||||
}
|
||||
}
|
||||
})
|
||||
} else {
|
||||
uni.showToast({ title: '评测失败: ' + error.message, icon: 'none', duration: 3000 })
|
||||
}
|
||||
}
|
||||
},
|
||||
// #endif
|
||||
|
|
@ -533,15 +659,18 @@ export default {
|
|||
uni.showToast({ title: '语音识别仅支持APP端', icon: 'none' })
|
||||
},
|
||||
handleStop() {},
|
||||
initSpeechModel() {
|
||||
this.statusText = '语音识别仅支持APP端'
|
||||
},
|
||||
// #endif
|
||||
|
||||
// 手动输入文本
|
||||
showManualInput() {
|
||||
if (!this.selectedContent) {
|
||||
uni.showToast({ title: '请先选择题目', icon: 'none' })
|
||||
return
|
||||
}
|
||||
|
||||
uni.showModal({
|
||||
title: '手动输入识别文本',
|
||||
content: this.selectedContent.content || '',
|
||||
editable: true,
|
||||
placeholderText: '请输入您要朗读的内容',
|
||||
success: (res) => {
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@
|
|||
* 支持内网环境,录音后上传到服务器进行识别
|
||||
*/
|
||||
|
||||
import config from './config.js'
|
||||
|
||||
class SpeechRecorder {
|
||||
constructor() {
|
||||
this.recorderManager = null
|
||||
|
|
@ -25,9 +27,26 @@ class SpeechRecorder {
|
|||
|
||||
// 录音结束监听
|
||||
this.recorderManager.onStop((res) => {
|
||||
console.log('[录音] 录音结束', res)
|
||||
console.log('[录音] 录音结束,详细信息:', {
|
||||
tempFilePath: res.tempFilePath,
|
||||
duration: res.duration || '未知',
|
||||
fileSize: res.fileSize || '未知'
|
||||
})
|
||||
this.isRecording = false
|
||||
this.tempFilePath = res.tempFilePath
|
||||
|
||||
// 验证文件是否存在(异步,不阻塞)
|
||||
if (res.tempFilePath) {
|
||||
uni.getFileInfo({
|
||||
filePath: res.tempFilePath,
|
||||
success: (fileInfo) => {
|
||||
console.log('[录音] 文件验证成功,大小:', fileInfo.size, 'bytes')
|
||||
},
|
||||
fail: (err) => {
|
||||
console.error('[录音] 文件验证失败:', err)
|
||||
}
|
||||
})
|
||||
}
|
||||
})
|
||||
|
||||
// 录音错误监听
|
||||
|
|
@ -52,10 +71,11 @@ class SpeechRecorder {
|
|||
|
||||
const defaultOptions = {
|
||||
duration: 60000, // 最长录音时间(毫秒)
|
||||
sampleRate: 16000, // 采样率
|
||||
numberOfChannels: 1, // 声道数
|
||||
encodeBitRate: 96000, // 编码码率
|
||||
format: 'mp3' // 音频格式
|
||||
sampleRate: 16000, // 采样率(百度API推荐)
|
||||
numberOfChannels: 1, // 声道数(单声道)
|
||||
encodeBitRate: 48000, // 编码码率
|
||||
format: 'wav', // 音频格式(WAV格式,百度API完美支持)
|
||||
frameSize: 50 // 指定帧大小,增加缓冲(避免数据丢失)
|
||||
}
|
||||
|
||||
const config = { ...defaultOptions, ...options }
|
||||
|
|
@ -72,6 +92,7 @@ class SpeechRecorder {
|
|||
if (!this.isRecording) {
|
||||
// 如果已经停止了,但有临时文件,返回该文件
|
||||
if (this.tempFilePath) {
|
||||
console.log('[录音] 使用已有的录音文件:', this.tempFilePath)
|
||||
resolve(this.tempFilePath)
|
||||
} else {
|
||||
reject(new Error('未在录音中'))
|
||||
|
|
@ -79,15 +100,37 @@ class SpeechRecorder {
|
|||
return
|
||||
}
|
||||
|
||||
// 注册一次性监听器
|
||||
const onStopHandler = (res) => {
|
||||
this.tempFilePath = res.tempFilePath
|
||||
this.isRecording = false
|
||||
resolve(res.tempFilePath)
|
||||
}
|
||||
console.log('[录音] 准备停止录音...')
|
||||
console.log('[录音] 录音状态:', this.isRecording)
|
||||
|
||||
this.recorderManager.onStop(onStopHandler)
|
||||
// 先停止录音
|
||||
this.recorderManager.stop()
|
||||
|
||||
// 等待800ms让音频缓冲完全写入(增加延迟,解决数据丢失)
|
||||
console.log('[录音] 等待音频缓冲写入...')
|
||||
setTimeout(() => {
|
||||
// 等待录音停止完成
|
||||
const timeout = setTimeout(() => {
|
||||
console.error('[录音] 停止录音超时!')
|
||||
reject(new Error('停止录音超时'))
|
||||
}, 8000)
|
||||
|
||||
// 等待onStop事件(已在init中注册)
|
||||
const checkInterval = setInterval(() => {
|
||||
if (!this.isRecording && this.tempFilePath) {
|
||||
clearTimeout(timeout)
|
||||
clearInterval(checkInterval)
|
||||
console.log('[录音] 停止成功,文件路径:', this.tempFilePath)
|
||||
|
||||
// 再等待500ms确保文件完全写入磁盘(增加延迟)
|
||||
console.log('[录音] 等待文件完全保存...')
|
||||
setTimeout(() => {
|
||||
console.log('[录音] 文件已完全保存,准备返回')
|
||||
resolve(this.tempFilePath)
|
||||
}, 500)
|
||||
}
|
||||
}, 100)
|
||||
}, 800)
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -99,22 +142,28 @@ class SpeechRecorder {
|
|||
*/
|
||||
uploadAndRecognize(filePath, params = {}) {
|
||||
return new Promise((resolve, reject) => {
|
||||
// 获取服务器配置
|
||||
const config = require('./config.js').default
|
||||
// 使用导入的服务器配置
|
||||
// 开发环境可以用 localhost:5000 测试
|
||||
// const serverUrl = 'http://localhost:5000' // Windows本地测试
|
||||
const serverUrl = config.API_BASE_URL
|
||||
|
||||
console.log('[上传] 开始上传录音文件')
|
||||
console.log('[上传] 服务器地址:', serverUrl)
|
||||
console.log('[上传] 文件路径:', filePath)
|
||||
console.log('[上传] 参数:', params)
|
||||
|
||||
uni.uploadFile({
|
||||
url: `${serverUrl}/api/speech/recognize`,
|
||||
filePath: filePath,
|
||||
name: 'audio',
|
||||
formData: {
|
||||
...params,
|
||||
format: 'mp3',
|
||||
format: 'wav', // 匹配录音格式
|
||||
sampleRate: 16000
|
||||
},
|
||||
success: (uploadRes) => {
|
||||
console.log('[上传] 上传成功,状态码:', uploadRes.statusCode)
|
||||
console.log('[上传] 响应数据:', uploadRes.data)
|
||||
if (uploadRes.statusCode === 200) {
|
||||
try {
|
||||
const result = JSON.parse(uploadRes.data)
|
||||
|
|
@ -124,13 +173,16 @@ class SpeechRecorder {
|
|||
reject(new Error(result.msg || '识别失败'))
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('[上传] 解析响应失败:', e)
|
||||
reject(new Error('解析结果失败'))
|
||||
}
|
||||
} else {
|
||||
console.error('[上传] HTTP状态码错误:', uploadRes.statusCode)
|
||||
reject(new Error('上传失败'))
|
||||
}
|
||||
},
|
||||
fail: (err) => {
|
||||
console.error('[上传] 上传请求失败:', err)
|
||||
reject(err)
|
||||
}
|
||||
})
|
||||
|
|
|
|||
135
simple_speech_server.py
Normal file
135
simple_speech_server.py
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
简化版语音服务 - 无需PaddleSpeech
|
||||
仅做文本对比评分,先让APP能用起来
|
||||
"""
|
||||
|
||||
from flask import Flask, request, jsonify
|
||||
from flask_cors import CORS
|
||||
import os
|
||||
import time
|
||||
from difflib import SequenceMatcher
|
||||
|
||||
app = Flask(__name__)
|
||||
CORS(app)
|
||||
|
||||
def calculate_similarity(text1, text2):
    """Score how closely two texts match on a 0-100 scale.

    Both inputs are reduced to their alphanumeric characters (whitespace
    and punctuation are dropped) before being compared with
    ``difflib.SequenceMatcher``.

    Args:
        text1: First text to compare.
        text2: Second text to compare.

    Returns:
        The similarity ratio multiplied by 100 and rounded to two decimals,
        or ``0`` when either text is empty before or after cleaning.
    """
    if not (text1 and text2):
        return 0

    cleaned_a = ''.join(ch for ch in text1 if str.isalnum(ch))
    cleaned_b = ''.join(ch for ch in text2 if str.isalnum(ch))

    if not (cleaned_a and cleaned_b):
        return 0

    ratio = SequenceMatcher(None, cleaned_a, cleaned_b).ratio()
    return round(ratio * 100, 2)
|
||||
|
||||
@app.route('/api/speech/recognize', methods=['POST'])
def recognize():
    """Accept an uploaded recording and ask the client for manual text input.

    Stop-gap endpoint: no speech recognition is performed. When the
    multipart request carries an ``audio`` file it is stored under
    ``./temp_audio`` for later processing; the response always contains an
    empty ``recognizedText``, zeroed scores and ``needManualInput: True`` so
    the front end falls back to manual entry.

    Returns:
        A JSON response with ``code`` 200 on success, or a
        ``(response, 500)`` pair when handling the request raised.
    """
    try:
        # The audio is only archived for now; its content is not inspected.
        if 'audio' in request.files:
            import uuid  # local import: only this branch needs it

            audio_file = request.files['audio']

            temp_dir = './temp_audio'
            # exist_ok avoids the check-then-create race between concurrent requests.
            os.makedirs(temp_dir, exist_ok=True)

            # A millisecond timestamp alone can repeat when two uploads arrive
            # together, which would overwrite one recording with the other;
            # the random suffix keeps every file name unique.
            timestamp = str(int(time.time() * 1000))
            unique_suffix = uuid.uuid4().hex
            temp_path = os.path.join(temp_dir, f'audio_{timestamp}_{unique_suffix}.wav')
            audio_file.save(temp_path)
            print(f"已保存音频: {temp_path}")

        # Simulated recognition: tell the client to collect the text manually.
        return jsonify({
            'code': 200,
            'msg': '音频已接收,请手动输入识别文本进行评分',
            'data': {
                'recognizedText': '',  # intentionally empty; the front end asks the user
                'score': 0,
                'pronunciationScore': 0,
                'fluencyScore': 0,
                'status': 'completed',
                'needManualInput': True  # flags that manual input is required
            }
        })

    except Exception as e:
        print(f"错误: {str(e)}")
        return jsonify({
            'code': 500,
            'msg': f'处理失败: {str(e)}'
        }), 500
|
||||
|
||||
@app.route('/api/speech/evaluate', methods=['POST'])
def evaluate():
    """Score manually entered text against the reference text.

    Reads ``userText`` and ``referenceText`` from the form data. The overall
    score comes from ``calculate_similarity``; the pronunciation and fluency
    scores are that score minus 5 and minus 3 respectively, floored at 0.

    Returns:
        A JSON response with ``code`` 200 and the scores, a
        ``(response, 400)`` pair when ``userText`` is missing or empty, or a
        ``(response, 500)`` pair when scoring raised.
    """
    try:
        form = request.form
        typed_text = form.get('userText', '')
        expected_text = form.get('referenceText', '')

        if typed_text:
            total = calculate_similarity(typed_text, expected_text)
            payload = {
                'recognizedText': typed_text,
                'score': total,
                'pronunciationScore': max(0, total - 5),
                'fluencyScore': max(0, total - 3),
                'status': 'completed'
            }
            return jsonify({'code': 200, 'msg': '评测成功', 'data': payload})

        # Nothing to score without the user's text.
        return jsonify({'code': 400, 'msg': '缺少用户输入文本'}), 400

    except Exception as e:
        return jsonify({'code': 500, 'msg': f'评测失败: {str(e)}'}), 500
|
||||
|
||||
@app.route('/api/speech/health', methods=['GET'])
def health():
    """Health check: report that the simplified service is up.

    Returns:
        A JSON response with ``code`` 200 whose ``data`` marks the build as
        ``simple`` and speech recognition as unavailable.
    """
    service_info = {
        'version': 'simple',
        'speech_recognition': False
    }
    body = {
        'code': 200,
        'msg': '服务正常(简化版)',
        'data': service_info
    }
    return jsonify(body)
|
||||
|
||||
if __name__ == '__main__':
    # Print the startup banner, then serve on every interface at port 5000.
    divider = "=" * 50
    banner_lines = (
        divider,
        "简化版语音服务",
        "说明:音频接收后需手动输入识别文本",
        divider,
        "",
        "服务启动在: http://localhost:5000",
        "",
    )
    for banner_line in banner_lines:
        print(banner_line)

    app.run(host='0.0.0.0', port=5000, debug=False)
|
||||
167
speech_server.py
Normal file
167
speech_server.py
Normal file
|
|
@ -0,0 +1,167 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
PaddleSpeech 语音识别服务
|
||||
Windows 版本
|
||||
"""
|
||||
|
||||
from flask import Flask, request, jsonify
|
||||
from flask_cors import CORS
|
||||
import os
|
||||
import time
|
||||
from difflib import SequenceMatcher
|
||||
|
||||
app = Flask(__name__)
|
||||
CORS(app) # 允许跨域
|
||||
|
||||
# 全局变量
|
||||
asr_model = None
|
||||
model_loaded = False
|
||||
|
||||
def init_model():
    """Load the PaddleSpeech ASR executor into the module-level globals.

    PaddleSpeech is imported inside the function, so an import failure is
    handled like any other load failure. On success ``asr_model`` holds a new
    ``ASRExecutor`` and ``model_loaded`` is ``True``; on any exception
    ``model_loaded`` is set to ``False``.

    Returns:
        ``True`` when the executor was created, ``False`` otherwise.
    """
    global asr_model, model_loaded

    print("正在加载 PaddleSpeech 模型...")
    try:
        from paddlespeech.cli.asr.infer import ASRExecutor as _Executor

        asr_model = _Executor()
        model_loaded = True
        print("✓ 模型加载成功!")
    except Exception as exc:
        print(f"✗ 模型加载失败: {str(exc)}")
        print("提示:首次运行会自动下载模型,需要等待...")
        model_loaded = False

    return model_loaded
|
||||
|
||||
def calculate_similarity(text1, text2):
    """Return the similarity of two texts as a score from 0 to 100.

    Only alphanumeric characters take part in the comparison: spaces and
    punctuation are removed from both texts first. The score is the
    ``difflib.SequenceMatcher`` ratio scaled to 100 and rounded to two
    decimal places.

    Args:
        text1: First text to compare.
        text2: Second text to compare.

    Returns:
        The rounded score, or ``0`` if either text is empty before or after
        the non-alphanumeric characters are removed.
    """
    def _keep_alnum(raw):
        # Drop spaces and punctuation so they do not affect the score.
        return ''.join(filter(str.isalnum, raw))

    if text1 and text2:
        left, right = _keep_alnum(text1), _keep_alnum(text2)
        if left and right:
            matcher = SequenceMatcher(None, left, right)
            return round(matcher.ratio() * 100, 2)

    return 0
|
||||
|
||||
@app.route('/api/speech/recognize', methods=['POST'])
def recognize():
    """Transcribe an uploaded recording and score it against a reference text.

    Expects a multipart POST with an ``audio`` file and an optional
    ``referenceText`` form field. The upload is written to ``./temp_audio``,
    passed to the module-level ``asr_model``, and deleted again whether or
    not recognition succeeds. The overall score comes from
    ``calculate_similarity``; the pronunciation and fluency scores are that
    score minus 5 and minus 3 respectively, floored at 0.

    Returns:
        A JSON response with ``code`` 200 and the transcript and scores; a
        ``(response, 400)`` pair when no ``audio`` file was uploaded; or a
        ``(response, 500)`` pair when the model is not loaded or
        recognition/processing raised.
    """
    try:
        # Refuse early if the model never finished loading.
        if not model_loaded:
            return jsonify({
                'code': 500,
                'msg': '模型未加载,请稍后重试'
            }), 500

        if 'audio' not in request.files:
            return jsonify({
                'code': 400,
                'msg': '未上传音频文件'
            }), 400

        import uuid  # local import: only needed for the temp file name

        audio_file = request.files['audio']
        reference_text = request.form.get('referenceText', '')

        temp_dir = './temp_audio'
        # exist_ok avoids the check-then-create race between concurrent requests.
        os.makedirs(temp_dir, exist_ok=True)

        # A millisecond timestamp alone can repeat for simultaneous uploads,
        # letting one request overwrite or delete another's audio; the random
        # suffix keeps every temp file unique.
        timestamp = str(int(time.time() * 1000))
        temp_path = os.path.join(
            temp_dir, f'audio_{timestamp}_{uuid.uuid4().hex}.wav')
        audio_file.save(temp_path)

        print(f"收到音频文件: {temp_path}")
        print(f"参考文本: {reference_text}")

        try:
            result = asr_model(audio_input=temp_path, force_yes=True)
            recognized_text = result if isinstance(result, str) else str(result)
            print(f"识别结果: {recognized_text}")

            score = calculate_similarity(recognized_text, reference_text)

            # Placeholder sub-scores derived from the overall score.
            pronunciation_score = max(0, score - 5)
            fluency_score = max(0, score - 3)

            return jsonify({
                'code': 200,
                'msg': '成功',
                'data': {
                    'recognizedText': recognized_text,
                    'score': score,
                    'pronunciationScore': pronunciation_score,
                    'fluencyScore': fluency_score,
                    'status': 'completed'
                }
            })

        except Exception as e:
            print(f"识别失败: {str(e)}")
            return jsonify({
                'code': 500,
                'msg': f'识别失败: {str(e)}'
            }), 500

        finally:
            # Single cleanup point for both outcomes. Deletion stays
            # best-effort, but only filesystem errors are ignored.
            try:
                os.remove(temp_path)
            except OSError:
                pass

    except Exception as e:
        print(f"处理错误: {str(e)}")
        return jsonify({
            'code': 500,
            'msg': f'处理失败: {str(e)}'
        }), 500
|
||||
|
||||
@app.route('/api/speech/health', methods=['GET'])
def health():
    """Health check: report whether the ASR model has been loaded.

    Returns:
        A JSON response with ``code`` 200 whose ``data.model_loaded`` mirrors
        the module-level ``model_loaded`` flag.
    """
    status = {'model_loaded': model_loaded}
    body = {
        'code': 200,
        'msg': '服务正常',
        'data': status
    }
    return jsonify(body)
|
||||
|
||||
if __name__ == '__main__':
    divider = "=" * 50

    # Title banner.
    print("\n".join([divider, "PaddleSpeech 语音识别服务", divider, ""]))

    # Load the model before serving (the first run downloads it, which takes time).
    init_model()

    # Address banner.
    print("\n".join([
        "",
        divider,
        "服务启动成功!",
        "访问地址: http://localhost:5000",
        "健康检查: http://localhost:5000/api/speech/health",
        divider,
        "",
    ]))

    # Serve on every interface at port 5000.
    app.run(host='0.0.0.0', port=5000, debug=False)
|
||||
6
vosk-model-small-cn-0.22/README
Normal file
6
vosk-model-small-cn-0.22/README
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
Chinese Vosk model for mobile
|
||||
|
||||
CER results
|
||||
|
||||
23.54% speechio_02
|
||||
38.29% speechio_06
|
||||
BIN
vosk-model-small-cn-0.22/am/final.mdl
Normal file
BIN
vosk-model-small-cn-0.22/am/final.mdl
Normal file
Binary file not shown.
8
vosk-model-small-cn-0.22/conf/mfcc.conf
Normal file
8
vosk-model-small-cn-0.22/conf/mfcc.conf
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
--use-energy=false
|
||||
--sample-frequency=16000
|
||||
--num-mel-bins=40
|
||||
--num-ceps=40
|
||||
--low-freq=40
|
||||
--high-freq=-200
|
||||
--allow-upsample=true
|
||||
--allow-downsample=true
|
||||
10
vosk-model-small-cn-0.22/conf/model.conf
Normal file
10
vosk-model-small-cn-0.22/conf/model.conf
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
--min-active=200
|
||||
--max-active=5000
|
||||
--beam=12.0
|
||||
--lattice-beam=4.0
|
||||
--acoustic-scale=1.0
|
||||
--frame-subsampling-factor=3
|
||||
--endpoint.silence-phones=1:2:3:4:5:6:7:8:9:10
|
||||
--endpoint.rule2.min-trailing-silence=0.5
|
||||
--endpoint.rule3.min-trailing-silence=1.0
|
||||
--endpoint.rule4.min-trailing-silence=2.0
|
||||
BIN
vosk-model-small-cn-0.22/graph/Gr.fst
Normal file
BIN
vosk-model-small-cn-0.22/graph/Gr.fst
Normal file
Binary file not shown.
BIN
vosk-model-small-cn-0.22/graph/HCLr.fst
Normal file
BIN
vosk-model-small-cn-0.22/graph/HCLr.fst
Normal file
Binary file not shown.
39
vosk-model-small-cn-0.22/graph/disambig_tid.int
Normal file
39
vosk-model-small-cn-0.22/graph/disambig_tid.int
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
11845
|
||||
11846
|
||||
11847
|
||||
11848
|
||||
11849
|
||||
11850
|
||||
11851
|
||||
11852
|
||||
11853
|
||||
11854
|
||||
11855
|
||||
11856
|
||||
11857
|
||||
11858
|
||||
11859
|
||||
11860
|
||||
11861
|
||||
11862
|
||||
11863
|
||||
11864
|
||||
11865
|
||||
11866
|
||||
11867
|
||||
11868
|
||||
11869
|
||||
11870
|
||||
11871
|
||||
11872
|
||||
11873
|
||||
11874
|
||||
11875
|
||||
11876
|
||||
11877
|
||||
11878
|
||||
11879
|
||||
11880
|
||||
11881
|
||||
11882
|
||||
11883
|
||||
646
vosk-model-small-cn-0.22/graph/phones/word_boundary.int
Normal file
646
vosk-model-small-cn-0.22/graph/phones/word_boundary.int
Normal file
|
|
@ -0,0 +1,646 @@
|
|||
1 nonword
|
||||
2 begin
|
||||
3 end
|
||||
4 internal
|
||||
5 singleton
|
||||
6 nonword
|
||||
7 begin
|
||||
8 end
|
||||
9 internal
|
||||
10 singleton
|
||||
11 begin
|
||||
12 end
|
||||
13 internal
|
||||
14 singleton
|
||||
15 begin
|
||||
16 end
|
||||
17 internal
|
||||
18 singleton
|
||||
19 begin
|
||||
20 end
|
||||
21 internal
|
||||
22 singleton
|
||||
23 begin
|
||||
24 end
|
||||
25 internal
|
||||
26 singleton
|
||||
27 begin
|
||||
28 end
|
||||
29 internal
|
||||
30 singleton
|
||||
31 begin
|
||||
32 end
|
||||
33 internal
|
||||
34 singleton
|
||||
35 begin
|
||||
36 end
|
||||
37 internal
|
||||
38 singleton
|
||||
39 begin
|
||||
40 end
|
||||
41 internal
|
||||
42 singleton
|
||||
43 begin
|
||||
44 end
|
||||
45 internal
|
||||
46 singleton
|
||||
47 begin
|
||||
48 end
|
||||
49 internal
|
||||
50 singleton
|
||||
51 begin
|
||||
52 end
|
||||
53 internal
|
||||
54 singleton
|
||||
55 begin
|
||||
56 end
|
||||
57 internal
|
||||
58 singleton
|
||||
59 begin
|
||||
60 end
|
||||
61 internal
|
||||
62 singleton
|
||||
63 begin
|
||||
64 end
|
||||
65 internal
|
||||
66 singleton
|
||||
67 begin
|
||||
68 end
|
||||
69 internal
|
||||
70 singleton
|
||||
71 begin
|
||||
72 end
|
||||
73 internal
|
||||
74 singleton
|
||||
75 begin
|
||||
76 end
|
||||
77 internal
|
||||
78 singleton
|
||||
79 begin
|
||||
80 end
|
||||
81 internal
|
||||
82 singleton
|
||||
83 begin
|
||||
84 end
|
||||
85 internal
|
||||
86 singleton
|
||||
87 begin
|
||||
88 end
|
||||
89 internal
|
||||
90 singleton
|
||||
91 begin
|
||||
92 end
|
||||
93 internal
|
||||
94 singleton
|
||||
95 begin
|
||||
96 end
|
||||
97 internal
|
||||
98 singleton
|
||||
99 begin
|
||||
100 end
|
||||
101 internal
|
||||
102 singleton
|
||||
103 begin
|
||||
104 end
|
||||
105 internal
|
||||
106 singleton
|
||||
107 begin
|
||||
108 end
|
||||
109 internal
|
||||
110 singleton
|
||||
111 begin
|
||||
112 end
|
||||
113 internal
|
||||
114 singleton
|
||||
115 begin
|
||||
116 end
|
||||
117 internal
|
||||
118 singleton
|
||||
119 begin
|
||||
120 end
|
||||
121 internal
|
||||
122 singleton
|
||||
123 begin
|
||||
124 end
|
||||
125 internal
|
||||
126 singleton
|
||||
127 begin
|
||||
128 end
|
||||
129 internal
|
||||
130 singleton
|
||||
131 begin
|
||||
132 end
|
||||
133 internal
|
||||
134 singleton
|
||||
135 begin
|
||||
136 end
|
||||
137 internal
|
||||
138 singleton
|
||||
139 begin
|
||||
140 end
|
||||
141 internal
|
||||
142 singleton
|
||||
143 begin
|
||||
144 end
|
||||
145 internal
|
||||
146 singleton
|
||||
147 begin
|
||||
148 end
|
||||
149 internal
|
||||
150 singleton
|
||||
151 begin
|
||||
152 end
|
||||
153 internal
|
||||
154 singleton
|
||||
155 begin
|
||||
156 end
|
||||
157 internal
|
||||
158 singleton
|
||||
159 begin
|
||||
160 end
|
||||
161 internal
|
||||
162 singleton
|
||||
163 begin
|
||||
164 end
|
||||
165 internal
|
||||
166 singleton
|
||||
167 begin
|
||||
168 end
|
||||
169 internal
|
||||
170 singleton
|
||||
171 begin
|
||||
172 end
|
||||
173 internal
|
||||
174 singleton
|
||||
175 begin
|
||||
176 end
|
||||
177 internal
|
||||
178 singleton
|
||||
179 begin
|
||||
180 end
|
||||
181 internal
|
||||
182 singleton
|
||||
183 begin
|
||||
184 end
|
||||
185 internal
|
||||
186 singleton
|
||||
187 begin
|
||||
188 end
|
||||
189 internal
|
||||
190 singleton
|
||||
191 begin
|
||||
192 end
|
||||
193 internal
|
||||
194 singleton
|
||||
195 begin
|
||||
196 end
|
||||
197 internal
|
||||
198 singleton
|
||||
199 begin
|
||||
200 end
|
||||
201 internal
|
||||
202 singleton
|
||||
203 begin
|
||||
204 end
|
||||
205 internal
|
||||
206 singleton
|
||||
207 begin
|
||||
208 end
|
||||
209 internal
|
||||
210 singleton
|
||||
211 begin
|
||||
212 end
|
||||
213 internal
|
||||
214 singleton
|
||||
215 begin
|
||||
216 end
|
||||
217 internal
|
||||
218 singleton
|
||||
219 begin
|
||||
220 end
|
||||
221 internal
|
||||
222 singleton
|
||||
223 begin
|
||||
224 end
|
||||
225 internal
|
||||
226 singleton
|
||||
227 begin
|
||||
228 end
|
||||
229 internal
|
||||
230 singleton
|
||||
231 begin
|
||||
232 end
|
||||
233 internal
|
||||
234 singleton
|
||||
235 begin
|
||||
236 end
|
||||
237 internal
|
||||
238 singleton
|
||||
239 begin
|
||||
240 end
|
||||
241 internal
|
||||
242 singleton
|
||||
243 begin
|
||||
244 end
|
||||
245 internal
|
||||
246 singleton
|
||||
247 begin
|
||||
248 end
|
||||
249 internal
|
||||
250 singleton
|
||||
251 begin
|
||||
252 end
|
||||
253 internal
|
||||
254 singleton
|
||||
255 begin
|
||||
256 end
|
||||
257 internal
|
||||
258 singleton
|
||||
259 begin
|
||||
260 end
|
||||
261 internal
|
||||
262 singleton
|
||||
263 begin
|
||||
264 end
|
||||
265 internal
|
||||
266 singleton
|
||||
267 begin
|
||||
268 end
|
||||
269 internal
|
||||
270 singleton
|
||||
271 begin
|
||||
272 end
|
||||
273 internal
|
||||
274 singleton
|
||||
275 begin
|
||||
276 end
|
||||
277 internal
|
||||
278 singleton
|
||||
279 begin
|
||||
280 end
|
||||
281 internal
|
||||
282 singleton
|
||||
283 begin
|
||||
284 end
|
||||
285 internal
|
||||
286 singleton
|
||||
287 begin
|
||||
288 end
|
||||
289 internal
|
||||
290 singleton
|
||||
291 begin
|
||||
292 end
|
||||
293 internal
|
||||
294 singleton
|
||||
295 begin
|
||||
296 end
|
||||
297 internal
|
||||
298 singleton
|
||||
299 begin
|
||||
300 end
|
||||
301 internal
|
||||
302 singleton
|
||||
303 begin
|
||||
304 end
|
||||
305 internal
|
||||
306 singleton
|
||||
307 begin
|
||||
308 end
|
||||
309 internal
|
||||
310 singleton
|
||||
311 begin
|
||||
312 end
|
||||
313 internal
|
||||
314 singleton
|
||||
315 begin
|
||||
316 end
|
||||
317 internal
|
||||
318 singleton
|
||||
319 begin
|
||||
320 end
|
||||
321 internal
|
||||
322 singleton
|
||||
323 begin
|
||||
324 end
|
||||
325 internal
|
||||
326 singleton
|
||||
327 begin
|
||||
328 end
|
||||
329 internal
|
||||
330 singleton
|
||||
331 begin
|
||||
332 end
|
||||
333 internal
|
||||
334 singleton
|
||||
335 begin
|
||||
336 end
|
||||
337 internal
|
||||
338 singleton
|
||||
339 begin
|
||||
340 end
|
||||
341 internal
|
||||
342 singleton
|
||||
343 begin
|
||||
344 end
|
||||
345 internal
|
||||
346 singleton
|
||||
347 begin
|
||||
348 end
|
||||
349 internal
|
||||
350 singleton
|
||||
351 begin
|
||||
352 end
|
||||
353 internal
|
||||
354 singleton
|
||||
355 begin
|
||||
356 end
|
||||
357 internal
|
||||
358 singleton
|
||||
359 begin
|
||||
360 end
|
||||
361 internal
|
||||
362 singleton
|
||||
363 begin
|
||||
364 end
|
||||
365 internal
|
||||
366 singleton
|
||||
367 begin
|
||||
368 end
|
||||
369 internal
|
||||
370 singleton
|
||||
371 begin
|
||||
372 end
|
||||
373 internal
|
||||
374 singleton
|
||||
375 begin
|
||||
376 end
|
||||
377 internal
|
||||
378 singleton
|
||||
379 begin
|
||||
380 end
|
||||
381 internal
|
||||
382 singleton
|
||||
383 begin
|
||||
384 end
|
||||
385 internal
|
||||
386 singleton
|
||||
387 begin
|
||||
388 end
|
||||
389 internal
|
||||
390 singleton
|
||||
391 begin
|
||||
392 end
|
||||
393 internal
|
||||
394 singleton
|
||||
395 begin
|
||||
396 end
|
||||
397 internal
|
||||
398 singleton
|
||||
399 begin
|
||||
400 end
|
||||
401 internal
|
||||
402 singleton
|
||||
403 begin
|
||||
404 end
|
||||
405 internal
|
||||
406 singleton
|
||||
407 begin
|
||||
408 end
|
||||
409 internal
|
||||
410 singleton
|
||||
411 begin
|
||||
412 end
|
||||
413 internal
|
||||
414 singleton
|
||||
415 begin
|
||||
416 end
|
||||
417 internal
|
||||
418 singleton
|
||||
419 begin
|
||||
420 end
|
||||
421 internal
|
||||
422 singleton
|
||||
423 begin
|
||||
424 end
|
||||
425 internal
|
||||
426 singleton
|
||||
427 begin
|
||||
428 end
|
||||
429 internal
|
||||
430 singleton
|
||||
431 begin
|
||||
432 end
|
||||
433 internal
|
||||
434 singleton
|
||||
435 begin
|
||||
436 end
|
||||
437 internal
|
||||
438 singleton
|
||||
439 begin
|
||||
440 end
|
||||
441 internal
|
||||
442 singleton
|
||||
443 begin
|
||||
444 end
|
||||
445 internal
|
||||
446 singleton
|
||||
447 begin
|
||||
448 end
|
||||
449 internal
|
||||
450 singleton
|
||||
451 begin
|
||||
452 end
|
||||
453 internal
|
||||
454 singleton
|
||||
455 begin
|
||||
456 end
|
||||
457 internal
|
||||
458 singleton
|
||||
459 begin
|
||||
460 end
|
||||
461 internal
|
||||
462 singleton
|
||||
463 begin
|
||||
464 end
|
||||
465 internal
|
||||
466 singleton
|
||||
467 begin
|
||||
468 end
|
||||
469 internal
|
||||
470 singleton
|
||||
471 begin
|
||||
472 end
|
||||
473 internal
|
||||
474 singleton
|
||||
475 begin
|
||||
476 end
|
||||
477 internal
|
||||
478 singleton
|
||||
479 begin
|
||||
480 end
|
||||
481 internal
|
||||
482 singleton
|
||||
483 begin
|
||||
484 end
|
||||
485 internal
|
||||
486 singleton
|
||||
487 begin
|
||||
488 end
|
||||
489 internal
|
||||
490 singleton
|
||||
491 begin
|
||||
492 end
|
||||
493 internal
|
||||
494 singleton
|
||||
495 begin
|
||||
496 end
|
||||
497 internal
|
||||
498 singleton
|
||||
499 begin
|
||||
500 end
|
||||
501 internal
|
||||
502 singleton
|
||||
503 begin
|
||||
504 end
|
||||
505 internal
|
||||
506 singleton
|
||||
507 begin
|
||||
508 end
|
||||
509 internal
|
||||
510 singleton
|
||||
511 begin
|
||||
512 end
|
||||
513 internal
|
||||
514 singleton
|
||||
515 begin
|
||||
516 end
|
||||
517 internal
|
||||
518 singleton
|
||||
519 begin
|
||||
520 end
|
||||
521 internal
|
||||
522 singleton
|
||||
523 begin
|
||||
524 end
|
||||
525 internal
|
||||
526 singleton
|
||||
527 begin
|
||||
528 end
|
||||
529 internal
|
||||
530 singleton
|
||||
531 begin
|
||||
532 end
|
||||
533 internal
|
||||
534 singleton
|
||||
535 begin
|
||||
536 end
|
||||
537 internal
|
||||
538 singleton
|
||||
539 begin
|
||||
540 end
|
||||
541 internal
|
||||
542 singleton
|
||||
543 begin
|
||||
544 end
|
||||
545 internal
|
||||
546 singleton
|
||||
547 begin
|
||||
548 end
|
||||
549 internal
|
||||
550 singleton
|
||||
551 begin
|
||||
552 end
|
||||
553 internal
|
||||
554 singleton
|
||||
555 begin
|
||||
556 end
|
||||
557 internal
|
||||
558 singleton
|
||||
559 begin
|
||||
560 end
|
||||
561 internal
|
||||
562 singleton
|
||||
563 begin
|
||||
564 end
|
||||
565 internal
|
||||
566 singleton
|
||||
567 begin
|
||||
568 end
|
||||
569 internal
|
||||
570 singleton
|
||||
571 begin
|
||||
572 end
|
||||
573 internal
|
||||
574 singleton
|
||||
575 begin
|
||||
576 end
|
||||
577 internal
|
||||
578 singleton
|
||||
579 begin
|
||||
580 end
|
||||
581 internal
|
||||
582 singleton
|
||||
583 begin
|
||||
584 end
|
||||
585 internal
|
||||
586 singleton
|
||||
587 begin
|
||||
588 end
|
||||
589 internal
|
||||
590 singleton
|
||||
591 begin
|
||||
592 end
|
||||
593 internal
|
||||
594 singleton
|
||||
595 begin
|
||||
596 end
|
||||
597 internal
|
||||
598 singleton
|
||||
599 begin
|
||||
600 end
|
||||
601 internal
|
||||
602 singleton
|
||||
603 begin
|
||||
604 end
|
||||
605 internal
|
||||
606 singleton
|
||||
607 begin
|
||||
608 end
|
||||
609 internal
|
||||
610 singleton
|
||||
611 begin
|
||||
612 end
|
||||
613 internal
|
||||
614 singleton
|
||||
615 begin
|
||||
616 end
|
||||
617 internal
|
||||
618 singleton
|
||||
619 begin
|
||||
620 end
|
||||
621 internal
|
||||
622 singleton
|
||||
623 begin
|
||||
624 end
|
||||
625 internal
|
||||
626 singleton
|
||||
627 begin
|
||||
628 end
|
||||
629 internal
|
||||
630 singleton
|
||||
631 begin
|
||||
632 end
|
||||
633 internal
|
||||
634 singleton
|
||||
635 begin
|
||||
636 end
|
||||
637 internal
|
||||
638 singleton
|
||||
639 begin
|
||||
640 end
|
||||
641 internal
|
||||
642 singleton
|
||||
643 begin
|
||||
644 end
|
||||
645 internal
|
||||
646 singleton
|
||||
BIN
vosk-model-small-cn-0.22/ivector/final.dubm
Normal file
BIN
vosk-model-small-cn-0.22/ivector/final.dubm
Normal file
Binary file not shown.
BIN
vosk-model-small-cn-0.22/ivector/final.ie
Normal file
BIN
vosk-model-small-cn-0.22/ivector/final.ie
Normal file
Binary file not shown.
BIN
vosk-model-small-cn-0.22/ivector/final.mat
Normal file
BIN
vosk-model-small-cn-0.22/ivector/final.mat
Normal file
Binary file not shown.
3
vosk-model-small-cn-0.22/ivector/global_cmvn.stats
Normal file
3
vosk-model-small-cn-0.22/ivector/global_cmvn.stats
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
[
|
||||
1.117107e+11 -7.827721e+08 -1.101398e+10 -2.193934e+09 -1.347332e+10 -1.613916e+10 -1.199561e+10 -1.255081e+10 -1.638895e+10 -3.821099e+09 -1.372833e+10 -5.244242e+09 -1.098187e+10 -3.655235e+09 -9.364579e+09 -4.285302e+09 -6.296873e+09 -1.552953e+09 -3.176746e+09 -1.202976e+08 -9.857023e+08 2.316555e+08 -1.61059e+08 -5.891868e+07 3.465849e+08 -1.842054e+08 3.248211e+08 -1.483965e+08 3.739239e+08 -6.672061e+08 4.442288e+08 -9.274889e+08 5.142684e+08 4.292036e+07 2.206386e+08 -4.532715e+08 -2.092499e+08 -3.70488e+08 -8.079404e+07 -8.425977e+07 1.344125e+09
|
||||
9.982632e+12 1.02635e+12 8.634624e+11 9.06451e+11 9.652096e+11 1.12772e+12 9.468372e+11 9.141218e+11 9.670484e+11 6.936961e+11 8.141006e+11 6.256321e+11 6.087707e+11 4.616898e+11 4.212042e+11 2.862872e+11 2.498089e+11 1.470856e+11 1.099197e+11 5.780894e+10 3.118114e+10 1.060667e+10 1.466199e+09 4.173056e+08 5.257362e+09 1.277714e+10 2.114478e+10 2.974502e+10 3.587691e+10 4.078971e+10 4.247745e+10 4.382608e+10 4.62521e+10 4.575282e+10 3.546206e+10 3.041531e+10 2.838562e+10 2.258604e+10 1.715295e+10 1.303227e+10 0 ]
|
||||
0
vosk-model-small-cn-0.22/ivector/online_cmvn.conf
Normal file
0
vosk-model-small-cn-0.22/ivector/online_cmvn.conf
Normal file
2
vosk-model-small-cn-0.22/ivector/splice.conf
Normal file
2
vosk-model-small-cn-0.22/ivector/splice.conf
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
--left-context=3
|
||||
--right-context=3
|
||||
305
vosk_speech_server.py
Normal file
305
vosk_speech_server.py
Normal file
|
|
@ -0,0 +1,305 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Vosk 语音识别服务
|
||||
轻量级、易安装、支持离线
|
||||
"""
|
||||
|
||||
from flask import Flask, request, jsonify
|
||||
from flask_cors import CORS
|
||||
import os
|
||||
import json
|
||||
import wave
|
||||
from difflib import SequenceMatcher
|
||||
|
||||
app = Flask(__name__)
|
||||
CORS(app)
|
||||
|
||||
# 全局变量
|
||||
vosk_model = None
|
||||
model_loaded = False
|
||||
|
||||
def init_vosk_model():
    """Load the Vosk model from ``./vosk-model-small-cn-0.22``.

    ``vosk`` is imported inside the function, so an import failure is
    handled like any other load failure. ``KaldiRecognizer`` is published as
    a module-level name so other functions in this module can use it. On
    success ``vosk_model`` holds the loaded model and ``model_loaded`` is
    ``True``.

    Returns:
        ``True`` when the model was loaded; ``False`` when the model
        directory does not exist or loading raised (in which case
        ``model_loaded`` is also set to ``False``).
    """
    # Declaring KaldiRecognizer global makes the import below bind it at
    # module level.
    global vosk_model, model_loaded, KaldiRecognizer

    try:
        from vosk import Model, KaldiRecognizer

        model_path = "./vosk-model-small-cn-0.22"

        if os.path.exists(model_path):
            print(f"正在加载模型: {model_path}")
            vosk_model = Model(model_path)
            model_loaded = True
            print("✓ 模型加载成功!")
            return True

        print(f"[错误] 模型不存在: {model_path}")
        print("请下载模型:https://alphacephei.com/vosk/models/vosk-model-small-cn-0.22.zip")
        return False

    except Exception as exc:
        print(f"✗ 模型加载失败: {str(exc)}")
        model_loaded = False
        return False
|
||||
|
||||
def convert_audio_to_wav(input_path, output_path):
    """Convert an audio file to mono, 16 kHz, 16-bit WAV using pydub.

    Args:
        input_path: Path of the source audio; pydub detects its format.
        output_path: Path the converted WAV file is written to.

    Returns:
        ``(True, None)`` on success, or ``(False, message)`` on failure. The
        message carries ffmpeg installation guidance when the failure looks
        like a missing ffmpeg/ffprobe executable, and a generic
        ``音频转换失败`` message otherwise.
    """
    try:
        from pydub import AudioSegment

        print("[转换] 使用pydub转换音频...")

        # Load the audio (format auto-detected) and normalise it to
        # mono / 16 kHz / 16-bit.
        audio = AudioSegment.from_file(input_path)
        audio = audio.set_channels(1).set_frame_rate(16000).set_sample_width(2)

        audio.export(output_path, format='wav')
        print("[转换] pydub转换成功")
        return True, None

    except Exception as e:
        error_msg = str(e).lower()

        # The exception text never contains the class name, so a missing
        # executable is detected by type. The input file existing rules out
        # "the upload itself is missing" as the cause of the
        # FileNotFoundError.
        missing_executable = (
            isinstance(e, FileNotFoundError) and os.path.exists(input_path)
        )

        if 'ffmpeg' in error_msg or 'ffprobe' in error_msg or missing_executable:
            # The ffmpeg-python pip package is only a wrapper and does not
            # ship the executable, so point users at the binary and PATH.
            return False, (
                "需要安装 ffmpeg 才能转换音频格式。\n"
                "请下载 ffmpeg: https://www.gyan.dev/ffmpeg/builds/\n"
                "并将 ffmpeg 的 bin 目录加入系统 PATH(仅 pip 安装 ffmpeg-python 不包含可执行文件)\n"
                "错误详情: " + str(e)
            )

        return False, f"音频转换失败: {str(e)}"
|
||||
|
||||
def recognize_audio(audio_path):
    """Transcribe an audio file with the loaded Vosk model.

    The file is first opened as WAV. If that fails it is converted with
    ``convert_audio_to_wav`` into ``<audio_path>.converted.wav`` and the
    converted file is used instead. The WAV must be mono, 16-bit and sampled
    at 8000, 16000, 32000 or 48000 Hz.

    Args:
        audio_path: Path of the uploaded audio file.

    Returns:
        ``(text, None)`` with the stripped transcript on success, or
        ``(None, message)`` describing the failure.
    """
    converted_path = None
    wf = None
    try:
        # Try to open the upload directly as WAV first.
        print(f"[识别] 尝试打开音频文件: {audio_path}")
        try:
            wf = wave.open(audio_path, "rb")
            print("[识别] 文件是有效的WAV格式")
        except Exception as e:
            print(f"[识别] 不是有效的WAV格式: {str(e)}")

        # Not a readable WAV: convert it and open the converted copy.
        if wf is None:
            print("[识别] 检测到非WAV格式,开始转换...")
            converted_path = audio_path + '.converted.wav'
            success, error = convert_audio_to_wav(audio_path, converted_path)

            if not success:
                print(f"[识别] 转换失败: {error}")
                return None, f"音频格式转换失败: {error}"

            print(f"[识别] 转换成功: {converted_path}")
            audio_path = converted_path
            wf = wave.open(audio_path, "rb")
            print("[识别] 转换后的文件已打开")

        # Validate the audio parameters before recognition.
        if wf.getnchannels() != 1:
            return None, "音频必须是单声道"

        if wf.getsampwidth() != 2:
            return None, "音频必须是16位"

        frame_rate = wf.getframerate()
        if frame_rate not in [8000, 16000, 32000, 48000]:
            return None, f"不支持的采样率: {frame_rate}"

        rec = KaldiRecognizer(vosk_model, frame_rate)
        rec.SetWords(True)

        # Feed the audio in 4000-frame chunks, collecting each finalised
        # segment's text as the recognizer reports it.
        pieces = []
        while True:
            data = wf.readframes(4000)
            if len(data) == 0:
                break

            if rec.AcceptWaveform(data):
                text = json.loads(rec.Result()).get('text', '')
                if text:
                    pieces.append(text + " ")

        # Append whatever text the recognizer still holds at the end.
        final_text = json.loads(rec.FinalResult()).get('text', '')
        if final_text:
            pieces.append(final_text)

        return "".join(pieces).strip(), None

    except Exception as e:
        return None, str(e)

    finally:
        # Close the handle on every path: an open handle keeps the file
        # locked on Windows, which makes later deletion of the temp file fail.
        if wf is not None:
            wf.close()

        # Remove the converted temp file on success and failure alike.
        if converted_path and os.path.exists(converted_path):
            try:
                os.remove(converted_path)
                print(f"[识别] 已清理转换文件: {converted_path}")
            except OSError as e:
                print(f"[识别] 清理转换文件失败: {e}")
|
||||
|
||||
def calculate_similarity(text1, text2):
    """Score how closely two texts match on a 0-100 scale.

    Both inputs have all whitespace removed before comparison, so word
    spacing, line breaks and full-width spaces do not affect the score.

    Args:
        text1: First text (e.g. the recognised speech).
        text2: Second text (e.g. the reference sentence).

    Returns:
        0 when either text is empty (before or after whitespace removal),
        otherwise the ``SequenceMatcher`` ratio scaled to 0-100 and rounded
        to two decimals.
    """
    if not text1 or not text2:
        return 0

    # str.split() with no argument splits on every Unicode whitespace
    # character (spaces, tabs, newlines, U+3000 ...), not only ASCII spaces.
    text1 = "".join(text1.split())
    text2 = "".join(text2.split())

    if not text1 or not text2:
        return 0

    similarity = SequenceMatcher(None, text1, text2).ratio()
    return round(similarity * 100, 2)
|
||||
|
||||
@app.route('/api/speech/recognize', methods=['POST'])
def recognize():
    """Speech recognition endpoint (POST /api/speech/recognize).

    Reads an ``audio`` file part and an optional ``referenceText`` form
    field, writes the upload to a temporary file, runs ``recognize_audio``
    on it and scores the recognised text against the reference text with
    ``calculate_similarity``.

    Returns:
        A JSON response with ``code`` and ``msg`` (plus ``data`` on
        success). Error responses also carry HTTP status 400 or 500.
    """
    import time
    import uuid

    try:
        # Refuse early when the recognition model is not available.
        if not model_loaded:
            return jsonify({
                'code': 500,
                'msg': '模型未加载,请检查服务器日志'
            }), 500

        # The audio file part is mandatory.
        if 'audio' not in request.files:
            return jsonify({
                'code': 400,
                'msg': '未上传音频文件'
            }), 400

        audio_file = request.files['audio']
        reference_text = request.form.get('referenceText', '')

        temp_dir = './temp_audio'
        # exist_ok avoids a failure when two requests create the directory
        # at the same moment.
        os.makedirs(temp_dir, exist_ok=True)

        # A millisecond timestamp alone can collide when uploads arrive
        # concurrently; the random suffix gives every request its own file.
        timestamp = str(int(time.time() * 1000))
        temp_path = os.path.join(
            temp_dir, f'audio_{timestamp}_{uuid.uuid4().hex}.wav'
        )

        try:
            audio_file.save(temp_path)

            print(f"收到音频: {temp_path}")
            print(f"参考文本: {reference_text}")

            recognized_text, error = recognize_audio(temp_path)
        finally:
            # Remove the upload even if saving or recognition raised.
            try:
                os.remove(temp_path)
            except OSError:
                pass

        if error:
            return jsonify({
                'code': 500,
                'msg': f'识别失败: {error}'
            }), 500

        if not recognized_text:
            return jsonify({
                'code': 500,
                'msg': '未识别到有效语音'
            }), 500

        # The sub-scores are derived from the similarity score with fixed
        # offsets and clamped at zero.
        score = calculate_similarity(recognized_text, reference_text)
        pronunciation_score = max(0, score - 5)
        fluency_score = max(0, score - 3)

        print(f"识别结果: {recognized_text}")
        print(f"相似度: {score}分")

        return jsonify({
            'code': 200,
            'msg': '成功',
            'data': {
                'recognizedText': recognized_text,
                'score': score,
                'pronunciationScore': pronunciation_score,
                'fluencyScore': fluency_score,
                'status': 'completed'
            }
        })

    except Exception as e:
        print(f"处理错误: {str(e)}")
        return jsonify({
            'code': 500,
            'msg': f'处理失败: {str(e)}'
        }), 500
|
||||
|
||||
@app.route('/api/speech/health', methods=['GET'])
def health():
    """Health-check endpoint: reports the model-loaded flag and engine name."""
    engine_status = {
        'model_loaded': model_loaded,
        'engine': 'vosk'
    }
    payload = {
        'code': 200,
        'msg': '服务正常',
        'data': engine_status
    }
    return jsonify(payload)
|
||||
|
||||
if __name__ == '__main__':
    separator = "=" * 50

    # Startup banner.
    for banner_line in (separator, "Vosk 语音识别服务", separator, ""):
        print(banner_line)

    # Load the model first; the web server only starts when that succeeds.
    if not init_vosk_model():
        for failure_line in ("", separator, "服务启动失败!请检查模型文件", separator):
            print(failure_line)
        input("按回车键退出...")
    else:
        success_lines = (
            "",
            separator,
            "服务启动成功!",
            "访问地址: http://localhost:5000",
            "健康检查: http://localhost:5000/api/speech/health",
            separator,
            "",
        )
        for success_line in success_lines:
            print(success_line)

        # Start the web server on all interfaces, port 5000.
        app.run(host='0.0.0.0', port=5000, debug=False)
|
||||
37
启动Vosk服务.bat
Normal file
37
启动Vosk服务.bat
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
@echo off
rem Launcher for the Vosk speech recognition server (vosk_speech_server.py).
rem Ensures the Python packages and the model directory exist, then starts it.
chcp 65001 > nul
echo ======================================
echo 启动 Vosk 语音识别服务
echo ======================================
echo.

rem Run from the script's own directory so relative paths resolve.
cd /d "%~dp0"

echo 检查Vosk...
rem "python -m pip" targets the same interpreter that later runs the server;
rem a bare "pip" on PATH may belong to a different Python installation.
python -m pip show vosk > nul 2>&1
if errorlevel 1 (
    echo 正在安装Vosk...
    python -m pip install vosk -i https://pypi.tuna.tsinghua.edu.cn/simple
)

rem The server uses Flask-style routing, so make sure Flask is present too.
python -m pip show flask > nul 2>&1
if errorlevel 1 (
    echo 正在安装Flask...
    python -m pip install flask -i https://pypi.tuna.tsinghua.edu.cn/simple
)

echo.
echo 检查模型文件...
rem The model must be unpacked next to this script; abort with guidance if not.
if not exist "vosk-model-small-cn-0.22" (
    echo [错误] 模型文件不存在!
    echo 请先下载模型:
    echo https://alphacephei.com/vosk/models/vosk-model-small-cn-0.22.zip
    echo.
    echo 下载后解压到当前目录
    pause
    exit /b 1
)

echo ✓ 模型文件存在
echo.
echo 正在启动服务...
echo 服务地址: http://localhost:5000
echo.

python vosk_speech_server.py

pause
|
||||
24
启动语音服务.bat
Normal file
24
启动语音服务.bat
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
@echo off
rem Launcher for the simplified speech server (simple_speech_server.py).
chcp 65001 > nul
echo ======================================
echo 启动简化版语音服务
echo ======================================
echo.

rem Run from the script's own directory so relative paths resolve.
cd /d "%~dp0"

echo 检查Flask...
rem Check both packages: previously only flask was probed, so a missing
rem flask-cors was never installed when flask was already present.
rem "python -m pip" targets the interpreter that later runs the server.
set "NEED_INSTALL="
python -m pip show flask > nul 2>&1
if errorlevel 1 set "NEED_INSTALL=1"
python -m pip show flask-cors > nul 2>&1
if errorlevel 1 set "NEED_INSTALL=1"

if defined NEED_INSTALL (
    echo 正在安装Flask...
    python -m pip install flask flask-cors -i https://pypi.tuna.tsinghua.edu.cn/simple
)

echo.
echo 正在启动服务...
echo 服务地址: http://localhost:5000
echo.

python simple_speech_server.py

pause
|
||||
34
安装ffmpeg.bat
Normal file
34
安装ffmpeg.bat
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
@echo off
rem Prints the ffmpeg install options, then attempts the winget install.
chcp 65001 > nul
echo ====================================
echo 安装 ffmpeg(语音转换依赖)
echo ====================================
echo.

echo 方案1:使用 Chocolatey 安装(推荐)
echo.
echo 如果已安装 Chocolatey,运行:
echo choco install ffmpeg
echo.
echo 方案2:手动下载
echo.
echo 1. 访问:https://www.gyan.dev/ffmpeg/builds/
echo 2. 下载:ffmpeg-release-essentials.zip
echo 3. 解压到:C:\ffmpeg
echo 4. 添加到PATH:C:\ffmpeg\bin
echo.
echo 方案3:使用 winget 安装
rem Show the same command this script runs below, so the two stay consistent.
echo winget install "FFmpeg (Essentials Build)"
echo.

pause

echo.
echo 尝试使用 winget 安装...
rem Stop with guidance when winget is not available instead of failing silently.
where winget > nul 2>&1
if errorlevel 1 (
    echo 未检测到 winget,请改用方案1或方案2安装
    pause
    exit /b 1
)
winget install "FFmpeg (Essentials Build)"

echo.
echo 安装完成后,请重新打开命令提示符测试:
echo ffmpeg -version
echo.
pause
|
||||
235
录音问题-最终解决方案.md
Normal file
235
录音问题-最终解决方案.md
Normal file
|
|
@ -0,0 +1,235 @@
|
|||
# 录音问题 - 最终诊断与解决方案
|
||||
|
||||
## 🔴 **问题确认**
|
||||
|
||||
### **实际测试数据:**
|
||||
```
|
||||
录音时长: 3.846 秒
|
||||
文件大小: 10,983 bytes
|
||||
预期大小: 123,072 bytes (3.846秒 × 32000 bytes/秒)
|
||||
完整度: 8.9%
|
||||
丢失数据: 91.1%
|
||||
```
|
||||
|
||||
### **结论:**
|
||||
**您的设备与uni-app的录音管理器严重不兼容!**
|
||||
|
||||
录音文件只保存了不到9%的音频数据,导致:
|
||||
- 百度API只能识别到最后0.3秒的内容
|
||||
- 无论说多长,都只识别最后几个字
|
||||
- 这是**设备/系统限制**,不是代码bug
|
||||
|
||||
---
|
||||
|
||||
## ✅ **已实施的解决方案**
|
||||
|
||||
### **1. 自动检测+建议(已完成)**
|
||||
|
||||
#### **功能:**
|
||||
- ✅ 连续失败2次后,自动弹窗建议手动输入
|
||||
- ✅ 用户可选择"重试"或"手动输入"
|
||||
- ✅ 成功后重置失败计数
|
||||
|
||||
#### **体验流程:**
|
||||
```
|
||||
第1次录音 → 失败 → 提示"评测失败"
|
||||
第2次录音 → 失败 → 弹窗:"您的设备录音功能可能不兼容...建议使用手动输入"
|
||||
↓ 用户选择
|
||||
→ "重试":继续录音(重置计数)
|
||||
→ "手动输入":打开输入框
|
||||
```
|
||||
|
||||
### **2. 手动输入功能(已优化)**
|
||||
|
||||
#### **使用方法:**
|
||||
1. 选择题目
|
||||
2. 点击页面上的"📝 手动输入文本"按钮
|
||||
3. 在弹出框中输入要朗读的内容
|
||||
4. 点击确定
|
||||
|
||||
#### **优势:**
|
||||
- ✅ 无需录音,直接输入
|
||||
- ✅ 避免设备兼容性问题
|
||||
- ✅ 可以完成学习任务
|
||||
|
||||
### **3. 延迟优化(已完成)**
|
||||
- 增加stop()延迟:300ms → 800ms
|
||||
- 增加保存延迟:200ms → 500ms
|
||||
- 总延迟:1300ms
|
||||
|
||||
**注意:虽然增加了延迟,但由于是设备限制,完整度仍然很低。**
|
||||
|
||||
---
|
||||
|
||||
## 🎯 **当前推荐方案**
|
||||
|
||||
### **方案A:使用手动输入(推荐)** ⭐⭐⭐⭐⭐
|
||||
|
||||
**适用:** 需要完成学习任务
|
||||
|
||||
**操作:**
|
||||
1. 点击"📝 手动输入文本"
|
||||
2. 输入题目内容
|
||||
3. 提交
|
||||
|
||||
**优点:**
|
||||
- ✅ 100%可靠
|
||||
- ✅ 无需录音
|
||||
- ✅ 快速完成
|
||||
|
||||
**缺点:**
|
||||
- ❌ 无法练习发音
|
||||
- ❌ 失去语音识别体验
|
||||
|
||||
---
|
||||
|
||||
### **方案B:更换设备测试** ⭐⭐⭐
|
||||
|
||||
**适用:** 想测试真实录音功能
|
||||
|
||||
**操作:**
|
||||
1. 换一台Android手机或iPhone
|
||||
2. 重新运行APP测试
|
||||
|
||||
**可能结果:**
|
||||
- ✅ 某些设备可以正常录音(华为、小米新机型较好)
|
||||
- ❌ 某些设备依然有问题(老机型、低端机)
|
||||
|
||||
---
|
||||
|
||||
### **方案C:使用原生录音插件(终极)** ⭐⭐⭐⭐
|
||||
|
||||
**适用:** 需要彻底解决问题
|
||||
|
||||
**实施:**
|
||||
1. 使用uni-app的**原生录音插件**
|
||||
2. 或开发自定义UTS插件
|
||||
3. 绕过uni.getRecorderManager()
|
||||
|
||||
**优点:**
|
||||
- ✅ 100%兼容
|
||||
- ✅ 性能更好
|
||||
- ✅ 功能更强
|
||||
|
||||
**缺点:**
|
||||
- ❌ 需要重新开发(2-3天)
|
||||
- ❌ 需要原生开发知识
|
||||
- ❌ 需要测试多个设备
|
||||
|
||||
**成本:** 较高,不推荐个人学习项目使用
|
||||
|
||||
---
|
||||
|
||||
## 📊 **设备兼容性参考**
|
||||
|
||||
### **测试建议:**
|
||||
| 设备类型 | 兼容性 | 推荐度 |
|
||||
|---------|--------|--------|
|
||||
| iPhone (iOS 13+) | ✅ 良好 | ⭐⭐⭐⭐⭐ |
|
||||
| 华为 (HarmonyOS) | ✅ 良好 | ⭐⭐⭐⭐⭐ |
|
||||
| 小米 (MIUI 12+) | ✅ 较好 | ⭐⭐⭐⭐ |
|
||||
| OPPO/vivo | ⚠️ 一般 | ⭐⭐⭐ |
|
||||
| 老机型 (Android 7-) | ❌ 差 | ⭐ |
|
||||
| 您当前设备 | ❌ 不兼容 | - |
|
||||
|
||||
---
|
||||
|
||||
## 🔄 **当前使用建议**
|
||||
|
||||
### **短期方案(立即可用):**
|
||||
|
||||
#### **1. 第1-2次尝试录音**
|
||||
- 测试设备是否能正常工作
|
||||
- 如果失败,系统会自动建议手动输入
|
||||
|
||||
#### **2. 失败后使用手动输入**
|
||||
```
|
||||
步骤:
|
||||
1. 选择题目:"孤舟蓑笠翁,独钓寒江雪"
|
||||
2. 点击"📝 手动输入文本"
|
||||
3. 输入:"孤舟蓑笠翁独钓寒江雪"
|
||||
4. 提交
|
||||
5. 查看得分
|
||||
```
|
||||
|
||||
#### **3. 如果需要真实语音识别体验**
|
||||
- 更换设备测试
|
||||
- 或考虑使用PC端(如果有Web版)
|
||||
|
||||
---
|
||||
|
||||
### **长期方案(如果要继续开发):**
|
||||
|
||||
#### **选项1:集成原生录音插件**
|
||||
推荐插件:
|
||||
- `uni-audio-recorder` (原生录音)
|
||||
- `uni-media-capture` (媒体采集)
|
||||
|
||||
#### **选项2:改为在线录音**
|
||||
- 使用WebRTC录音(浏览器)
|
||||
- 或使用云端实时识别(如百度实时语音识别)
|
||||
|
||||
#### **选项3:降级功能**
|
||||
- 只保留手动输入
|
||||
- 将语音识别标记为"实验性功能"
|
||||
|
||||
---
|
||||
|
||||
## 📝 **技术总结**
|
||||
|
||||
### **为什么会出现这个问题?**
|
||||
|
||||
**uni-app的录音管理器**依赖于:
|
||||
1. **Android MediaRecorder API** (Android)
|
||||
2. **AVAudioRecorder** (iOS)
|
||||
|
||||
某些设备的系统实现有以下问题:
|
||||
- 调用`stop()`时立即释放缓冲区
|
||||
- 写入文件的速度慢于缓冲区清空速度
|
||||
- 最终只保存了最后一小部分数据
|
||||
|
||||
**这是系统级限制,无法通过JavaScript代码解决。**
|
||||
|
||||
### **为什么增加延迟也无效?**
|
||||
|
||||
因为问题不在于延迟时间,而在于:
|
||||
```
|
||||
stop() → 触发系统停止录音
|
||||
↓
|
||||
系统立即清空内存缓冲区(丢失90%数据)
|
||||
↓
|
||||
只有最后10%写入文件
|
||||
↓
|
||||
无论等多久,文件就是不完整
|
||||
```
|
||||
|
||||
### **唯一解决方案:**
|
||||
**绕过uni.getRecorderManager(),使用原生API直接录音。**
|
||||
|
||||
---
|
||||
|
||||
## ✅ **当前可用功能**
|
||||
|
||||
1. ✅ **手动输入** - 100%可用,推荐使用
|
||||
2. ⚠️ **语音录音** - 设备不兼容,不推荐
|
||||
3. ✅ **文本相似度评分** - 正常工作
|
||||
4. ✅ **学习记录保存** - 正常工作
|
||||
|
||||
---
|
||||
|
||||
## 🎯 **立即行动**
|
||||
|
||||
### **如果要继续使用当前APP:**
|
||||
1. **重新编译并运行APP**(包含最新修复)
|
||||
2. **尝试录音1-2次**(测试自动建议功能)
|
||||
3. **失败后点击"手动输入"**
|
||||
4. **完成学习任务**
|
||||
|
||||
### **如果要彻底解决录音问题:**
|
||||
1. **更换设备测试**(iPhone或新款Android)
|
||||
2. 或 **等待开发原生录音插件**(需要2-3天)
|
||||
3. 或 **接受只使用手动输入**
|
||||
|
||||
---
|
||||
|
||||
**现在重新运行APP,测试失败2次后会自动建议手动输入!** 📱✅
|
||||
126
录音问题诊断.md
Normal file
126
录音问题诊断.md
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
# 录音只识别最后几个字 - 问题诊断
|
||||
|
||||
## 🔍 问题描述
|
||||
用户录音5秒,但只识别到最后几个字。
|
||||
|
||||
## 🐛 根本原因
|
||||
**录音文件本身不完整!** 不是识别问题,而是音频数据丢失。
|
||||
|
||||
## 📊 诊断方法
|
||||
|
||||
### 1. 查看前端日志(关键)
|
||||
重新运行APP后,录音并停止,查看控制台:
|
||||
|
||||
```
|
||||
[Speech] 实际录音时长: 5.2 秒 ← 用户点击停止时的时长
|
||||
[录音] 准备停止录音...
|
||||
[录音] 录音结束,详细信息: {
|
||||
duration: 523, ← 录音管理器报告的时长(ms)
|
||||
fileSize: 15000 ← 文件大小
|
||||
}
|
||||
[录音] 文件验证成功,大小: 15000 bytes ← 实际文件大小
|
||||
```
|
||||
|
||||
**关键对比:**
|
||||
- **预期大小**:5秒 × 32000 bytes/秒 = 160000 bytes
|
||||
- **实际大小**:15000 bytes(说明只录了约0.5秒!)
|
||||
|
||||
### 2. 查看后端日志
|
||||
```
|
||||
调用百度API识别音频 - 大小: 15000 bytes, 格式: wav, 估算时长: 0.46秒
|
||||
```
|
||||
|
||||
**如果后端时长 << 前端时长 → 确认是录音文件不完整**
|
||||
|
||||
## ✅ 解决方案
|
||||
|
||||
### 已实施的修复:
|
||||
|
||||
#### 1. **增加stop()延迟(300ms + 200ms)**
|
||||
```javascript
|
||||
// 等待300ms让音频缓冲完全写入
|
||||
setTimeout(() => {
|
||||
// 等待onStop事件
|
||||
// 再等待200ms确保文件完全写入磁盘
|
||||
}, 300)
|
||||
```
|
||||
|
||||
#### 2. **增加frameSize缓冲**
|
||||
```javascript
|
||||
frameSize: 50 // 增加缓冲,避免数据丢失
|
||||
```
|
||||
|
||||
#### 3. **详细日志**
|
||||
- 显示录音时长、文件大小
|
||||
- 验证文件是否完整
|
||||
|
||||
## 🧪 测试步骤
|
||||
|
||||
### 测试1:验证录音完整性
|
||||
|
||||
1. 重新运行APP
|
||||
2. 选择题目
|
||||
3. 点击"开始说话"
|
||||
4. **大声清晰说5秒**:"白日依山尽,黄河入海流,欲穷千里目,更上一层楼"
|
||||
5. 等待3秒提示后点击"停止识别"
|
||||
|
||||
### 预期结果:
|
||||
|
||||
**前端日志:**
|
||||
```
|
||||
[Speech] 实际录音时长: 5.2 秒
|
||||
[录音] 录音结束,详细信息: { duration: 5200, fileSize: 170000 }
|
||||
[录音] 文件验证成功,大小: 170000 bytes ✅ 约5.3秒
|
||||
```
|
||||
|
||||
**后端日志:**
|
||||
```
|
||||
调用百度API识别音频 - 大小: 170000 bytes, 格式: wav, 估算时长: 5.31秒 ✅
|
||||
音频时长合适: 5.31秒(推荐范围:2-10秒)
|
||||
识别成功: '白日依山尽黄河入海流欲穷千里目更上一层楼' ✅
|
||||
```
|
||||
|
||||
### 如果还是不完整:
|
||||
|
||||
**可能原因:**
|
||||
1. **手机性能问题** - 录音缓冲区被覆盖
|
||||
2. **存储空间不足** - 无法完整保存文件
|
||||
3. **系统限制** - Android/iOS版本限制
|
||||
|
||||
**进一步诊断:**
|
||||
```
|
||||
[录音] 文件验证成功,大小: 15000 bytes ← 还是很小
|
||||
→ 说明uni-app录音管理器本身有问题
|
||||
```
|
||||
|
||||
**终极解决方案:**
|
||||
- 使用原生录音插件(UTS插件)
|
||||
- 或分段录音(3-5秒一段)
|
||||
|
||||
## 📱 不同场景的时长要求
|
||||
|
||||
| 场景 | 推荐时长 | 说明 |
|
||||
|------|---------|------|
|
||||
| 短句 | 3-5秒 | 最佳识别效果 |
|
||||
| 中等 | 5-8秒 | 良好识别效果 |
|
||||
| 长句 | 8-10秒 | 可识别,建议分段 |
|
||||
| 超长 | >10秒 | 可能只识别部分 |
|
||||
|
||||
## 🎯 用户使用建议
|
||||
|
||||
1. **说话节奏**:不要太快,每个字清晰
|
||||
2. **音量**:正常说话音量,不要太小
|
||||
3. **时长**:3-8秒最佳
|
||||
4. **环境**:安静环境,避免噪音
|
||||
5. **长内容**:分段录音,不要一次录太长
|
||||
|
||||
## 🔧 后续优化方向
|
||||
|
||||
如果问题依然存在:
|
||||
1. 考虑使用原生录音插件
|
||||
2. 实现实时语音识别(流式传输)
|
||||
3. 或改为使用百度长语音识别API(60秒)
|
||||
|
||||
---
|
||||
|
||||
**重新运行APP测试,查看日志中的文件大小是否正常!**
|
||||
66
测试语音服务.py
Normal file
66
测试语音服务.py
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
测试语音识别服务
|
||||
"""
|
||||
|
||||
import requests
|
||||
|
||||
# 测试健康检查
|
||||
def test_health():
    """Call the health endpoint and report whether it answered with HTTP 200.

    Returns:
        True when the response status code is 200; False on any other
        status or when the request (or JSON decoding) raises.
    """
    print("测试健康检查...")
    try:
        # Without a timeout requests waits indefinitely on an unresponsive
        # server, which would hang the whole test script.
        response = requests.get(
            'http://localhost:5000/api/speech/health',
            timeout=5
        )
        print(f"状态码: {response.status_code}")
        print(f"响应: {response.json()}")
        return response.status_code == 200
    except Exception as e:
        print(f"错误: {str(e)}")
        return False
|
||||
|
||||
# 测试语音识别(需要有音频文件)
|
||||
def test_recognize(audio_file):
    """Upload an audio file to the recognition endpoint and report success.

    Args:
        audio_file: Path of the audio file to upload as the ``audio`` part.

    Returns:
        True when the response status code is 200; False on any other
        status or when opening the file, the request or JSON decoding raises.
    """
    print(f"\n测试语音识别: {audio_file}")
    try:
        with open(audio_file, 'rb') as f:
            files = {'audio': f}
            data = {'referenceText': '你好世界'}

            # Recognition can take a while, so allow a generous but finite
            # timeout instead of waiting forever on a stuck server.
            response = requests.post(
                'http://localhost:5000/api/speech/recognize',
                files=files,
                data=data,
                timeout=60
            )

        print(f"状态码: {response.status_code}")
        print(f"响应: {response.json()}")
        return response.status_code == 200
    except Exception as e:
        print(f"错误: {str(e)}")
        return False
|
||||
|
||||
if __name__ == '__main__':
    import os
    import sys

    print("=" * 50)
    print("语音服务测试")
    print("=" * 50)
    print()

    # 1. Health check: nothing else can work if the server is not up.
    if test_health():
        print("✓ 健康检查通过")
    else:
        print("✗ 健康检查失败")
        # sys.exit is always available; the bare exit() helper is only
        # injected by the site module.
        sys.exit(1)

    # 2. Recognition test, only when a sample audio file is present.
    failed = False
    test_audio = './test.wav'
    if os.path.exists(test_audio):
        if test_recognize(test_audio):
            print("✓ 语音识别测试通过")
        else:
            print("✗ 语音识别测试失败")
            failed = True
    else:
        print(f"\n提示: 创建 {test_audio} 文件可测试语音识别")

    print("\n所有测试完成!")

    # Report a recognition failure through the exit status as well.
    if failed:
        sys.exit(1)
|
||||
97
百度语音配置说明.md
Normal file
97
百度语音配置说明.md
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
# 百度语音识别配置说明(超简单)
|
||||
|
||||
## ✅ 优势
|
||||
- **无需ffmpeg** - 百度API自动处理所有格式
|
||||
- **无需本地模型** - 不用下载大文件
|
||||
- **主机访问百度云** - 内网设备通过主机使用(完美!)
|
||||
- **识别准确** - 百度的识别率高
|
||||
- **免费额度** - 每天50000次,完全够用
|
||||
|
||||
## 📝 配置步骤(只需5分钟)
|
||||
|
||||
### 1. 申请百度API(免费)
|
||||
|
||||
1. 访问:https://console.bce.baidu.com/ai/#/ai/speech/overview/index
|
||||
2. 注册/登录百度账号
|
||||
3. 点击"创建应用"
|
||||
4. 填写应用信息(随便填)
|
||||
5. 获取以下3个密钥:
|
||||
- `APP_ID`
|
||||
- `API_KEY`
|
||||
- `SECRET_KEY`
|
||||
|
||||
### 2. 配置密钥
|
||||
|
||||
打开文件:
|
||||
```
|
||||
Study-Vue-redis/ry-study-admin/src/main/java/com/ddnai/web/controller/study/BaiduSpeechService.java
|
||||
```
|
||||
|
||||
修改第27-29行(`APP_ID`、`API_KEY`、`SECRET_KEY` 三个常量):
|
||||
```java
|
||||
private static final String APP_ID = "你的APP_ID"; // ← 替换这里
|
||||
private static final String API_KEY = "你的API_KEY"; // ← 替换这里
|
||||
private static final String SECRET_KEY = "你的SECRET_KEY"; // ← 替换这里
|
||||
```
|
||||
|
||||
### 3. 编译并重启后端
|
||||
|
||||
```bash
|
||||
cd Study-Vue-redis
|
||||
mvn clean package -DskipTests
|
||||
```
|
||||
|
||||
然后重启Spring Boot服务。
|
||||
|
||||
### 4. 测试
|
||||
|
||||
在APP中录音测试,就这么简单!
|
||||
|
||||
---
|
||||
|
||||
## 🎯 架构说明
|
||||
|
||||
```
|
||||
手机APP(内网,不能上网)
|
||||
↓ 局域网 (http://192.168.1.80:30091)
|
||||
Spring Boot主机(能上外网)
|
||||
↓ 互联网
|
||||
百度云API(语音识别)
|
||||
```
|
||||
|
||||
**只要你的主机(192.168.1.80)能访问外网,内网设备就能用语音识别!**
|
||||
|
||||
---
|
||||
|
||||
## 💰 费用说明
|
||||
|
||||
- **免费额度**:每天50000次
|
||||
- **超出后**:0.0015元/次(很便宜)
|
||||
- **个人使用**:完全够用,基本不花钱
|
||||
|
||||
---
|
||||
|
||||
## 🔍 故障排查
|
||||
|
||||
### 问题1:提示"百度语音客户端未初始化"
|
||||
- 检查是否填写了密钥
|
||||
- 检查密钥是否正确
|
||||
|
||||
### 问题2:识别失败
|
||||
- 检查主机是否能访问外网
|
||||
- 检查百度API额度是否用完
|
||||
- 查看后端日志获取详细错误
|
||||
|
||||
### 问题3:网络问题
|
||||
- 确保主机能访问 `https://aip.baidubce.com`
|
||||
- 检查防火墙设置
|
||||
|
||||
---
|
||||
|
||||
## 📞 技术支持
|
||||
|
||||
百度语音识别文档:https://ai.baidu.com/ai-doc/SPEECH/Vk38lxily
|
||||
|
||||
---
|
||||
|
||||
**就这么简单!不需要ffmpeg,不需要模型,5分钟搞定!** 🎉
|
||||
Loading…
Reference in New Issue
Block a user