diff --git a/Study-Vue-redis/ry-study-admin/pom.xml b/Study-Vue-redis/ry-study-admin/pom.xml
index 2c256a6..6aacf85 100644
--- a/Study-Vue-redis/ry-study-admin/pom.xml
+++ b/Study-Vue-redis/ry-study-admin/pom.xml
@@ -73,6 +73,13 @@
 spring-boot-starter-websocket
+
+
+ com.baidu.aip
+ java-sdk
+ 4.16.18
+
+
diff --git a/Study-Vue-redis/ry-study-admin/src/main/java/com/ddnai/web/controller/study/BaiduSpeechService.java b/Study-Vue-redis/ry-study-admin/src/main/java/com/ddnai/web/controller/study/BaiduSpeechService.java
new file mode 100644
index 0000000..c482912
--- /dev/null
+++ b/Study-Vue-redis/ry-study-admin/src/main/java/com/ddnai/web/controller/study/BaiduSpeechService.java
@@ -0,0 +1,255 @@
+package com.ddnai.web.controller.study;
+
+import com.baidu.aip.speech.AipSpeech;
+import org.json.JSONObject;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.stereotype.Service;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.HashMap;
+
+/**
+ * Baidu speech recognition service.
+ * The host calls Baidu Cloud so that intranet devices can use speech recognition through it.
+ *
+ * <p>Credentials come from the environment variables {@code BAIDU_APP_ID},
+ * {@code BAIDU_API_KEY} and {@code BAIDU_SECRET_KEY} so that no secret is kept in source control.
+ *
+ * @author ddnai
+ */
+@Service
+public class BaiduSpeechService
+{
+    private static final Logger log = LoggerFactory.getLogger(BaiduSpeechService.class);
+
+    // Baidu speech recognition credentials, read from the environment.
+    // Apply for them at: https://console.bce.baidu.com/ai/#/ai/speech/overview/index
+    private static final String APP_ID = System.getenv("BAIDU_APP_ID");
+    private static final String API_KEY = System.getenv("BAIDU_API_KEY");
+    private static final String SECRET_KEY = System.getenv("BAIDU_SECRET_KEY");
+
+    /** Sample rate in Hz sent to the API; the duration estimate assumes the same rate. */
+    private static final int SAMPLE_RATE = 16000;
+
+    /** Bytes per second of 16 kHz, mono, 16-bit audio. */
+    private static final double BYTES_PER_SECOND = 32000.0;
+
+    /** Size in bytes of the WAV header skipped by the duration estimate. */
+    private static final int WAV_HEADER_BYTES = 44;
+
+    private AipSpeech client;
+
+    /**
+     * Creates the Baidu speech client when all three credentials are configured.
+     * If a credential is missing or construction fails, the client stays {@code null}
+     * and {@link #isAvailable()} returns {@code false}.
+     */
+    public BaiduSpeechService()
+    {
+        if (isBlank(APP_ID) || isBlank(API_KEY) || isBlank(SECRET_KEY))
+        {
+            log.warn("百度语音服务未配置,请设置环境变量 BAIDU_APP_ID、BAIDU_API_KEY、BAIDU_SECRET_KEY");
+            return;
+        }
+
+        try
+        {
+            client = new AipSpeech(APP_ID, API_KEY, SECRET_KEY);
+            // Connection and socket timeouts, in milliseconds
+            client.setConnectionTimeoutInMillis(5000);
+            client.setSocketTimeoutInMillis(30000);
+            log.info("百度语音客户端初始化成功");
+        }
+        catch (Exception e)
+        {
+            log.error("百度语音客户端初始化失败", e);
+        }
+    }
+
+    /**
+     * Recognizes an audio file, deriving the format from the file name.
+     *
+     * @param audioFile audio file (MP3, WAV, PCM, ...)
+     * @return recognized text
+     * @throws IOException if the file cannot be read
+     */
+    public String recognizeAudio(File audioFile) throws IOException
+    {
+        return recognizeAudio(audioFile, getAudioFormat(audioFile.getName()));
+    }
+
+    /**
+     * Recognizes an audio file in an explicitly given format.
+     *
+     * @param audioFile audio file
+     * @param format    audio format (pcm/wav/mp3/m4a)
+     * @return recognized text; empty when the API succeeded but returned no text
+     * @throws IOException           if the file cannot be read
+     * @throws IllegalStateException if the client was not initialized
+     * @throws RuntimeException      if the API reports a failure
+     */
+    public String recognizeAudio(File audioFile, String format) throws IOException
+    {
+        if (client == null)
+        {
+            throw new IllegalStateException("百度语音客户端未初始化");
+        }
+
+        byte[] audioData = readFileToBytes(audioFile);
+
+        // The duration can only be estimated for uncompressed audio (wav/pcm).
+        boolean durationKnown = "wav".equals(format) || "pcm".equals(format);
+        double durationSeconds = estimateDurationSeconds(audioData.length, format);
+
+        log.info("调用百度API识别音频 - 大小: {} bytes, 格式: {}, 估算时长: {}秒",
+                audioData.length, format, String.format("%.2f", durationSeconds));
+
+        if (durationKnown)
+        {
+            logDurationAdvice(durationSeconds);
+        }
+
+        // format: audio format, pcm/wav/mp3/m4a
+        // rate: sample rate, 8000/16000
+        HashMap<String, Object> options = new HashMap<>();
+        options.put("dev_pid", 1537); // Mandarin Chinese
+
+        JSONObject result = client.asr(audioData, format, SAMPLE_RATE, options);
+
+        log.info("百度API响应: {}", result.toString());
+
+        // A response without "err_no" is treated as a failure instead of raising a JSONException.
+        // NOTE(review): the SDK appears to report client-side errors as "error_code"/"error_msg" - confirm.
+        int errNo = result.optInt("err_no", -1);
+        if (errNo != 0)
+        {
+            String errMsg = result.optString("err_msg", result.optString("error_msg", "未知错误"));
+            log.error("识别失败: err_no={}, err_msg={}", errNo, errMsg);
+            throw new RuntimeException("百度API识别失败: " + errMsg);
+        }
+
+        StringBuilder text = new StringBuilder();
+        if (result.has("result"))
+        {
+            org.json.JSONArray resultArray = result.getJSONArray("result");
+            log.info("识别结果数组长度: {}", resultArray.length());
+            for (int i = 0; i < resultArray.length(); i++)
+            {
+                String part = resultArray.getString(i);
+                log.info("识别片段[{}]: '{}'", i, part);
+                text.append(part);
+            }
+        }
+
+        String recognizedText = text.toString().trim();
+
+        if (recognizedText.isEmpty())
+        {
+            log.warn("百度API返回成功但识别结果为空 - 可能原因: 音频太短({}秒)、无声音或音量太小",
+                    String.format("%.2f", durationSeconds));
+        }
+        else
+        {
+            log.info("识别成功: '{}', 长度: {}", recognizedText, recognizedText.length());
+        }
+
+        return recognizedText;
+    }
+
+    /**
+     * Estimates the playing time of 16 kHz, mono, 16-bit audio from its size.
+     *
+     * @param byteCount size of the audio data in bytes
+     * @param format    audio format
+     * @return estimated seconds for wav/pcm, never negative; 0 for any other format
+     */
+    private double estimateDurationSeconds(int byteCount, String format)
+    {
+        if ("wav".equals(format))
+        {
+            // Skip the WAV header; a file shorter than the header counts as empty
+            return Math.max(0, byteCount - WAV_HEADER_BYTES) / BYTES_PER_SECOND;
+        }
+        if ("pcm".equals(format))
+        {
+            // Raw PCM has no header
+            return byteCount / BYTES_PER_SECOND;
+        }
+        return 0;
+    }
+
+    /**
+     * Logs a hint when the estimated duration is outside the range that recognizes well.
+     */
+    private void logDurationAdvice(double durationSeconds)
+    {
+        String formatted = String.format("%.2f", durationSeconds);
+        if (durationSeconds < 0.5)
+        {
+            log.warn("音频时长过短: {}秒,可能无法识别", formatted);
+        }
+        else if (durationSeconds > 10.0)
+        {
+            log.warn("音频时长过长: {}秒。百度短语音识别最佳时长为3-10秒,超过10秒可能只识别部分内容", formatted);
+        }
+        else if (durationSeconds >= 2.0)
+        {
+            log.info("音频时长合适: {}秒(推荐范围:2-10秒)", formatted);
+        }
+    }
+
+    /**
+     * Reads the whole file into a byte array.
+     */
+    private byte[] readFileToBytes(File file) throws IOException
+    {
+        // Files.readAllBytes reads until end of file; a single InputStream.read may return early
+        return Files.readAllBytes(file.toPath());
+    }
+
+    /**
+     * Derives the audio format from the file name; defaults to mp3.
+     */
+    private String getAudioFormat(String filename)
+    {
+        if (filename == null)
+        {
+            return "mp3";
+        }
+
+        String lowerName = filename.toLowerCase(java.util.Locale.ROOT);
+        if (lowerName.endsWith(".wav"))
+        {
+            return "wav";
+        }
+        else if (lowerName.endsWith(".pcm"))
+        {
+            return "pcm";
+        }
+        else if (lowerName.endsWith(".m4a"))
+        {
+            return "m4a";
+        }
+        else
+        {
+            return "mp3"; // default: MP3
+        }
+    }
+
+    /**
+     * Reports whether the service is usable, i.e. the client was created from configured credentials.
+     */
+    public boolean isAvailable()
+    {
+        return client != null;
+    }
+
+    /** Returns whether the value is null, empty or whitespace only. */
+    private static boolean isBlank(String value)
+    {
+        return value == null || value.trim().isEmpty();
+    }
+}
diff --git a/Study-Vue-redis/ry-study-admin/src/main/java/com/ddnai/web/controller/study/VoiceSpeechController.java b/Study-Vue-redis/ry-study-admin/src/main/java/com/ddnai/web/controller/study/VoiceSpeechController.java
new file mode 100644
index 0000000..0902d91
--- /dev/null
+++ b/Study-Vue-redis/ry-study-admin/src/main/java/com/ddnai/web/controller/study/VoiceSpeechController.java
@@ -0,0 +1,219 @@
+package com.ddnai.web.controller.study;
+
+import java.io.File;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RequestParam;
+import org.springframework.web.bind.annotation.RestController;
+import org.springframework.web.multipart.MultipartFile;
+import com.ddnai.common.core.controller.BaseController;
+import com.ddnai.common.core.domain.AjaxResult;
+
+/**
+ * Speech recognition endpoints backed by the Baidu Cloud API.
+ * The host calls Baidu Cloud so that intranet devices can use speech recognition through it.
+ *
+ * @author ddnai
+ */
+@RestController
+@RequestMapping("/api/speech")
+public class VoiceSpeechController extends BaseController
+{
+    private static final Logger log = LoggerFactory.getLogger(VoiceSpeechController.class);
+
+    @Autowired
+    private BaiduSpeechService baiduSpeechService;
+
+    /**
+     * Health check endpoint.
+     */
+    @GetMapping("/health")
+    public AjaxResult health()
+    {
+        try
+        {
+            boolean available = baiduSpeechService.isAvailable();
+
+            if (available)
+            {
+                log.info("百度语音服务正常");
+                return success("语音服务正常(百度云API)");
+            }
+            else
+            {
+                log.warn("百度语音服务未配置");
+                return error("语音服务未配置,请填写百度API密钥");
+            }
+        }
+        catch (Exception e)
+        {
+            log.error("语音服务检查失败", e);
+            return error("语音服务不可用: " + e.getMessage());
+        }
+    }
+
+    /**
+     * Speech recognition endpoint: sends the uploaded audio to Baidu Cloud and scores the
+     * recognized text against the optional reference text.
+     *
+     * @param audioFile     uploaded audio
+     * @param referenceText text the speaker was expected to say; may be absent
+     * @param format        audio format, defaults to wav
+     * @return recognized text and scores, or an error result
+     */
+    @PostMapping("/recognize")
+    public AjaxResult recognize(
+            @RequestParam("audio") MultipartFile audioFile,
+            @RequestParam(value = "referenceText", required = false) String referenceText,
+            @RequestParam(value = "format", required = false, defaultValue = "wav") String format)
+    {
+        if (audioFile == null || audioFile.isEmpty())
+        {
+            return error("未上传音频文件");
+        }
+
+        // The format ends up in a temp-file suffix, so only a short alphanumeric token is accepted
+        if (format == null || !format.matches("[A-Za-z0-9]{1,10}"))
+        {
+            return error("不支持的音频格式");
+        }
+        format = format.toLowerCase(java.util.Locale.ROOT);
+
+        File tempInputFile = null;
+
+        try
+        {
+            log.info("收到语音识别请求 - 文件名: {}, 大小: {} bytes, 格式: {}, 参考文本: {}",
+                    audioFile.getOriginalFilename(), audioFile.getSize(), format, referenceText);
+
+            // Save the upload using the requested format as extension
+            tempInputFile = File.createTempFile("voice_", "." + format);
+            audioFile.transferTo(tempInputFile);
+
+            log.info("音频文件已保存: {}, 大小: {} bytes",
+                    tempInputFile.getAbsolutePath(), tempInputFile.length());
+
+            // Recognize through the Baidu API, passing the format along
+            log.info("调用百度云API识别,格式: {}...", format);
+            String recognizedText = baiduSpeechService.recognizeAudio(tempInputFile, format);
+
+            if (recognizedText == null || recognizedText.isEmpty())
+            {
+                log.warn("未识别到有效语音");
+                return error("未识别到有效语音。请确保:1) 录音时长至少1秒;2) 说话声音清晰;3) 麦克风权限已授予");
+            }
+
+            log.info("识别成功: {}", recognizedText);
+
+            // Similarity-based scores
+            double score = calculateSimilarity(recognizedText, referenceText != null ? referenceText : "");
+            double pronunciationScore = Math.max(0, score - 5);
+            double fluencyScore = Math.max(0, score - 3);
+
+            log.info("相似度: {}分", score);
+
+            java.util.Map<String, Object> data = new java.util.HashMap<>();
+            data.put("recognizedText", recognizedText);
+            data.put("score", score);
+            data.put("pronunciationScore", pronunciationScore);
+            data.put("fluencyScore", fluencyScore);
+            data.put("status", "completed");
+
+            return success(data);
+        }
+        catch (java.io.IOException e)
+        {
+            log.error("文件处理失败", e);
+            return error("文件处理失败: " + e.getMessage());
+        }
+        catch (Exception e)
+        {
+            log.error("语音识别请求失败", e);
+            return error("语音识别失败: " + e.getMessage());
+        }
+        finally
+        {
+            // Clean up the temp file
+            if (tempInputFile != null && tempInputFile.exists())
+            {
+                if (tempInputFile.delete())
+                {
+                    log.debug("临时文件已删除: {}", tempInputFile.getAbsolutePath());
+                }
+                else
+                {
+                    log.warn("临时文件删除失败: {}", tempInputFile.getAbsolutePath());
+                }
+            }
+        }
+    }
+
+    /**
+     * Computes the similarity of two texts on a 0-100 scale from their edit distance,
+     * ignoring whitespace and rounding to two decimals.
+     */
+    private double calculateSimilarity(String text1, String text2)
+    {
+        if (text1 == null || text1.isEmpty() || text2 == null || text2.isEmpty())
+        {
+            return 0.0;
+        }
+
+        // Strip whitespace
+        text1 = text1.replaceAll("\\s+", "");
+        text2 = text2.replaceAll("\\s+", "");
+
+        if (text1.isEmpty() || text2.isEmpty())
+        {
+            return 0.0;
+        }
+
+        int distance = levenshteinDistance(text1, text2);
+        int maxLength = Math.max(text1.length(), text2.length());
+
+        double similarity = (1.0 - (double) distance / maxLength) * 100;
+        return Math.round(similarity * 100.0) / 100.0; // keep two decimals
+    }
+
+    /**
+     * Computes the Levenshtein (edit) distance between two strings.
+     */
+    private int levenshteinDistance(String s1, String s2)
+    {
+        int len1 = s1.length();
+        int len2 = s2.length();
+
+        int[][] dp = new int[len1 + 1][len2 + 1];
+
+        for (int i = 0; i <= len1; i++)
+        {
+            dp[i][0] = i;
+        }
+
+        for (int j = 0; j <= len2; j++)
+        {
+            dp[0][j] = j;
+        }
+
+        for (int i = 1; i <= len1; i++)
+        {
+            for (int j = 1; j <= len2; j++)
+            {
+                if (s1.charAt(i - 1) == s2.charAt(j - 1))
+                {
+                    dp[i][j] = dp[i - 1][j - 1];
+                }
+                else
+                {
+                    dp[i][j] = Math.min(Math.min(dp[i - 1][j], dp[i][j - 1]), dp[i - 1][j - 1]) + 1;
+                }
+            }
+        }
+
+        return dp[len1][len2];
+    }
+}
diff --git a/baidu_speech_server.py b/baidu_speech_server.py
new file mode 100644
index 0000000..08bf938
--- /dev/null
+++ b/baidu_speech_server.py
@@ -0,0 +1,177 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Baidu speech recognition service (very simple, no ffmpeg required).
+Free quota: 50000 calls per day.
+"""
+
+from flask import Flask, request, jsonify
+from flask_cors import CORS
+import base64
+import json
+import os
+from difflib import SequenceMatcher
+
+app = Flask(__name__)
+CORS(app)
+
+# Baidu speech recognition credentials (apply for them first).
+# Free application: https://console.bce.baidu.com/ai/#/ai/speech/overview/index
+# Read from the environment; the placeholders below mean "not configured".
+BAIDU_APP_ID = os.environ.get("BAIDU_APP_ID", "你的APP_ID")
+BAIDU_API_KEY = os.environ.get("BAIDU_API_KEY", "你的API_KEY")
+BAIDU_SECRET_KEY = os.environ.get("BAIDU_SECRET_KEY", "你的SECRET_KEY")
+
+# Lazily created client
+asr_client = None
+
+def get_asr_client():
+    """Return the shared Baidu speech client, creating it on first use; None on failure."""
+    global asr_client
+    if asr_client is None:
+        try:
+            from aip import AipSpeech
+            asr_client = AipSpeech(BAIDU_APP_ID, BAIDU_API_KEY, BAIDU_SECRET_KEY)
+            print("✓ 百度语音客户端初始化成功")
+        except ImportError:
+            print("✗ 未安装百度SDK,请运行: pip install baidu-aip")
+            return None
+        except Exception as e:
+            print(f"✗ 初始化失败: {str(e)}")
+            return None
+    return asr_client
+
+def recognize_audio_baidu(audio_data, format='mp3'):
+    """Recognize audio through the Baidu API.
+
+    Returns ``(text, None)`` on success and ``(None, message)`` on failure.
+    """
+    try:
+        client = get_asr_client()
+        if not client:
+            return None, "百度语音客户端未初始化"
+
+        result = client.asr(audio_data, format, 16000, {
+            'dev_pid': 1537,  # Mandarin Chinese
+        })
+
+        # A response without err_no is reported as a failure instead of raising KeyError
+        if result.get('err_no') == 0:
+            text = ''.join(result.get('result', []))
+            return text, None
+        else:
+            return None, f"识别失败: {result.get('err_msg', '未知错误')}"
+
+    except Exception as e:
+        return None, str(e)
+
+def calculate_similarity(text1, text2):
+    """Return the similarity of two texts on a 0-100 scale, ignoring spaces."""
+    if not text1 or not text2:
+        return 0
+
+    text1 = text1.replace(" ", "")
+    text2 = text2.replace(" ", "")
+
+    if not text1 or not text2:
+        return 0
+
+    similarity = SequenceMatcher(None, text1, text2).ratio()
+    return round(similarity * 100, 2)
+
+@app.route('/api/speech/recognize', methods=['POST'])
+def recognize():
+    """Speech recognition endpoint."""
+    try:
+        # Require the uploaded file
+        if 'audio' not in request.files:
+            return jsonify({
+                'code': 400,
+                'msg': '未上传音频文件'
+            }), 400
+
+        audio_file = request.files['audio']
+        reference_text = request.form.get('referenceText', '')
+        # Honour the client's format instead of always claiming mp3 (still the default)
+        audio_format = (request.form.get('format') or 'mp3').lower()
+
+        audio_data = audio_file.read()
+
+        print(f"收到音频: {len(audio_data)} bytes")
+        print(f"参考文本: {reference_text}")
+
+        recognized_text, error = recognize_audio_baidu(audio_data, format=audio_format)
+
+        if error:
+            return jsonify({
+                'code': 500,
+                'msg': f'识别失败: {error}'
+            }), 500
+
+        if not recognized_text:
+            return jsonify({
+                'code': 500,
+                'msg': '未识别到有效语音'
+            }), 500
+
+        # Similarity-based scores
+        score = calculate_similarity(recognized_text, reference_text)
+        pronunciation_score = max(0, score - 5)
+        fluency_score = max(0, score - 3)
+
+        print(f"识别结果: {recognized_text}")
+        print(f"相似度: {score}分")
+
+        return jsonify({
+            'code': 200,
+            'msg': '成功',
+            'data': {
+                'recognizedText': recognized_text,
+                'score': score,
+                'pronunciationScore': pronunciation_score,
+                'fluencyScore': fluency_score,
+                'status': 'completed'
+            }
+        })
+
+    except Exception as e:
+        print(f"处理错误: {str(e)}")
+        return jsonify({
+            'code': 500,
+            'msg': f'处理失败: {str(e)}'
+        }), 500
+
+@app.route('/api/speech/health', methods=['GET'])
+def health():
+    """Health check."""
+    client = get_asr_client()
+    return jsonify({
+        'code': 200,
+        'msg': '服务正常',
+        'data': {
+            'engine': 'baidu',
+            'client_ready': client is not None
+        }
+    })
+
+if __name__ == '__main__':
+    print("=" * 50)
+    print("百度语音识别服务(超简单,无需ffmpeg)")
+    print("=" * 50)
+    print("")
+    print("1. 安装依赖: pip install baidu-aip")
+    print("2. 申请百度API: https://console.bce.baidu.com/ai/#/ai/speech/overview/index")
+    print("3. 填写 APP_ID, API_KEY, SECRET_KEY")
+    print("")
+
+    if BAIDU_APP_ID == "你的APP_ID":
+        print("⚠️ 请先配置百度API密钥!")
+        print("")
+
+    print("=" * 50)
+    print("服务启动成功!")
+    print("访问地址: http://localhost:5000")
+    print("=" * 50)
+    print("")
+
+    app.run(host='0.0.0.0', port=5000, debug=False)
diff --git a/fronted_uniapp/pages/speech/speech.vue b/fronted_uniapp/pages/speech/speech.vue
index cbc0019..10082c1 100644
--- a/fronted_uniapp/pages/speech/speech.vue
+++ b/fronted_uniapp/pages/speech/speech.vue
@@ -79,16 +79,6 @@
-
-
-