# -*- coding: utf-8 -*-
"""
RAG knowledge-base service - Flask API.

Supports deployment in the same directory as the jar package.
"""
import os
import re
import sys
import traceback

from flask import Flask, request, jsonify
from flask_cors import CORS

from config import HOST, PORT, KNOWLEDGE_DIR, BASE_DIR
from knowledge_service import knowledge_service
from file_watcher import FileWatcher

app = Flask(__name__)
CORS(app)  # allow cross-origin requests

# File watcher instance; stays None until init_service() creates and starts it.
file_watcher = None

# Patterns used by rag_analyze(), compiled once at import time.
_HTML_TAG_RE = re.compile(r'<[^>]*>')
_FACTOR_RE = re.compile(
    r'([\u4e00-\u9fa5]+(?:焦虑|抑郁|压力|情绪|睡眠|躯体|认知|人格|心理)[\u4e00-\u9fa5]*)'
)
_LEVEL_RE = re.compile(r'(正常|轻度|中度|重度|严重)')

# Query used when no keyword can be extracted from the report.
_DEFAULT_QUERY = '心理测评 分析 建议'


def _json_body():
    """Return the request's JSON body as a dict.

    Falls back to an empty dict when the body is missing, is not valid JSON,
    or is valid JSON but not an object, so callers can use ``.get`` safely and
    report a 400 through their own validation instead of crashing with a 500.
    """
    payload = request.get_json(silent=True)
    return payload if isinstance(payload, dict) else {}


def _error(message, status):
    """Build the standard ``{'success': False, 'error': ...}`` response."""
    return jsonify({'success': False, 'error': message}), status


@app.route('/api/health', methods=['GET'])
def health_check():
    """Health check: report service status and the configured directories."""
    return jsonify({
        'status': 'ok',
        'service': 'RAG Knowledge Service',
        'knowledge_dir': KNOWLEDGE_DIR,
        'base_dir': BASE_DIR
    })


@app.route('/api/documents', methods=['GET'])
def list_documents():
    """List all documents known to the knowledge service."""
    try:
        documents = knowledge_service.list_documents()
        return jsonify({
            'success': True,
            'data': documents
        })
    except Exception as e:
        return _error(str(e), 500)


@app.route('/api/documents/upload', methods=['POST'])
def upload_document():
    """Upload a document (multipart field ``file``) and index it.

    Returns 400 when no file is supplied, the filename is empty, or the
    knowledge service reports failure; 500 on unexpected errors.
    """
    try:
        if 'file' not in request.files:
            return _error('没有上传文件', 400)

        uploaded = request.files['file']
        if uploaded.filename == '':
            return _error('文件名为空', 400)

        result = knowledge_service.upload_and_index(uploaded)
        if not result['success']:
            return _error(result.get('error'), 400)

        return jsonify({
            'success': True,
            'data': result
        })
    except Exception as e:
        return _error(str(e), 500)


# The URL rule must declare the ``filename`` variable: without it Flask calls
# the view with no arguments and every DELETE fails with a TypeError.
# NOTE(review): presumably the original rule carried a ``<filename>``
# placeholder that was lost; the default (string) converter rejects slashes -
# switch to ``<path:filename>`` only if nested paths must be deletable.
@app.route('/api/documents/<filename>', methods=['DELETE'])
def delete_document(filename):
    """Delete the document named ``filename``.

    The response's ``success`` flag mirrors the knowledge service's result.
    """
    try:
        result = knowledge_service.delete_document(filename)
        return jsonify({
            'success': result['success'],
            'data': result
        })
    except Exception as e:
        return _error(str(e), 500)


@app.route('/api/search', methods=['POST'])
def search():
    """Search the knowledge base.

    Expects a JSON object with ``query`` (required) and ``top_k``
    (optional, default 5). Returns 400 when the query is empty or the body
    is not a JSON object.
    """
    try:
        data = _json_body()
        query = data.get('query', '')
        top_k = data.get('top_k', 5)

        if not query:
            return _error('查询内容不能为空', 400)

        results = knowledge_service.search(query, top_k)

        # Debug output
        print(f"[Search] Query: {query[:50]}..., Results: {len(results)}")
        for i, r in enumerate(results):
            print(f" [{i+1}] filename: {r.get('filename')}, content_len: {len(r.get('content', ''))}")

        return jsonify({
            'success': True,
            'data': results
        })
    except Exception as e:
        return _error(str(e), 500)


@app.route('/api/stats', methods=['GET'])
def get_stats():
    """Return the knowledge service's statistics."""
    try:
        stats = knowledge_service.get_stats()
        return jsonify({
            'success': True,
            'data': stats
        })
    except Exception as e:
        return _error(str(e), 500)


@app.route('/api/rebuild', methods=['POST'])
def rebuild_index():
    """Rebuild the index."""
    try:
        result = knowledge_service.rebuild_index()
        return jsonify({
            'success': True,
            'data': result
        })
    except Exception as e:
        return _error(str(e), 500)


@app.route('/api/scan', methods=['POST'])
def scan_folder():
    """Scan the knowledge folder and index new files."""
    try:
        result = knowledge_service.scan_and_index_folder()
        return jsonify({
            'success': True,
            'data': result
        })
    except Exception as e:
        return _error(str(e), 500)


def _build_query(report_content):
    """Derive a retrieval query from a report (HTML allowed).

    Strips HTML tags, then collects up to five factor names and up to three
    severity levels. Duplicates are removed while keeping first-seen order so
    the same report always yields the same query string (a plain ``set`` would
    order keywords differently from run to run).
    """
    text_content = _HTML_TAG_RE.sub('', report_content)

    keywords = []
    keywords.extend(_FACTOR_RE.findall(text_content)[:5])
    keywords.extend(_LEVEL_RE.findall(text_content)[:3])

    if not keywords:
        return _DEFAULT_QUERY
    return ' '.join(dict.fromkeys(keywords))


def _build_knowledge_context(search_results):
    """Turn search hits into ``(knowledge_context, sources)``.

    Uses at most the first five hits and skips hits with empty content.
    Each reference is capped at 500 characters in the context and at 200
    characters (plus ``...``) in the ``sources`` preview.
    """
    knowledge_parts = []
    sources = []

    for i, result in enumerate((search_results or [])[:5]):
        content = result.get('content', '')
        if not content:
            continue

        filename = result.get('filename', '未知来源')
        similarity = result.get('similarity', 0)

        knowledge_parts.append(f"【参考资料{i+1}】({filename})\n{content[:500]}")
        preview = content[:200] + '...' if len(content) > 200 else content
        sources.append({
            'filename': filename,
            'content': preview,
            'similarity': similarity
        })

    return '\n\n'.join(knowledge_parts), sources


@app.route('/api/rag-analyze', methods=['POST'])
def rag_analyze():
    """RAG-enhanced analysis: retrieve knowledge relevant to a report.

    Expects a JSON object with ``reportContent`` (required). Returns the
    assembled knowledge context, the source snippets and the query that was
    used; the AI call itself is left to the front end. Returns 400 when the
    report content is empty or the body is not a JSON object.
    """
    try:
        data = _json_body()
        report_content = data.get('reportContent', '')

        if not report_content:
            return _error('报告内容不能为空', 400)

        # 1. Extract keywords from the report to use as the retrieval query.
        query = _build_query(report_content)

        # 2. Retrieve related knowledge.
        search_results = knowledge_service.search(query, top_k=5)

        # 3. Build the knowledge-base context.
        knowledge_context, sources = _build_knowledge_context(search_results)

        # Debug output to the console
        print("=" * 50)
        print("📚 RAG-Analyze 检索结果")
        print("=" * 50)
        print(f"查询关键词: {query}")
        print(f"检索到文档数: {len(sources)}")
        for i, s in enumerate(sources):
            print(f" [{i+1}] {s['filename']} (相似度: {s.get('similarity', 0):.4f})")
        print("=" * 50)

        # 4. Return the retrieval result; the front end calls the AI.
        return jsonify({
            'success': True,
            'data': {
                'knowledgeContext': knowledge_context,
                'sources': sources,
                'query': query
            }
        })
    except Exception as e:
        traceback.print_exc()
        return _error(str(e), 500)


def init_service():
    """Initialise the service before the HTTP server starts.

    Initialises the knowledge service, indexes files already present in the
    knowledge folder, then starts the file watcher and stores it in the
    module-level ``file_watcher``.
    """
    global file_watcher

    print("=" * 50)
    print("RAG 知识库服务启动中...")
    print("=" * 50)

    # Initialise the knowledge service.
    knowledge_service.init()

    # Scan and index new files.
    knowledge_service.scan_and_index_folder()

    # Start file monitoring.
    file_watcher = FileWatcher(knowledge_service)
    file_watcher.start()

    print("=" * 50)
    print(f"服务已启动: http://{HOST}:{PORT}")
    print(f"知识库文件夹: {KNOWLEDGE_DIR}")
    print("=" * 50)


if __name__ == '__main__':
    init_service()
    app.run(host=HOST, port=PORT, debug=False, threaded=True)