加入AI分析知识库
This commit is contained in:
parent
ae39c44fa9
commit
0f490298f3
19
.gitignore
vendored
19
.gitignore
vendored
|
|
@ -60,11 +60,30 @@ rag-python/knowledge_docs/
|
|||
rag-python/uploads/
|
||||
rag-python/__pycache__/
|
||||
rag-python/*.pyc
|
||||
rag-python/*.bat
|
||||
|
||||
######################################################################
|
||||
# Android App Build Output
|
||||
xinli-App/build/
|
||||
xinli-App/app/build/
|
||||
xinli-App/.gradle/
|
||||
xinli-App/local.properties
|
||||
xinli-App/*.keystore
|
||||
xinli-App/*.jks
|
||||
|
||||
# Android App 临时文档和脚本
|
||||
xinli-App/*.md
|
||||
xinli-App/*.bat
|
||||
xinli-App/*.txt
|
||||
|
||||
######################################################################
|
||||
# PaddleSpeech - 大型第三方库
|
||||
PaddleSpeech-develop/
|
||||
|
||||
######################################################################
|
||||
# 项目介绍目录(本地文档)
|
||||
项目介绍/
|
||||
|
||||
######################################################################
|
||||
# 临时文档和调试文件(根目录下的中文 md/sql/bat 文件)
|
||||
/*.md
|
||||
|
|
|
|||
158
rag-python/app.py
Normal file
158
rag-python/app.py
Normal file
|
|
@ -0,0 +1,158 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
RAG 知识库服务 - Flask API
|
||||
支持与 jar 包同级目录部署
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
from flask import Flask, request, jsonify
|
||||
from flask_cors import CORS
|
||||
from config import HOST, PORT, KNOWLEDGE_DIR, BASE_DIR
|
||||
from knowledge_service import knowledge_service
|
||||
from file_watcher import FileWatcher
|
||||
|
||||
# Flask application object; every route below is registered on it.
app = Flask(__name__)
CORS(app)  # allow cross-origin requests

# File watcher instance; stays None until init_service() creates and starts it.
file_watcher = None
|
||||
|
||||
@app.route('/api/health', methods=['GET'])
def health_check():
    """Health probe: report a fixed OK status plus the configured directories."""
    payload = {
        'status': 'ok',
        'service': 'RAG Knowledge Service',
        'knowledge_dir': KNOWLEDGE_DIR,
        'base_dir': BASE_DIR,
    }
    return jsonify(payload)
|
||||
|
||||
@app.route('/api/documents', methods=['GET'])
def list_documents():
    """Return every indexed document as JSON; any failure becomes a 500 response."""
    try:
        payload = {'success': True, 'data': knowledge_service.list_documents()}
        return jsonify(payload)
    except Exception as exc:
        failure = {'success': False, 'error': str(exc)}
        return jsonify(failure), 500
|
||||
|
||||
@app.route('/api/documents/upload', methods=['POST'])
def upload_document():
    """Accept a multipart upload in the ``file`` field and hand it to the knowledge service.

    Responds 400 when the field is missing, the filename is empty, or the
    service reports a failure; responds 500 on any raised exception.
    """
    def _fail(message, status):
        # Shared shape of every error response produced by this route.
        return jsonify({'success': False, 'error': message}), status

    try:
        if 'file' not in request.files:
            return _fail('没有上传文件', 400)

        upload = request.files['file']
        if upload.filename == '':
            return _fail('文件名为空', 400)

        outcome = knowledge_service.upload_and_index(upload)
        if not outcome['success']:
            return _fail(outcome.get('error'), 400)

        return jsonify({'success': True, 'data': outcome})
    except Exception as exc:
        return _fail(str(exc), 500)
|
||||
|
||||
@app.route('/api/documents/<filename>', methods=['DELETE'])
def delete_document(filename):
    """Delete one document by name and echo the service's result.

    The top-level ``success`` flag mirrors the service result; the HTTP status
    is 200 unless an exception is raised (then 500).
    """
    try:
        outcome = knowledge_service.delete_document(filename)
        return jsonify(success=outcome['success'], data=outcome)
    except Exception as exc:
        return jsonify(success=False, error=str(exc)), 500
|
||||
|
||||
@app.route('/api/search', methods=['POST'])
def search():
    """Search the knowledge base.

    Expects a JSON object body with:
        query: non-empty string to search for.
        top_k: optional positive integer, number of results (default 5).

    Returns:
        200 with ``{'success': True, 'data': results}`` on success,
        400 when the body is not a JSON object or a field is invalid,
        500 when the search itself raises.
    """
    def _bad_request(message):
        return jsonify({'success': False, 'error': message}), 400

    try:
        # silent=True yields None instead of raising on a missing or malformed
        # body, so client mistakes are reported as 400 rather than as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return _bad_request('请求体必须是 JSON 对象')

        query = data.get('query', '')
        if not isinstance(query, str) or not query.strip():
            return _bad_request('查询内容不能为空')

        try:
            top_k = int(data.get('top_k', 5))
        except (TypeError, ValueError):
            return _bad_request('top_k 必须是正整数')
        if top_k < 1:
            return _bad_request('top_k 必须是正整数')

        results = knowledge_service.search(query, top_k)
        return jsonify({'success': True, 'data': results})
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
|
||||
|
||||
@app.route('/api/stats', methods=['GET'])
def get_stats():
    """Return the knowledge service's statistics as JSON (500 on any exception)."""
    try:
        return jsonify(success=True, data=knowledge_service.get_stats())
    except Exception as exc:
        return jsonify(success=False, error=str(exc)), 500
|
||||
|
||||
@app.route('/api/rebuild', methods=['POST'])
def rebuild_index():
    """Ask the knowledge service to rebuild its index and return its result."""
    try:
        outcome = knowledge_service.rebuild_index()
        body = {'success': True, 'data': outcome}
        return jsonify(body)
    except Exception as exc:
        body = {'success': False, 'error': str(exc)}
        return jsonify(body), 500
|
||||
|
||||
@app.route('/api/scan', methods=['POST'])
def scan_folder():
    """Scan the knowledge folder, index files not yet indexed, and return the counts."""
    try:
        summary = knowledge_service.scan_and_index_folder()
        return jsonify(success=True, data=summary)
    except Exception as exc:
        return jsonify(success=False, error=str(exc)), 500
|
||||
|
||||
def init_service():
    """Start the service: load the index, index new files, start the folder watcher."""
    global file_watcher

    rule = "=" * 50
    for line in (rule, "RAG 知识库服务启动中...", rule):
        print(line)

    # Load the persisted index, then pick up files added while the service was down.
    knowledge_service.init()
    knowledge_service.scan_and_index_folder()

    # Watch the knowledge folder from now on.
    file_watcher = FileWatcher(knowledge_service)
    file_watcher.start()

    print(rule)
    print(f"服务已启动: http://{HOST}:{PORT}")
    print(f"知识库文件夹: {KNOWLEDGE_DIR}")
    print(rule)
|
||||
|
||||
# Script entry point: initialise the service, then serve with Flask's built-in
# threaded server (debug mode off).
if __name__ == '__main__':
    init_service()
    app.run(host=HOST, port=PORT, debug=False, threaded=True)
|
||||
155
rag-python/batch_index.py
Normal file
155
rag-python/batch_index.py
Normal file
|
|
@ -0,0 +1,155 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
批量索引脚本 - 用于处理大文件
|
||||
直接运行此脚本来索引 knowledge_docs 目录中的所有文件
|
||||
|
||||
使用方法:
|
||||
1. 将 PDF 文件放入 rag-python/knowledge_docs/ 目录
|
||||
2. 运行: python batch_index.py
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
# 添加当前目录到路径
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
from config import KNOWLEDGE_DIR, CHUNK_SIZE
|
||||
from document_parser import parse_document, is_supported_file
|
||||
from text_splitter import split_text
|
||||
from vector_store import vector_store
|
||||
|
||||
def format_time(seconds):
    """Render a duration in seconds as seconds, minutes or hours with one decimal."""
    # (exclusive upper bound, divisor, unit label), tried in ascending order.
    scales = ((60, 1, "秒"), (3600, 60, "分钟"))
    for limit, divisor, unit in scales:
        if seconds < limit:
            return f"{seconds / divisor:.1f}{unit}"
    return f"{seconds / 3600:.1f}小时"
|
||||
|
||||
def estimate_time(char_count):
    """Estimate the processing time for ``char_count`` characters as a formatted string."""
    # One chunk per CHUNK_SIZE characters, budgeted at roughly 1.5 seconds per chunk.
    return format_time(char_count / CHUNK_SIZE * 1.5)
|
||||
|
||||
def batch_index():
    """Interactively index every supported, not-yet-indexed file in KNOWLEDGE_DIR.

    Loads the existing index, lists the top-level files of KNOWLEDGE_DIR that
    are supported and whose name is not reported by the vector store, asks for
    confirmation on stdin, then parses, splits and adds each file to the
    vector store. Progress and a final summary are printed; nothing is returned.
    """
    print("=" * 60)
    print("批量索引工具")
    print("=" * 60)
    print(f"知识库目录: {KNOWLEDGE_DIR}")
    print(f"分块大小: {CHUNK_SIZE} 字符")
    print()

    # Load the existing index
    print("加载现有索引...")
    vector_store.load_index()
    stats = vector_store.get_stats()
    indexed_files = set(stats.get('files', []))
    print(f"已索引文件: {len(indexed_files)} 个")
    print()

    # Scan the folder (top level only; subdirectories are not visited)
    files_to_process = []
    for filename in os.listdir(KNOWLEDGE_DIR):
        file_path = os.path.join(KNOWLEDGE_DIR, filename)
        if os.path.isfile(file_path) and is_supported_file(filename):
            if filename not in indexed_files:
                file_size = os.path.getsize(file_path)
                files_to_process.append((filename, file_path, file_size))

    if not files_to_process:
        print("没有新文件需要索引。")
        print(f"如需重新索引,请先删除 index_data 目录中的文件。")
        return

    # Show the files waiting to be processed
    print(f"发现 {len(files_to_process)} 个新文件:")
    total_size = 0
    for filename, _, size in files_to_process:
        size_mb = size / (1024 * 1024)
        total_size += size
        print(f" - {filename} ({size_mb:.1f} MB)")

    print(f"\n总大小: {total_size / (1024 * 1024):.1f} MB")
    print()

    # Ask for confirmation; anything other than "y" cancels
    confirm = input("是否开始处理?(y/n): ").strip().lower()
    if confirm != 'y':
        print("已取消。")
        return

    print()
    print("=" * 60)
    print("开始处理...")
    print("=" * 60)

    total_start = time.time()
    success_count = 0
    fail_count = 0

    for i, (filename, file_path, file_size) in enumerate(files_to_process):
        print()
        print(f"[{i+1}/{len(files_to_process)}] 处理: {filename}")
        print("-" * 40)

        file_start = time.time()

        try:
            # Parse the document
            print("解析文档...")
            content = parse_document(file_path)

            # An empty document is counted as a failure and skipped
            if not content or not content.strip():
                print(f" 警告: 文档内容为空,跳过")
                fail_count += 1
                continue

            char_count = len(content)
            print(f" 提取文本: {char_count} 字符")
            print(f" 预计处理时间: {estimate_time(char_count)}")

            # Split into chunks
            print("分块处理...")
            chunks = split_text(content)
            print(f" 生成 {len(chunks)} 个文本块")

            # Embed and store
            print("向量化处理...")
            metadata = {
                'filename': filename,
                'file_path': file_path,
                'char_count': char_count
            }

            # NOTE(review): the return value is never used below.
            added = vector_store.add_documents(chunks, metadata)

            file_time = time.time() - file_start
            print(f" 完成! 耗时: {format_time(file_time)}")
            success_count += 1

        except Exception as e:
            # One failing file must not abort the whole batch
            print(f" 错误: {e}")
            fail_count += 1

    # Summary
    total_time = time.time() - total_start
    print()
    print("=" * 60)
    print("处理完成!")
    print("=" * 60)
    print(f"成功: {success_count} 个文件")
    print(f"失败: {fail_count} 个文件")
    print(f"总耗时: {format_time(total_time)}")

    # Show the final statistics
    final_stats = vector_store.get_stats()
    print(f"索引总文件数: {final_stats['total_files']}")
    print(f"索引总文本块: {final_stats['total_chunks']}")
|
||||
|
||||
# Run the interactive batch indexer when this file is executed as a script.
if __name__ == '__main__':
    batch_index()
|
||||
51
rag-python/config.py
Normal file
51
rag-python/config.py
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
RAG 服务配置文件
|
||||
支持与 jar 包同级目录部署
|
||||
使用本地 Ollama 进行向量化
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Service settings
# NOTE(review): "0.0.0.0" listens on every network interface — confirm this is
# intended for the deployment.
HOST = "0.0.0.0"
PORT = 5000

# Ollama settings (a local Ollama instance generates the embeddings)
OLLAMA_URL = "http://localhost:11434"
OLLAMA_EMBED_MODEL = "nomic-embed-text"  # embedding model that is already downloaded

# Resolve the program directory (also works after packaging).
# When deployed next to the jar, BASE_DIR is the rag-python folder.
if getattr(sys, 'frozen', False):
    # Running as a packaged executable
    BASE_DIR = os.path.dirname(sys.executable)
else:
    # Running under a normal Python interpreter
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Folder layout - everything lives under BASE_DIR
KNOWLEDGE_DIR = os.path.join(BASE_DIR, "knowledge_docs")  # knowledge-base documents
INDEX_DIR = os.path.join(BASE_DIR, "index_data")  # persisted index
UPLOAD_DIR = os.path.join(BASE_DIR, "uploads")  # temporary folder for uploads

# Make sure the folders exist (runs at import time)
for dir_path in [KNOWLEDGE_DIR, INDEX_DIR, UPLOAD_DIR]:
    os.makedirs(dir_path, exist_ok=True)

# Supported file types
SUPPORTED_EXTENSIONS = {'.txt', '.md', '.pdf', '.docx', '.doc'}

# Text chunking settings
CHUNK_SIZE = 300  # characters per chunk (kept small to fit nomic-embed-text's 2048-token limit)
CHUNK_OVERLAP = 30  # characters shared between neighbouring chunks

# Retrieval settings
TOP_K = 5  # number of most relevant results to return

# Print the effective configuration (runs whenever this module is imported)
print(f"[配置] 程序目录: {BASE_DIR}")
print(f"[配置] 知识库目录: {KNOWLEDGE_DIR}")
print(f"[配置] 索引目录: {INDEX_DIR}")
print(f"[配置] Ollama地址: {OLLAMA_URL}")
print(f"[配置] 嵌入模型: {OLLAMA_EMBED_MODEL}")
|
||||
97
rag-python/document_parser.py
Normal file
97
rag-python/document_parser.py
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
文档解析器 - 支持多种文件格式
|
||||
"""
|
||||
import os
|
||||
import chardet
|
||||
from config import SUPPORTED_EXTENSIONS
|
||||
|
||||
def detect_encoding(file_path):
    """Guess a file's text encoding from its first 10000 bytes, defaulting to utf-8."""
    with open(file_path, 'rb') as handle:
        sample = handle.read(10000)
    guessed = chardet.detect(sample)['encoding']
    return guessed if guessed else 'utf-8'
|
||||
|
||||
def parse_txt(file_path):
    """Read a plain-text file using its detected encoding.

    Undecodable bytes are dropped. Returns "" (after printing the error) when
    the file cannot be opened or read.
    """
    detected = detect_encoding(file_path)
    try:
        with open(file_path, 'r', encoding=detected, errors='ignore') as stream:
            content = stream.read()
        return content
    except Exception as e:
        print(f"解析TXT文件失败 {file_path}: {e}")
        return ""
|
||||
|
||||
def parse_md(file_path):
    """Parse a Markdown file; it is read as plain text via parse_txt, markup included."""
    return parse_txt(file_path)
|
||||
|
||||
def parse_pdf(file_path):
    """Extract the text of a PDF page by page using PyPDF2.

    Progress is printed for the first page and every 50th page. A page whose
    extraction raises is reported and skipped. Returns the page texts joined
    with newlines, or "" (after printing the error) when the file as a whole
    cannot be processed.
    """
    try:
        from PyPDF2 import PdfReader

        size_mb = os.path.getsize(file_path) / (1024 * 1024)
        print(f" PDF文件大小: {size_mb:.1f} MB")

        pdf = PdfReader(file_path)
        page_total = len(pdf.pages)
        print(f" PDF总页数: {page_total}")

        collected = []
        for page_no, page in enumerate(pdf.pages, start=1):
            if page_no == 1 or page_no % 50 == 0:
                print(f" 解析进度: {page_no}/{page_total} 页")
            try:
                page_text = page.extract_text()
            except Exception as page_err:
                print(f" 警告: 第 {page_no} 页解析失败: {page_err}")
                continue
            if page_text:
                collected.append(page_text)

        extracted_chars = sum(len(part) for part in collected)
        print(f" PDF解析完成,提取文本 {extracted_chars} 字符")
        return "\n".join(collected)
    except Exception as e:
        print(f"解析PDF文件失败 {file_path}: {e}")
        return ""
|
||||
|
||||
def parse_docx(file_path):
    """Return the non-blank paragraphs of a Word document joined by newlines.

    Returns "" (after printing the error) when the document cannot be read.
    """
    try:
        from docx import Document

        document = Document(file_path)
        return "\n".join(
            paragraph.text
            for paragraph in document.paragraphs
            if paragraph.text.strip()
        )
    except Exception as e:
        print(f"解析DOCX文件失败 {file_path}: {e}")
        return ""
|
||||
|
||||
def parse_document(file_path):
    """Dispatch to the parser matching the file's extension.

    Returns the parsed text, or "" (after printing a message) when the
    extension is not in SUPPORTED_EXTENSIONS.
    """
    suffix = os.path.splitext(file_path)[1].lower()

    if suffix in SUPPORTED_EXTENSIONS:
        dispatch = {
            '.txt': parse_txt,
            '.md': parse_md,
            '.pdf': parse_pdf,
            '.docx': parse_docx,
            '.doc': parse_docx,
        }
        # Extensions that are supported but unmapped are read as plain text.
        handler = dispatch.get(suffix, parse_txt)
        return handler(file_path)

    print(f"不支持的文件类型: {suffix}")
    return ""
|
||||
|
||||
def is_supported_file(filename):
    """Tell whether the file's lower-cased extension is in SUPPORTED_EXTENSIONS."""
    _, suffix = os.path.splitext(filename)
    return suffix.lower() in SUPPORTED_EXTENSIONS
|
||||
172
rag-python/file_watcher.py
Normal file
172
rag-python/file_watcher.py
Normal file
|
|
@ -0,0 +1,172 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
文件监控服务 - 监控知识库文件夹的变化
|
||||
"""
|
||||
import os
|
||||
import time
|
||||
import threading
|
||||
from watchdog.observers import Observer
|
||||
from watchdog.events import FileSystemEventHandler
|
||||
from config import KNOWLEDGE_DIR
|
||||
from document_parser import is_supported_file
|
||||
|
||||
# Module-level state: names of files currently being uploaded, so the file
# watcher can avoid processing them a second time. Guarded by uploading_lock.
uploading_files = set()
uploading_lock = threading.Lock()
|
||||
|
||||
def mark_uploading(filename):
    """Record that ``filename`` is currently being uploaded."""
    uploading_lock.acquire()
    try:
        uploading_files.add(filename)
    finally:
        uploading_lock.release()
|
||||
|
||||
def unmark_uploading(filename):
    """Clear the uploading mark for ``filename``; a no-op when it is not marked."""
    uploading_lock.acquire()
    try:
        uploading_files.discard(filename)
    finally:
        uploading_lock.release()
|
||||
|
||||
def is_uploading(filename):
    """Return True while ``filename`` carries the uploading mark."""
    uploading_lock.acquire()
    try:
        marked = filename in uploading_files
    finally:
        uploading_lock.release()
    return marked
|
||||
|
||||
|
||||
class KnowledgeFileHandler(FileSystemEventHandler):
    """Watchdog handler that keeps the index in step with the knowledge folder.

    Created and moved-in files are indexed, deleted and moved-out files are
    removed from the index, and modifications are ignored. Events are
    debounced per path so a burst of events produces a single action.
    """

    def __init__(self, knowledge_service):
        super().__init__()
        self.knowledge_service = knowledge_service
        self.pending_files = {}  # debounce table: path -> latest pending event
        self.debounce_seconds = 3  # quiet period before an event is acted on
        self._lock = threading.Lock()  # guards pending_files

    def _should_process(self, path):
        """Return True for an existing, supported file that is not being uploaded."""
        if not os.path.isfile(path):
            return False
        filename = os.path.basename(path)
        # Files being uploaded are indexed by the upload path itself
        if is_uploading(filename):
            return False
        return is_supported_file(filename)

    def _debounce_process(self, path, action):
        """Schedule ``action`` for ``path`` once no newer event has replaced it.

        Args:
            path: Path of the file the event refers to.
            action: 'add', 'delete' or 'modify'.
        """
        filename = os.path.basename(path)

        # Files being uploaded are skipped
        if is_uploading(filename):
            print(f"[文件监控] 跳过正在上传的文件: {filename}")
            return

        # A unique token identifies this event; a later event for the same path
        # overwrites the entry, which makes the timer of this one a no-op.
        token = object()
        with self._lock:
            self.pending_files[path] = {
                'action': action,
                'time': time.time(),
                'token': token,
            }

        def delayed_process():
            time.sleep(self.debounce_seconds)
            with self._lock:
                info = self.pending_files.get(path)
                if info is None or info.get('token') is not token:
                    # Superseded by a newer event, whose own timer handles it
                    return
                del self.pending_files[path]
            # The (possibly slow) indexing runs outside the lock so other file
            # events are not blocked while a document is being embedded.
            if not is_uploading(os.path.basename(path)):
                self._do_process(path, info['action'])

        threading.Thread(target=delayed_process, daemon=True).start()

    def _do_process(self, path, action):
        """Carry out ``action`` for ``path``; failures are printed, never raised."""
        filename = os.path.basename(path)

        # Final check right before doing the work
        if is_uploading(filename):
            return

        try:
            if action == 'add':
                # Skip files the index already knows about
                stats = self.knowledge_service.get_stats()
                if filename in stats.get('files', []):
                    print(f"[文件监控] 文件已索引,跳过: {filename}")
                    return

                print(f"[文件监控] 检测到新文件: {filename}")
                result = self.knowledge_service.add_document(path, filename)
                if result['success']:
                    print(f"[文件监控] 已索引: {filename}, {result['chunks']} 个文本块")
                else:
                    print(f"[文件监控] 索引失败: {filename}, {result.get('error')}")

            elif action == 'delete':
                # Only drop the index entry when the file is really gone
                if os.path.exists(path):
                    print(f"[文件监控] 文件仍存在,跳过删除: {filename}")
                    return
                print(f"[文件监控] 检测到文件删除: {filename}")
                self.knowledge_service.delete_document(filename)
                print(f"[文件监控] 已从索引删除: {filename}")

            elif action == 'modify':
                # Simplification: modifications are not re-indexed, because an
                # uploaded file has already been indexed by the upload itself.
                print(f"[文件监控] 检测到文件修改,跳过: {filename}")

        except Exception as e:
            print(f"[文件监控] 处理失败 {filename}: {e}")

    def on_created(self, event):
        """Index newly created supported files."""
        if not event.is_directory and self._should_process(event.src_path):
            self._debounce_process(event.src_path, 'add')

    def on_deleted(self, event):
        """Remove deleted supported files from the index."""
        if event.is_directory:
            return
        filename = os.path.basename(event.src_path)
        if is_supported_file(filename) and not is_uploading(filename):
            self._debounce_process(event.src_path, 'delete')

    def on_modified(self, event):
        """Ignore modifications; handling is disabled to avoid clashing with uploads."""
        pass

    def on_moved(self, event):
        """Treat a move as a delete of the source plus an add of the destination."""
        if event.is_directory:
            return

        src_filename = os.path.basename(event.src_path)
        # Moved out / renamed away
        if is_supported_file(src_filename) and not is_uploading(src_filename):
            self._debounce_process(event.src_path, 'delete')

        # Moved in / renamed to
        if self._should_process(event.dest_path):
            self._debounce_process(event.dest_path, 'add')
|
||||
|
||||
|
||||
class FileWatcher:
    """Owns the watchdog observer that monitors the knowledge folder."""

    def __init__(self, knowledge_service):
        self.running = False
        self.observer = None
        self.knowledge_service = knowledge_service

    def start(self):
        """Begin watching KNOWLEDGE_DIR recursively; does nothing when already running."""
        if not self.running:
            print(f"[文件监控] 开始监控文件夹: {KNOWLEDGE_DIR}")

            event_handler = KnowledgeFileHandler(self.knowledge_service)
            self.observer = Observer()
            self.observer.schedule(event_handler, KNOWLEDGE_DIR, recursive=True)
            self.observer.start()
            self.running = True

    def stop(self):
        """Stop the observer and wait for its thread; does nothing if never started."""
        if not self.observer:
            return
        self.observer.stop()
        self.observer.join()
        self.running = False
        print("[文件监控] 已停止")
|
||||
244
rag-python/knowledge_service.py
Normal file
244
rag-python/knowledge_service.py
Normal file
|
|
@ -0,0 +1,244 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
知识库服务 - 管理文档的添加、删除和检索
|
||||
"""
|
||||
import os
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
from config import KNOWLEDGE_DIR, UPLOAD_DIR
|
||||
from document_parser import parse_document, is_supported_file
|
||||
from text_splitter import split_text
|
||||
from vector_store import vector_store
|
||||
|
||||
class KnowledgeService:
    """Manages the knowledge base: adding, uploading, deleting, listing and searching."""

    def __init__(self):
        self.vector_store = vector_store

    @staticmethod
    def _safe_filename(filename):
        """Reduce a client-supplied name to a bare file name.

        Directory components (with either slash style) and NUL characters are
        removed so the name cannot escape the folder it is joined to.
        Non-ASCII characters are kept. Returns '' when nothing usable is left.
        """
        if not filename or not isinstance(filename, str):
            return ''
        name = os.path.basename(filename.replace('\\', '/'))
        name = name.replace('\x00', '').strip()
        return '' if name in ('', '.', '..') else name

    @staticmethod
    def _is_inside(base_dir, path):
        """Return True when ``path`` resolves to ``base_dir`` or something below it."""
        base = os.path.realpath(base_dir)
        target = os.path.realpath(path)
        try:
            return os.path.commonpath([base, target]) == base
        except ValueError:
            # Raised e.g. for paths on different drives
            return False

    def init(self):
        """Initialise the service by loading the persisted index."""
        self.vector_store.load_index()

    def scan_and_index_folder(self):
        """Walk KNOWLEDGE_DIR and index every supported file not yet indexed.

        Used at start-up and when rebuilding the index.

        Returns:
            dict with 'scanned' (new files found) and 'indexed' (files indexed
            successfully).
        """
        print(f"开始扫描知识库文件夹: {KNOWLEDGE_DIR}")

        # Files already indexed.
        # NOTE(review): assumes get_stats()['files'] holds the 'filename' stored
        # in chunk metadata — confirm against VectorStore.get_stats.
        stats = self.vector_store.get_stats()
        indexed_files = set(stats.get('files', []))

        # Scan the folder
        new_files = []
        for root, _dirs, files in os.walk(KNOWLEDGE_DIR):
            for filename in files:
                if not is_supported_file(filename):
                    continue
                file_path = os.path.join(root, filename)
                rel_path = os.path.relpath(file_path, KNOWLEDGE_DIR)
                # Documents are indexed under their file name, so that is the
                # key to compare. Comparing only the relative path re-indexed
                # files in subfolders on every scan.
                if filename in indexed_files or rel_path in indexed_files:
                    continue
                new_files.append((filename, file_path, rel_path))

        # Index the new files
        indexed_count = 0
        for filename, file_path, rel_path in new_files:
            if filename in indexed_files:
                # Same file name already indexed earlier in this pass
                continue
            try:
                result = self.add_document(file_path, filename)
                if result['success']:
                    indexed_count += 1
                    indexed_files.add(filename)
                    print(f" 已索引: {rel_path}")
                else:
                    print(f" 索引失败 {rel_path}: {result.get('error')}")
            except Exception as e:
                print(f" 索引失败 {rel_path}: {e}")

        print(f"扫描完成,新索引 {indexed_count} 个文件")
        return {
            'scanned': len(new_files),
            'indexed': indexed_count
        }

    def add_document(self, file_path, filename=None):
        """Parse, split and index a single document.

        Args:
            file_path: Path of the file to read.
            filename: Name to index the document under; defaults to the base
                name of ``file_path``.

        Returns:
            dict with 'success'; on success also 'filename', 'chunks' and
            'char_count', otherwise 'error'.
        """
        if filename is None:
            filename = os.path.basename(file_path)

        if not os.path.exists(file_path):
            return {'success': False, 'error': '文件不存在'}

        if not is_supported_file(filename):
            return {'success': False, 'error': '不支持的文件类型'}

        # Parse the document
        print(f"正在解析文档: {filename}")
        content = parse_document(file_path)

        if not content or not content.strip():
            return {'success': False, 'error': '文档内容为空'}

        # Split into chunks
        chunks = split_text(content)

        if not chunks:
            return {'success': False, 'error': '文档分块失败'}

        # Metadata attached to the document's chunks
        metadata = {
            'filename': filename,
            'file_path': file_path,
            'indexed_at': datetime.now().isoformat(),
            'char_count': len(content)
        }

        # Add to the vector store
        added = self.vector_store.add_documents(chunks, metadata)

        return {
            'success': True,
            'filename': filename,
            'chunks': added,
            'char_count': len(content)
        }

    def upload_and_index(self, file_storage, copy_to_knowledge=True):
        """Save an uploaded file, index it and optionally keep a copy.

        Args:
            file_storage: Flask ``FileStorage`` object of the upload.
            copy_to_knowledge: Whether to copy the file into KNOWLEDGE_DIR
                after successful indexing.

        Returns:
            The add_document() result, plus 'saved_to' when a copy was stored;
            or a failure dict for an invalid name or unsupported type.
        """
        import threading
        import time
        from file_watcher import mark_uploading, unmark_uploading

        # The name comes from the client: strip any directory part so it
        # cannot point outside UPLOAD_DIR / KNOWLEDGE_DIR.
        filename = self._safe_filename(file_storage.filename)
        if not filename:
            return {'success': False, 'error': '文件名无效'}

        if not is_supported_file(filename):
            return {'success': False, 'error': '不支持的文件类型'}

        # Mark the file as uploading so the file watcher leaves it alone
        mark_uploading(filename)
        temp_path = os.path.join(UPLOAD_DIR, filename)

        try:
            # Saving happens inside the try so a failed save still clears the mark
            file_storage.save(temp_path)

            # Index the document
            result = self.add_document(temp_path, filename)

            if result['success'] and copy_to_knowledge:
                # Keep a copy in the knowledge folder
                dest_path = os.path.join(KNOWLEDGE_DIR, filename)
                shutil.copy2(temp_path, dest_path)
                result['saved_to'] = dest_path

            return result
        finally:
            # Remove the temporary file
            if os.path.exists(temp_path):
                os.remove(temp_path)

            # Clear the mark with a delay, giving the file watcher time to
            # ignore the events caused by the copy.
            def delayed_unmark():
                time.sleep(5)
                unmark_uploading(filename)

            threading.Thread(target=delayed_unmark, daemon=True).start()

    def delete_document(self, filename):
        """Remove a document from the index and from the knowledge folder.

        Args:
            filename: Name the document was indexed under.

        Returns:
            dict with 'success', 'chunks_deleted' and 'file_deleted'.
        """
        # Remove from the vector store
        deleted = self.vector_store.delete_by_filename(filename)

        # Remove from the knowledge folder, but never touch anything that
        # resolves outside of it (e.g. names containing "..").
        file_path = os.path.join(KNOWLEDGE_DIR, filename)
        file_deleted = False
        if self._is_inside(KNOWLEDGE_DIR, file_path) and os.path.isfile(file_path):
            os.remove(file_path)
            file_deleted = True

        return {
            'success': deleted > 0 or file_deleted,
            'chunks_deleted': deleted,
            'file_deleted': file_deleted
        }

    def search(self, query, top_k=5):
        """Search the index.

        Args:
            query: Query text.
            top_k: Number of results to return.

        Returns:
            Whatever the vector store's search returns.
        """
        return self.vector_store.search(query, top_k)

    def get_stats(self):
        """Return the vector store's statistics."""
        return self.vector_store.get_stats()

    def list_documents(self):
        """List every indexed document with its chunk count and file details.

        Returns:
            list of dicts with 'filename', 'chunks', 'size' (0 when the file
            is missing from KNOWLEDGE_DIR) and 'exists'.
        """
        stats = self.vector_store.get_stats()
        files = stats.get('files', [])

        # Count the chunks per file in one pass instead of once per file
        chunk_counts = {}
        for doc in self.vector_store.documents:
            name = doc.get('metadata', {}).get('filename')
            chunk_counts[name] = chunk_counts.get(name, 0) + 1

        documents = []
        for filename in files:
            file_path = os.path.join(KNOWLEDGE_DIR, filename)
            exists = os.path.exists(file_path)
            documents.append({
                'filename': filename,
                'chunks': chunk_counts.get(filename, 0),
                'size': os.path.getsize(file_path) if exists else 0,
                'exists': exists
            })

        return documents

    def rebuild_index(self):
        """Clear the index and re-index the whole knowledge folder."""
        print("开始重建索引...")

        # Drop the existing index
        self.vector_store.clear()

        # Scan and index again
        result = self.scan_and_index_folder()

        return {
            'success': True,
            'indexed': result['indexed']
        }
|
||||
|
||||
# Module-level instance, created at import time and imported by other modules.
knowledge_service = KnowledgeService()
|
||||
11
rag-python/requirements.txt
Normal file
11
rag-python/requirements.txt
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
# RAG 知识库服务依赖(使用本地 Ollama)
|
||||
flask>=2.0.0
|
||||
flask-cors>=4.0.0
|
||||
faiss-cpu>=1.7.0
|
||||
numpy>=1.21.0
|
||||
watchdog>=3.0.0
|
||||
pypdf2>=3.0.0
|
||||
python-docx>=0.8.0
|
||||
chardet>=5.0.0
|
||||
jieba>=0.42.0
|
||||
requests>=2.28.0
|
||||
87
rag-python/text_splitter.py
Normal file
87
rag-python/text_splitter.py
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
文本分块器 - 将长文本分割成小块
|
||||
"""
|
||||
import re
|
||||
from config import CHUNK_SIZE, CHUNK_OVERLAP
|
||||
|
||||
def _overlap_tail(chunk, overlap):
    """Return the last ``overlap`` characters of ``chunk`` ('' when overlap <= 0)."""
    # chunk[-0:] is the whole string, so a zero overlap must be special-cased.
    return chunk[-overlap:] if overlap > 0 else ""


def _hard_split(piece, chunk_size, overlap):
    """Cut ``piece`` into windows of ``chunk_size`` that share ``overlap`` characters."""
    step = max(1, chunk_size - overlap)
    windows = []
    start = 0
    while True:
        windows.append(piece[start:start + chunk_size])
        if start + chunk_size >= len(piece):
            return windows
        start += step


def split_text(text, chunk_size=None, chunk_overlap=None):
    """Split text into chunks along paragraph and sentence boundaries.

    Paragraphs (separated by blank lines) are packed together up to
    ``chunk_size``. A paragraph longer than ``chunk_size`` is split at
    sentence punctuation, and a single sentence longer than ``chunk_size`` is
    cut into fixed-size windows. When a chunk is closed, its last
    ``chunk_overlap`` characters are carried into the next chunk, so a chunk
    may be up to ``chunk_size + chunk_overlap + 1`` characters long.

    Args:
        text: Text to split.
        chunk_size: Target number of characters per chunk; defaults to
            CHUNK_SIZE.
        chunk_overlap: Characters shared between neighbouring chunks; defaults
            to CHUNK_OVERLAP. Clamped to the range 0 .. chunk_size - 1.

    Returns:
        List of non-empty chunk strings ([] for empty or blank text).

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size is None:
        chunk_size = CHUNK_SIZE
    if chunk_overlap is None:
        chunk_overlap = CHUNK_OVERLAP

    if not text or not text.strip():
        return []

    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    chunk_overlap = max(0, min(chunk_overlap, chunk_size - 1))

    # Normalise whitespace
    text = text.strip()
    text = re.sub(r'\n{3,}', '\n\n', text)  # collapse runs of blank lines
    text = re.sub(r' {2,}', ' ', text)  # collapse runs of spaces

    # Split into paragraphs
    paragraphs = re.split(r'\n\n+', text)

    chunks = []

    def emit(piece):
        # Store a finished chunk, skipping anything that is only whitespace.
        piece = piece.strip()
        if piece:
            chunks.append(piece)

    current_chunk = ""

    for para in paragraphs:
        para = para.strip()
        if not para:
            continue

        if len(para) > chunk_size:
            # The paragraph alone is too long: close the running chunk first
            emit(current_chunk)
            current_chunk = ""

            # Split at sentence punctuation; the capture group keeps each
            # mark so it can be re-attached to its sentence.
            sentences = re.split(r'([。！？!?.])', para)
            temp_chunk = ""

            for i in range(0, len(sentences), 2):
                sentence = sentences[i]
                if i + 1 < len(sentences):
                    sentence += sentences[i + 1]
                if not sentence:
                    continue

                if len(temp_chunk) + len(sentence) <= chunk_size:
                    temp_chunk += sentence
                    continue

                emit(temp_chunk)
                if len(sentence) > chunk_size:
                    # No sentence boundary to use: fall back to fixed windows
                    *full_windows, temp_chunk = _hard_split(
                        sentence, chunk_size, chunk_overlap
                    )
                    for window in full_windows:
                        emit(window)
                else:
                    # The overlap comes from the chunk just closed, not from
                    # the sentence that starts the new one
                    temp_chunk = _overlap_tail(temp_chunk, chunk_overlap) + sentence

            # The unfinished remainder keeps accumulating with later paragraphs
            if temp_chunk:
                current_chunk = temp_chunk
        elif len(current_chunk) + len(para) + 1 <= chunk_size:
            # The paragraph still fits into the running chunk
            current_chunk += ("\n" if current_chunk else "") + para
        else:
            # Close the running chunk and start a new one with the overlap
            emit(current_chunk)
            tail = ""
            if len(current_chunk) > chunk_overlap:
                tail = _overlap_tail(current_chunk, chunk_overlap)
            current_chunk = f"{tail}\n{para}" if tail else para

    # Store the last chunk
    emit(current_chunk)

    return chunks
|
||||
326
rag-python/vector_store.py
Normal file
326
rag-python/vector_store.py
Normal file
|
|
@ -0,0 +1,326 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
向量存储 - 使用 Ollama 生成向量,FAISS 进行索引和检索
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
import numpy as np
|
||||
import requests
|
||||
from config import INDEX_DIR, OLLAMA_URL, OLLAMA_EMBED_MODEL, TOP_K
|
||||
|
||||
class VectorStore:
|
||||
def __init__(self):
    """Set up an empty store; nothing is read from disk and FAISS is not imported yet."""
    self.faiss = None  # FAISS module, imported lazily by _load_faiss()
    self.index = None
    self.dimension = 768  # vector dimension of nomic-embed-text
    self.documents = []  # document contents and metadata
    self.docs_file = os.path.join(INDEX_DIR, "documents.json")
    self.index_file = os.path.join(INDEX_DIR, "faiss.index")
|
||||
|
||||
def _load_faiss(self):
|
||||
"""懒加载 FAISS"""
|
||||
if self.faiss is None:
|
||||
import faiss
|
||||
self.faiss = faiss
|
||||
|
||||
def _embed_with_ollama(self, text, retry_count=3):
    """Request an embedding for ``text`` from Ollama, retrying on failure.

    Args:
        text: Text to embed. Empty or non-string input is replaced by the
            placeholder "empty"; NUL characters are removed and the text is
            cut to 1000 characters.
        retry_count: Number of attempts. The wait between attempts grows by
            two seconds each time (2, 4, ...).

    Returns:
        The "embedding" list from the response, or [] if the key is absent.

    Raises:
        The error of the last attempt once all attempts have failed.
    """
    import time
    import urllib.request
    import urllib.error

    endpoint = f"{OLLAMA_URL}/api/embeddings"

    # Guarantee a non-empty string
    if not text or not isinstance(text, str):
        text = "empty"

    # Drop NUL characters
    text = text.replace('\x00', '')

    # Cut overly long text (nomic-embed-text's context is about 2048 tokens;
    # 1000 characters is a conservative bound for Chinese text)
    max_length = 1000
    if len(text) > max_length:
        text = text[:max_length]

    payload = {
        "model": OLLAMA_EMBED_MODEL,
        "prompt": text
    }

    last_error = None
    for attempt in range(retry_count):
        try:
            # urllib is used instead of requests to avoid potential encoding issues
            body = json.dumps(payload, ensure_ascii=False).encode('utf-8')
            http_request = urllib.request.Request(
                endpoint,
                data=body,
                headers={'Content-Type': 'application/json; charset=utf-8'},
                method='POST'
            )
            with urllib.request.urlopen(http_request, timeout=120) as response:
                parsed = json.loads(response.read().decode('utf-8'))
                return parsed.get("embedding", [])
        except urllib.error.HTTPError as http_err:
            last_error = http_err
            error_body = http_err.read().decode('utf-8') if http_err.fp else 'N/A'
            print(f"Ollama HTTP 错误 (尝试 {attempt+1}/{retry_count}): {http_err.code} {http_err.reason}")
            print(f"响应内容: {error_body[:500]}")
            print(f"请求文本长度: {len(text)}")
        except Exception as err:
            last_error = err
            print(f"Ollama 嵌入失败 (尝试 {attempt+1}/{retry_count}): {err}")

        # Reached only after a failed attempt: back off unless it was the last
        if attempt < retry_count - 1:
            wait_time = (attempt + 1) * 2
            print(f"等待 {wait_time} 秒后重试...")
            time.sleep(wait_time)

    raise last_error
|
||||
|
||||
def _embed_batch(self, texts):
    """Embed every text in ``texts`` one by one and return the vectors in order.

    Args:
        texts: Sequence of strings to embed.

    Returns:
        A list with one embedding (as returned by ``_embed_with_ollama``) per
        input text, in input order.
    """
    import time

    embeddings = []
    total = len(texts)
    for i, text in enumerate(texts):
        # Progress and a short preview of the text, for debugging.
        print(f" 生成向量 {i+1}/{len(texts)}...")
        print(f" 文本长度: {len(text)}, 前50字符: {repr(text[:50])}")
        embeddings.append(self._embed_with_ollama(text))
        # Short pause between requests so they are not sent too quickly;
        # not needed after the last one.
        if i < total - 1:
            time.sleep(1.0)
    return embeddings
|
||||
|
||||
def _init_index(self):
    """Make sure FAISS is loaded and an index object exists.

    An existing index is left untouched; otherwise a new
    ``IndexFlatIP`` of size ``self.dimension`` is created.
    """
    self._load_faiss()
    if self.index is not None:
        return
    self.index = self.faiss.IndexFlatIP(self.dimension)
|
||||
|
||||
def load_index(self):
    """Load the FAISS index and the document list from disk.

    The index file is first copied to a temporary file and read from there
    (the original author notes that FAISS on Windows cannot open paths
    containing Chinese characters); if that fails the index file is read
    directly.

    State is only replaced once both the index and the documents were read
    successfully, and ``self.dimension`` is then aligned with the loaded
    index. On any failure the store is reset to an empty index with no
    documents.

    Returns:
        True when an existing index was loaded, False when a new empty index
        was created instead.
    """
    self._load_faiss()

    if not (os.path.exists(self.index_file) and os.path.exists(self.docs_file)):
        print("未找到已有索引,创建新索引")
        self._init_index()
        self.documents = []
        return False

    try:
        print("正在加载已有索引...")

        import tempfile
        import shutil

        tmp_path = None
        try:
            # Copy to a temporary file and read that copy.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.index') as tmp:
                tmp_path = tmp.name
            shutil.copy2(self.index_file, tmp_path)
            loaded_index = self.faiss.read_index(tmp_path)
        except Exception as e:
            print(f"临时文件方式失败,尝试直接读取: {e}")
            loaded_index = self.faiss.read_index(self.index_file)
        finally:
            # Always remove the temporary copy, including on failure paths.
            if tmp_path is not None and os.path.exists(tmp_path):
                try:
                    os.unlink(tmp_path)
                except OSError:
                    pass

        with open(self.docs_file, 'r', encoding='utf-8') as f:
            loaded_documents = json.load(f)

        # Commit only after both parts loaded, so index and documents stay paired.
        self.index = loaded_index
        self.documents = loaded_documents
        # Keep the configured dimension in step with the persisted index;
        # add_documents compares new vectors against it.
        self.dimension = getattr(loaded_index, 'd', self.dimension)
        print(f"索引加载完成,共 {len(self.documents)} 个文档块")
        return True
    except Exception as e:
        print(f"加载索引失败: {e}")
        # Drop any index object first: _init_index only creates a new index
        # when self.index is None, and documents are being reset to empty.
        self.index = None
        self._init_index()
        self.documents = []
        return False
|
||||
|
||||
def save_index(self):
    """Persist the FAISS index and the document list to disk.

    Does nothing when no index exists. The index is first written to a
    temporary file and then moved to ``self.index_file`` (the original author
    notes that FAISS on Windows cannot write to paths containing Chinese
    characters); if that fails it is written to ``self.index_file`` directly.
    The documents are written as UTF-8 JSON to ``self.docs_file``.
    """
    self._load_faiss()
    if self.index is None:
        return

    # Make sure the target directory exists. A bare file name has no
    # directory part, and os.makedirs('') would raise.
    target_dir = os.path.dirname(self.index_file)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    import tempfile
    import shutil

    tmp_path = None
    try:
        # Write to a temporary file first.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.index') as tmp:
            tmp_path = tmp.name

        self.faiss.write_index(self.index, tmp_path)

        # Move it to the final location.
        shutil.move(tmp_path, self.index_file)
        tmp_path = None  # moved successfully, nothing left to clean up
    except Exception as e:
        # Fall back to writing the target path directly.
        print(f"临时文件方式失败,尝试直接写入: {e}")
        self.faiss.write_index(self.index, self.index_file)
    finally:
        # Remove a temporary file that was created but never moved.
        if tmp_path is not None and os.path.exists(tmp_path):
            try:
                os.unlink(tmp_path)
            except OSError:
                pass

    with open(self.docs_file, 'w', encoding='utf-8') as f:
        json.dump(self.documents, f, ensure_ascii=False, indent=2)
    print(f"索引已保存,共 {len(self.documents)} 个文档块")
|
||||
|
||||
def add_documents(self, chunks, metadata=None):
    """Embed ``chunks``, add them to the index and persist the store.

    Args:
        chunks: List of text chunks to index.
        metadata: Optional dict attached to every chunk; each stored document
            gets its own copy.

    Returns:
        The number of chunks added (0 for empty input).

    Raises:
        ValueError: If the embeddings are empty or differ in length, or if
            their dimension does not match an index that already holds
            vectors.
    """
    if not chunks:
        return 0

    self._load_faiss()
    self._init_index()

    # Generate the vectors through Ollama.
    print(f"正在为 {len(chunks)} 个文本块生成向量...")
    embeddings = self._embed_batch(chunks)

    # All vectors must share one non-zero length to form a matrix.
    lengths = {len(vec) for vec in embeddings}
    if len(lengths) != 1 or 0 in lengths:
        raise ValueError(f"Embeddings are empty or have inconsistent dimensions: {sorted(lengths)}")
    dim = lengths.pop()

    # Compare against the dimension of the live index; self.dimension may be
    # stale for an index that was loaded from disk.
    index_dim = getattr(self.index, 'd', self.dimension)
    if dim != index_dim:
        if self.index.ntotal > 0:
            # Replacing a populated index would drop its vectors while
            # self.documents keeps the old entries, so refuse instead.
            raise ValueError(
                f"Embedding dimension {dim} does not match existing index dimension {index_dim}"
            )
        self.dimension = dim
        self.index = self.faiss.IndexFlatIP(self.dimension)
        print(f"更新向量维度为: {self.dimension}")
    self.dimension = dim

    # L2-normalise so that inner product equals cosine similarity.
    embeddings_np = np.array(embeddings).astype('float32')
    norms = np.linalg.norm(embeddings_np, axis=1, keepdims=True)
    embeddings_np = embeddings_np / (norms + 1e-10)

    # Document ids continue from the current document count.
    start_idx = len(self.documents)
    self.index.add(embeddings_np)

    # Store the chunk text and metadata alongside the vectors.
    for offset, chunk in enumerate(chunks):
        self.documents.append({
            'id': start_idx + offset,
            'content': chunk,
            'metadata': dict(metadata) if metadata else {}
        })

    # Persist automatically after every addition.
    self.save_index()

    return len(chunks)
|
||||
|
||||
def search(self, query, top_k=None):
    """Return the stored documents most similar to ``query``.

    Args:
        query: Query text; it is embedded with ``_embed_with_ollama``.
        top_k: Maximum number of hits; defaults to the module-level ``TOP_K``.

    Returns:
        A list of dicts with keys ``content``, ``score`` and ``metadata``, in
        the order returned by the index. Empty when the index is missing or
        empty, or when ``top_k`` is not positive.
    """
    if top_k is None:
        top_k = TOP_K

    if self.index is None or self.index.ntotal == 0:
        return []

    # Never ask for more hits than there are vectors. A non-positive request
    # cannot yield results, so skip the embedding call entirely.
    k = min(top_k, self.index.ntotal)
    if k <= 0:
        return []

    self._load_faiss()

    # Embed the query.
    query_embedding = self._embed_with_ollama(query)
    query_np = np.array([query_embedding]).astype('float32')

    # L2-normalise the query vector.
    query_np = query_np / (np.linalg.norm(query_np) + 1e-10)

    scores, indices = self.index.search(query_np, k)

    # Keep only positions that map to a stored document.
    results = []
    for score, idx in zip(scores[0], indices[0]):
        if 0 <= idx < len(self.documents):
            doc = self.documents[idx]
            results.append({
                'content': doc['content'],
                'score': float(score),
                'metadata': doc.get('metadata', {})
            })

    return results
|
||||
|
||||
def delete_by_filename(self, filename):
    """Remove every document chunk whose metadata ``filename`` equals ``filename``.

    The index is rebuilt from the remaining chunks by embedding them again.
    The new index and document list are prepared first and only swapped in
    once embedding succeeded, so a failure leaves the current state intact.

    Args:
        filename: Value compared against ``metadata['filename']`` of each
            stored document.

    Returns:
        The number of chunks removed (0 when nothing matched).
    """
    if not self.documents:
        return 0

    self._load_faiss()

    # Documents to keep.
    remaining_docs = [
        doc for doc in self.documents
        if doc.get('metadata', {}).get('filename') != filename
    ]
    deleted_count = len(self.documents) - len(remaining_docs)
    if deleted_count == 0:
        return 0

    rebuilt_docs = []
    if remaining_docs:
        chunks = [doc['content'] for doc in remaining_docs]
        metadatas = [doc.get('metadata', {}) for doc in remaining_docs]

        embeddings = self._embed_batch(chunks)
        embeddings_np = np.array(embeddings).astype('float32')
        norms = np.linalg.norm(embeddings_np, axis=1, keepdims=True)
        embeddings_np = embeddings_np / (norms + 1e-10)

        # Size the new index from the vectors actually produced, so a stale
        # self.dimension cannot make the add fail.
        dim = int(embeddings_np.shape[1])
        rebuilt_index = self.faiss.IndexFlatIP(dim)
        rebuilt_index.add(embeddings_np)

        # Ids are renumbered from zero to match index positions.
        for i, (chunk, meta) in enumerate(zip(chunks, metadatas)):
            rebuilt_docs.append({
                'id': i,
                'content': chunk,
                'metadata': meta
            })
    else:
        dim = self.dimension
        rebuilt_index = self.faiss.IndexFlatIP(dim)

    # Everything succeeded: swap in the rebuilt state and persist it.
    self.dimension = dim
    self.index = rebuilt_index
    self.documents = rebuilt_docs
    self.save_index()

    return deleted_count
|
||||
|
||||
def clear(self):
    """Replace the index with a new empty one, drop all documents and save."""
    self._load_faiss()
    empty_index = self.faiss.IndexFlatIP(self.dimension)
    self.index, self.documents = empty_index, []
    self.save_index()
    print("索引已清空")
|
||||
|
||||
def get_stats(self):
    """Summarise the store: chunk count, file count and the file names.

    File names are taken from ``metadata['filename']`` of each document;
    missing or empty names are ignored and duplicates are collapsed.

    Returns:
        Dict with ``total_chunks``, ``total_files`` and ``files`` (a list in
        no particular order).
    """
    candidates = (doc.get('metadata', {}).get('filename') for doc in self.documents)
    unique_files = {name for name in candidates if name}

    return {
        'total_chunks': len(self.documents),
        'total_files': len(unique_files),
        'files': list(unique_files)
    }
|
||||
|
||||
# Global instance
|
||||
vector_store = VectorStore()
|
||||
|
|
@ -0,0 +1,211 @@
|
|||
package com.ddnai.web.controller.psychology;
|
||||
|
||||
import com.ddnai.common.core.controller.BaseController;
|
||||
import com.ddnai.common.core.domain.AjaxResult;
|
||||
import com.ddnai.system.rag.client.PythonRagClient;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.security.access.prepost.PreAuthorize;
|
||||
import org.springframework.web.bind.annotation.*;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* RAG 知识库管理Controller (调用Python服务)
|
||||
*
|
||||
* @author ddnai
|
||||
*/
|
||||
@RestController
|
||||
@RequestMapping("/psychology/rag")
|
||||
public class PsyRagController extends BaseController {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(PsyRagController.class);
|
||||
|
||||
@Autowired
|
||||
private PythonRagClient pythonRagClient;
|
||||
|
||||
/**
|
||||
* 检查服务状态
|
||||
*/
|
||||
@GetMapping("/status")
|
||||
public AjaxResult checkStatus() {
|
||||
boolean available = pythonRagClient.isAvailable();
|
||||
Map<String, Object> result = new HashMap<>();
|
||||
result.put("available", available);
|
||||
result.put("message", available ? "Python RAG 服务运行中" : "Python RAG 服务未启动");
|
||||
|
||||
if (available) {
|
||||
result.put("stats", pythonRagClient.getStats());
|
||||
}
|
||||
|
||||
return AjaxResult.success(result);
|
||||
}
|
||||
|
||||
/**
|
||||
* 上传文档
|
||||
*/
|
||||
@PreAuthorize("@ss.hasPermi('psychology:knowledge:upload')")
|
||||
@PostMapping("/upload")
|
||||
public AjaxResult uploadDocument(@RequestParam("file") MultipartFile file) {
|
||||
try {
|
||||
if (!pythonRagClient.isAvailable()) {
|
||||
return AjaxResult.error("Python RAG 服务未启动,请先启动 rag-service");
|
||||
}
|
||||
|
||||
Map<String, Object> result = pythonRagClient.uploadDocument(file);
|
||||
|
||||
if (Boolean.TRUE.equals(result.get("success"))) {
|
||||
return AjaxResult.success("文档上传成功", result.get("data"));
|
||||
} else {
|
||||
return AjaxResult.error("上传失败: " + result.get("error"));
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("文档上传失败", e);
|
||||
return AjaxResult.error("文档上传失败:" + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取文档列表
|
||||
*/
|
||||
@PreAuthorize("@ss.hasPermi('psychology:knowledge:list')")
|
||||
@GetMapping("/documents")
|
||||
public AjaxResult listDocuments() {
|
||||
try {
|
||||
if (!pythonRagClient.isAvailable()) {
|
||||
return AjaxResult.error("Python RAG 服务未启动");
|
||||
}
|
||||
|
||||
List<Map<String, Object>> documents = pythonRagClient.listDocuments();
|
||||
return AjaxResult.success(documents);
|
||||
} catch (Exception e) {
|
||||
log.error("获取文档列表失败", e);
|
||||
return AjaxResult.error("获取文档列表失败:" + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 删除文档
|
||||
*/
|
||||
@PreAuthorize("@ss.hasPermi('psychology:knowledge:remove')")
|
||||
@DeleteMapping("/documents/{filename}")
|
||||
public AjaxResult deleteDocument(@PathVariable String filename) {
|
||||
try {
|
||||
if (!pythonRagClient.isAvailable()) {
|
||||
return AjaxResult.error("Python RAG 服务未启动");
|
||||
}
|
||||
|
||||
Map<String, Object> result = pythonRagClient.deleteDocument(filename);
|
||||
|
||||
if (Boolean.TRUE.equals(result.get("success"))) {
|
||||
return AjaxResult.success("删除成功");
|
||||
} else {
|
||||
return AjaxResult.error("删除失败: " + result.get("error"));
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("删除文档失败", e);
|
||||
return AjaxResult.error("删除文档失败:" + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 搜索文档
|
||||
*/
|
||||
@PreAuthorize("@ss.hasPermi('psychology:knowledge:list')")
|
||||
@PostMapping("/search")
|
||||
public AjaxResult search(@RequestBody Map<String, Object> params) {
|
||||
try {
|
||||
if (!pythonRagClient.isAvailable()) {
|
||||
return AjaxResult.error("Python RAG 服务未启动");
|
||||
}
|
||||
|
||||
String query = (String) params.get("query");
|
||||
Integer topK = params.get("topK") != null ? (Integer) params.get("topK") : 5;
|
||||
|
||||
if (query == null || query.trim().isEmpty()) {
|
||||
return AjaxResult.error("查询内容不能为空");
|
||||
}
|
||||
|
||||
List<Map<String, Object>> results = pythonRagClient.search(query, topK);
|
||||
return AjaxResult.success(results);
|
||||
} catch (Exception e) {
|
||||
log.error("搜索失败", e);
|
||||
return AjaxResult.error("搜索失败:" + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取统计信息
|
||||
*/
|
||||
@PreAuthorize("@ss.hasPermi('psychology:knowledge:list')")
|
||||
@GetMapping("/stats")
|
||||
public AjaxResult getStats() {
|
||||
try {
|
||||
if (!pythonRagClient.isAvailable()) {
|
||||
Map<String, Object> result = new HashMap<>();
|
||||
result.put("available", false);
|
||||
result.put("message", "Python RAG 服务未启动");
|
||||
return AjaxResult.success(result);
|
||||
}
|
||||
|
||||
Map<String, Object> stats = pythonRagClient.getStats();
|
||||
stats.put("available", true);
|
||||
return AjaxResult.success(stats);
|
||||
} catch (Exception e) {
|
||||
log.error("获取统计信息失败", e);
|
||||
return AjaxResult.error("获取统计信息失败:" + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 重建索引
|
||||
*/
|
||||
@PreAuthorize("@ss.hasPermi('psychology:knowledge:rebuild')")
|
||||
@PostMapping("/rebuild")
|
||||
public AjaxResult rebuildIndex() {
|
||||
try {
|
||||
if (!pythonRagClient.isAvailable()) {
|
||||
return AjaxResult.error("Python RAG 服务未启动");
|
||||
}
|
||||
|
||||
Map<String, Object> result = pythonRagClient.rebuildIndex();
|
||||
|
||||
if (Boolean.TRUE.equals(result.get("success"))) {
|
||||
return AjaxResult.success("索引重建成功", result.get("data"));
|
||||
} else {
|
||||
return AjaxResult.error("重建失败: " + result.get("error"));
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("重建索引失败", e);
|
||||
return AjaxResult.error("重建索引失败:" + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 扫描文件夹
|
||||
*/
|
||||
@PreAuthorize("@ss.hasPermi('psychology:knowledge:upload')")
|
||||
@PostMapping("/scan")
|
||||
public AjaxResult scanFolder() {
|
||||
try {
|
||||
if (!pythonRagClient.isAvailable()) {
|
||||
return AjaxResult.error("Python RAG 服务未启动");
|
||||
}
|
||||
|
||||
Map<String, Object> result = pythonRagClient.scanFolder();
|
||||
|
||||
if (Boolean.TRUE.equals(result.get("success"))) {
|
||||
return AjaxResult.success("扫描完成", result.get("data"));
|
||||
} else {
|
||||
return AjaxResult.error("扫描失败: " + result.get("error"));
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("扫描文件夹失败", e);
|
||||
return AjaxResult.error("扫描文件夹失败:" + e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -143,40 +143,38 @@ xss:
|
|||
|
||||
# RAG知识库配置
|
||||
rag:
|
||||
# 使用模式: openai(外部API)、ollama(本地) 或 hybrid(混合模式)
|
||||
mode: hybrid
|
||||
# Python RAG 服务配置(主要使用这个)
|
||||
python:
|
||||
url: http://localhost:5000
|
||||
enabled: true
|
||||
|
||||
# OpenAI兼容API配置(用于文本生成)
|
||||
# 禁用 Java 端的 RAG 功能,全部由 Python 服务处理
|
||||
# 使用模式: disabled(禁用Java端)、python(仅Python)
|
||||
mode: disabled
|
||||
|
||||
# OpenAI兼容API配置(仅用于AI报告生成,不用于RAG)
|
||||
openai:
|
||||
# Kimi API (Moonshot) - 你现有的API
|
||||
base-url: https://api.moonshot.cn/v1
|
||||
# 你的Kimi API Key
|
||||
api-key: sk-U9fdriPxwBcrpWW0Ite3N0eVtX7VxnqqqYUIBAdWd1hgEA9m
|
||||
# 嵌入模型(混合模式下不使用,由Ollama提供)
|
||||
embed-model: BAAI/bge-large-zh-v1.5
|
||||
# 生成模型(用于AI报告生成)
|
||||
embed-model: none
|
||||
generate-model: moonshot-v1-32k
|
||||
# 连接超时时间(秒)
|
||||
connect-timeout: 30
|
||||
# 读取超时时间(秒)
|
||||
read-timeout: 300
|
||||
connect-timeout: 10
|
||||
read-timeout: 60
|
||||
|
||||
# Ollama配置(用于本地嵌入)
|
||||
# Ollama配置(禁用)
|
||||
ollama:
|
||||
url: http://localhost:11434
|
||||
# 嵌入模型(已下载)
|
||||
embed-model: nomic-embed-text
|
||||
# 生成模型(混合模式下不使用,由OpenAI API提供)
|
||||
generate-model: deepseek-r1:32b
|
||||
# 连接超时时间(秒)
|
||||
connect-timeout: 30
|
||||
# 读取超时时间(秒)
|
||||
read-timeout: 300
|
||||
embed-model: none
|
||||
generate-model: none
|
||||
connect-timeout: 5
|
||||
read-timeout: 30
|
||||
enabled: false
|
||||
|
||||
# ChromaDB配置(本地部署,可选)
|
||||
# ChromaDB配置(禁用)
|
||||
chromadb:
|
||||
url: http://localhost:8000
|
||||
collection: psychology_knowledge
|
||||
enabled: false
|
||||
|
||||
# 存储配置
|
||||
storage:
|
||||
|
|
@ -184,9 +182,9 @@ rag:
|
|||
log-path: D:/wwwroot/RAG/logs
|
||||
chroma-data-path: D:/wwwroot/RAG/data/chroma_db
|
||||
|
||||
# 文件监听配置
|
||||
# 文件监听配置(禁用)
|
||||
file-watcher:
|
||||
enabled: false # 默认关闭,避免自动处理
|
||||
enabled: false
|
||||
watch-path: D:/wwwroot/RAG/uploads
|
||||
scan-interval: 10
|
||||
|
||||
|
|
|
|||
3
ry-xinli-admin/src/main/resources/banner.txt
Normal file
3
ry-xinli-admin/src/main/resources/banner.txt
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
Application Version: ${ruoyi.version}
|
||||
Spring Boot Version: ${spring-boot.version}
|
||||
// AI心理健康测评系统 永不宕机 永无BUG //
|
||||
|
|
@ -40,22 +40,22 @@ public class ChromaDBClient {
|
|||
this.baseUrl = ragProperties.getChromadb().getUrl();
|
||||
this.collectionName = ragProperties.getChromadb().getCollection();
|
||||
|
||||
// 创建OkHttpClient实例
|
||||
// 创建OkHttpClient实例,使用较短的超时时间
|
||||
this.httpClient = new OkHttpClient.Builder()
|
||||
.connectTimeout(ragProperties.getChromadb().getConnectTimeout(), TimeUnit.SECONDS)
|
||||
.readTimeout(ragProperties.getChromadb().getReadTimeout(), TimeUnit.SECONDS)
|
||||
.writeTimeout(ragProperties.getChromadb().getConnectTimeout(), TimeUnit.SECONDS)
|
||||
.retryOnConnectionFailure(true)
|
||||
.connectTimeout(5, TimeUnit.SECONDS)
|
||||
.readTimeout(10, TimeUnit.SECONDS)
|
||||
.writeTimeout(5, TimeUnit.SECONDS)
|
||||
.retryOnConnectionFailure(false)
|
||||
.build();
|
||||
|
||||
log.info("ChromaDBClient initialized with base URL: {}, collection: {}", baseUrl, collectionName);
|
||||
|
||||
// 尝试创建集合(如果不存在)
|
||||
try {
|
||||
ensureCollectionExists();
|
||||
} catch (IOException e) {
|
||||
log.warn("Failed to ensure collection exists: {}", e.getMessage());
|
||||
}
|
||||
// 不在启动时尝试连接,延迟到首次使用时
|
||||
// try {
|
||||
// ensureCollectionExists();
|
||||
// } catch (IOException e) {
|
||||
// log.warn("Failed to ensure collection exists: {}", e.getMessage());
|
||||
// }
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -41,12 +41,12 @@ public class OllamaClient {
|
|||
public void init() {
|
||||
this.baseUrl = ragProperties.getOllama().getUrl();
|
||||
|
||||
// 创建OkHttpClient实例
|
||||
// 创建OkHttpClient实例,使用较短的超时时间避免启动阻塞
|
||||
this.httpClient = new OkHttpClient.Builder()
|
||||
.connectTimeout(ragProperties.getOllama().getConnectTimeout(), TimeUnit.SECONDS)
|
||||
.readTimeout(ragProperties.getOllama().getReadTimeout(), TimeUnit.SECONDS)
|
||||
.writeTimeout(ragProperties.getOllama().getConnectTimeout(), TimeUnit.SECONDS)
|
||||
.retryOnConnectionFailure(true)
|
||||
.connectTimeout(5, TimeUnit.SECONDS)
|
||||
.readTimeout(30, TimeUnit.SECONDS)
|
||||
.writeTimeout(10, TimeUnit.SECONDS)
|
||||
.retryOnConnectionFailure(false)
|
||||
.build();
|
||||
|
||||
log.info("OllamaClient initialized with base URL: {}", baseUrl);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,239 @@
|
|||
package com.ddnai.system.rag.client;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.core.io.ByteArrayResource;
|
||||
import org.springframework.http.*;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.util.LinkedMultiValueMap;
|
||||
import org.springframework.util.MultiValueMap;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import javax.annotation.PostConstruct;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Python RAG 服务客户端
|
||||
* 调用独立的 Python 知识库服务
|
||||
*/
|
||||
@Component
|
||||
public class PythonRagClient {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(PythonRagClient.class);
|
||||
|
||||
@Value("${rag.python.url:http://localhost:5000}")
|
||||
private String pythonServiceUrl;
|
||||
|
||||
@Value("${rag.python.enabled:true}")
|
||||
private boolean enabled;
|
||||
|
||||
private final RestTemplate restTemplate;
|
||||
private final ObjectMapper objectMapper;
|
||||
|
||||
public PythonRagClient() {
|
||||
this.restTemplate = new RestTemplate();
|
||||
this.objectMapper = new ObjectMapper();
|
||||
}
|
||||
|
||||
@PostConstruct
|
||||
public void init() {
|
||||
log.info("Python RAG Client initialized, URL: {}, Enabled: {}", pythonServiceUrl, enabled);
|
||||
}
|
||||
|
||||
/**
|
||||
* 检查服务是否可用
|
||||
*/
|
||||
public boolean isAvailable() {
|
||||
if (!enabled) {
|
||||
return false;
|
||||
}
|
||||
try {
|
||||
String url = pythonServiceUrl + "/api/health";
|
||||
ResponseEntity<String> response = restTemplate.getForEntity(url, String.class);
|
||||
return response.getStatusCode() == HttpStatus.OK;
|
||||
} catch (Exception e) {
|
||||
log.warn("Python RAG service not available: {}", e.getMessage());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 上传文档
|
||||
*/
|
||||
public Map<String, Object> uploadDocument(MultipartFile file) {
|
||||
try {
|
||||
String url = pythonServiceUrl + "/api/documents/upload";
|
||||
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
headers.setContentType(MediaType.MULTIPART_FORM_DATA);
|
||||
|
||||
MultiValueMap<String, Object> body = new LinkedMultiValueMap<>();
|
||||
body.add("file", new ByteArrayResource(file.getBytes()) {
|
||||
@Override
|
||||
public String getFilename() {
|
||||
return file.getOriginalFilename();
|
||||
}
|
||||
});
|
||||
|
||||
HttpEntity<MultiValueMap<String, Object>> requestEntity = new HttpEntity<>(body, headers);
|
||||
ResponseEntity<String> response = restTemplate.postForEntity(url, requestEntity, String.class);
|
||||
|
||||
return parseResponse(response.getBody());
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to upload document to Python service: {}", e.getMessage());
|
||||
return errorResult("上传失败: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取文档列表
|
||||
*/
|
||||
public List<Map<String, Object>> listDocuments() {
|
||||
try {
|
||||
String url = pythonServiceUrl + "/api/documents";
|
||||
ResponseEntity<String> response = restTemplate.getForEntity(url, String.class);
|
||||
|
||||
Map<String, Object> result = parseResponse(response.getBody());
|
||||
if (Boolean.TRUE.equals(result.get("success"))) {
|
||||
Object data = result.get("data");
|
||||
if (data instanceof List) {
|
||||
return (List<Map<String, Object>>) data;
|
||||
}
|
||||
}
|
||||
return new ArrayList<>();
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to list documents: {}", e.getMessage());
|
||||
return new ArrayList<>();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 删除文档
|
||||
*/
|
||||
public Map<String, Object> deleteDocument(String filename) {
|
||||
try {
|
||||
String url = pythonServiceUrl + "/api/documents/" + filename;
|
||||
restTemplate.delete(url);
|
||||
return successResult("删除成功");
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to delete document: {}", e.getMessage());
|
||||
return errorResult("删除失败: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 搜索文档
|
||||
*/
|
||||
public List<Map<String, Object>> search(String query, int topK) {
|
||||
try {
|
||||
String url = pythonServiceUrl + "/api/search";
|
||||
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
headers.setContentType(MediaType.APPLICATION_JSON);
|
||||
|
||||
Map<String, Object> body = new HashMap<>();
|
||||
body.put("query", query);
|
||||
body.put("top_k", topK);
|
||||
|
||||
HttpEntity<Map<String, Object>> requestEntity = new HttpEntity<>(body, headers);
|
||||
ResponseEntity<String> response = restTemplate.postForEntity(url, requestEntity, String.class);
|
||||
|
||||
Map<String, Object> result = parseResponse(response.getBody());
|
||||
if (Boolean.TRUE.equals(result.get("success"))) {
|
||||
Object data = result.get("data");
|
||||
if (data instanceof List) {
|
||||
return (List<Map<String, Object>>) data;
|
||||
}
|
||||
}
|
||||
return new ArrayList<>();
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to search: {}", e.getMessage());
|
||||
return new ArrayList<>();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取统计信息
|
||||
*/
|
||||
public Map<String, Object> getStats() {
|
||||
try {
|
||||
String url = pythonServiceUrl + "/api/stats";
|
||||
ResponseEntity<String> response = restTemplate.getForEntity(url, String.class);
|
||||
|
||||
Map<String, Object> result = parseResponse(response.getBody());
|
||||
if (Boolean.TRUE.equals(result.get("success"))) {
|
||||
return (Map<String, Object>) result.get("data");
|
||||
}
|
||||
return new HashMap<>();
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to get stats: {}", e.getMessage());
|
||||
return new HashMap<>();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 重建索引
|
||||
*/
|
||||
public Map<String, Object> rebuildIndex() {
|
||||
try {
|
||||
String url = pythonServiceUrl + "/api/rebuild";
|
||||
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
headers.setContentType(MediaType.APPLICATION_JSON);
|
||||
|
||||
HttpEntity<String> requestEntity = new HttpEntity<>("{}", headers);
|
||||
ResponseEntity<String> response = restTemplate.postForEntity(url, requestEntity, String.class);
|
||||
|
||||
return parseResponse(response.getBody());
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to rebuild index: {}", e.getMessage());
|
||||
return errorResult("重建索引失败: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 扫描文件夹
|
||||
*/
|
||||
public Map<String, Object> scanFolder() {
|
||||
try {
|
||||
String url = pythonServiceUrl + "/api/scan";
|
||||
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
headers.setContentType(MediaType.APPLICATION_JSON);
|
||||
|
||||
HttpEntity<String> requestEntity = new HttpEntity<>("{}", headers);
|
||||
ResponseEntity<String> response = restTemplate.postForEntity(url, requestEntity, String.class);
|
||||
|
||||
return parseResponse(response.getBody());
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to scan folder: {}", e.getMessage());
|
||||
return errorResult("扫描失败: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
private Map<String, Object> parseResponse(String json) {
|
||||
try {
|
||||
return objectMapper.readValue(json, Map.class);
|
||||
} catch (Exception e) {
|
||||
return errorResult("解析响应失败");
|
||||
}
|
||||
}
|
||||
|
||||
private Map<String, Object> successResult(String message) {
|
||||
Map<String, Object> result = new HashMap<>();
|
||||
result.put("success", true);
|
||||
result.put("message", message);
|
||||
return result;
|
||||
}
|
||||
|
||||
private Map<String, Object> errorResult(String error) {
|
||||
Map<String, Object> result = new HashMap<>();
|
||||
result.put("success", false);
|
||||
result.put("error", error);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
|
@ -51,6 +51,15 @@ public class AIServiceAdapter {
|
|||
|
||||
@PostConstruct
|
||||
public void init() {
|
||||
// 检查是否禁用 Java 端 RAG
|
||||
if ("disabled".equalsIgnoreCase(mode) || "python".equalsIgnoreCase(mode)) {
|
||||
log.info("RAG服务模式: 已禁用Java端RAG,使用Python服务");
|
||||
useOpenAI = false;
|
||||
useSimpleStore = true;
|
||||
isHybridMode = false;
|
||||
return;
|
||||
}
|
||||
|
||||
isHybridMode = "hybrid".equalsIgnoreCase(mode);
|
||||
useOpenAI = "openai".equalsIgnoreCase(mode) || isHybridMode;
|
||||
useSimpleStore = true; // 默认使用简单存储
|
||||
|
|
|
|||
|
|
@ -95,7 +95,7 @@ public class KnowledgeService {
|
|||
List<String> chunks = textSplitter.split(text);
|
||||
log.info("Split document into {} chunks", chunks.size());
|
||||
|
||||
// 5. 向量化 - 添加异常处理
|
||||
// 5. 向量化
|
||||
List<float[]> embeddings = null;
|
||||
boolean vectorizationSuccess = false;
|
||||
|
||||
|
|
@ -105,7 +105,7 @@ public class KnowledgeService {
|
|||
log.info("Successfully generated {} embeddings", embeddings.size());
|
||||
vectorizationSuccess = true;
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to generate embeddings, document will be saved without vectors: {}", e.getMessage(), e);
|
||||
log.error("Failed to generate embeddings: {}", e.getMessage());
|
||||
// 继续处理,文档仍然会被保存,只是没有向量
|
||||
}
|
||||
|
||||
|
|
@ -132,17 +132,17 @@ public class KnowledgeService {
|
|||
ids.add(docId + "_chunk_" + i);
|
||||
}
|
||||
|
||||
// 7. 存储到ChromaDB - 只有向量化成功才存储
|
||||
// 7. 存储向量 - 使用AIServiceAdapter(自动选择SimpleVectorStore或ChromaDB)
|
||||
if (vectorizationSuccess && embeddings != null) {
|
||||
try {
|
||||
chromaDBClient.addDocuments(chunks, embeddings, metadatas, ids);
|
||||
log.info("Stored {} chunks to ChromaDB", chunks.size());
|
||||
aiServiceAdapter.addDocuments(chunks, embeddings, metadatas, ids);
|
||||
log.info("Stored {} chunks to vector store", chunks.size());
|
||||
} catch (Exception e) {
|
||||
log.warn("Failed to store to ChromaDB: {}", e.getMessage());
|
||||
log.warn("Failed to store to vector store: {}", e.getMessage());
|
||||
// 继续处理,文档信息仍然会被保存
|
||||
}
|
||||
} else {
|
||||
log.info("Skipped ChromaDB storage (vectorization failed or disabled)");
|
||||
log.info("Skipped vector storage (vectorization failed)");
|
||||
}
|
||||
|
||||
// 8. 更新文档索引
|
||||
|
|
|
|||
2
xinli-ui/public/robots.txt
Normal file
2
xinli-ui/public/robots.txt
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
User-agent: *
|
||||
Disallow: /
|
||||
|
|
@ -1,5 +1,66 @@
|
|||
import request from '@/utils/request'
|
||||
|
||||
// ========== Python RAG service API ==========
|
||||
|
||||
/**
 * Check the RAG service status.
 * Sends GET /psychology/rag/status and returns the result of `request`.
 */
export function checkRagStatus() {
  const options = { url: '/psychology/rag/status', method: 'get' }
  return request(options)
}
|
||||
|
||||
/**
 * Get the RAG document list.
 * Sends GET /psychology/rag/documents and returns the result of `request`.
 */
export function listRagDocuments() {
  const options = { url: '/psychology/rag/documents', method: 'get' }
  return request(options)
}
|
||||
|
||||
/**
 * Delete a RAG document.
 * Sends DELETE /psychology/rag/documents/<filename>; the file name is
 * URI-encoded before it is appended to the path.
 *
 * @param filename name of the document to delete
 */
export function delRagDocument(filename) {
  const encodedName = encodeURIComponent(filename)
  const options = {
    url: '/psychology/rag/documents/' + encodedName,
    method: 'delete'
  }
  return request(options)
}
|
||||
|
||||
/**
 * Search the RAG knowledge base.
 * Sends POST /psychology/rag/search with `{ query, topK }` as the body.
 *
 * @param query search text
 * @param topK  maximum number of results, 5 by default
 */
export function ragSearch(query, topK = 5) {
  const payload = { query: query, topK: topK }
  const options = {
    url: '/psychology/rag/search',
    method: 'post',
    data: payload
  }
  return request(options)
}
|
||||
|
||||
/**
 * Get RAG statistics.
 * Sends GET /psychology/rag/stats and returns the result of `request`.
 */
export function getRagStats() {
  const options = { url: '/psychology/rag/stats', method: 'get' }
  return request(options)
}
|
||||
|
||||
/**
 * Rebuild the RAG index.
 * Sends POST /psychology/rag/rebuild and returns the result of `request`.
 */
export function rebuildRagIndex() {
  const options = { url: '/psychology/rag/rebuild', method: 'post' }
  return request(options)
}
|
||||
|
||||
/**
 * Scan the document folder.
 * Sends POST /psychology/rag/scan and returns the result of `request`.
 */
export function scanRagFolder() {
  const options = { url: '/psychology/rag/scan', method: 'post' }
  return request(options)
}
|
||||
|
||||
// ========== Legacy API (kept for compatibility) ==========
|
||||
|
||||
// 查询知识库文档列表
|
||||
export function listDocuments(query) {
|
||||
return request({
|
||||
|
|
|
|||
|
|
@ -1155,6 +1155,14 @@ export default {
|
|||
|
||||
.options-container {
|
||||
margin-top: 20px;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
/* 确保radio-group和checkbox-group占满宽度 */
|
||||
.options-container .el-radio-group,
|
||||
.options-container .el-checkbox-group {
|
||||
width: 100%;
|
||||
display: block;
|
||||
}
|
||||
|
||||
.option-item {
|
||||
|
|
@ -1167,6 +1175,9 @@ export default {
|
|||
gap: 15px;
|
||||
border-radius: 4px;
|
||||
margin-bottom: 2px;
|
||||
width: 100%;
|
||||
box-sizing: border-box;
|
||||
min-height: 48px;
|
||||
}
|
||||
|
||||
.option-item:hover {
|
||||
|
|
@ -1209,7 +1220,27 @@ export default {
|
|||
|
||||
.option-content {
|
||||
flex: 1;
|
||||
pointer-events: none;
|
||||
width: 100%;
|
||||
min-width: 0;
|
||||
}
|
||||
|
||||
/* 让el-radio和el-checkbox占满整行,整行可点击 */
|
||||
.option-content .el-radio,
|
||||
.option-content .el-checkbox {
|
||||
display: flex;
|
||||
width: 100%;
|
||||
margin-right: 0;
|
||||
padding-right: 20px;
|
||||
white-space: normal;
|
||||
align-items: flex-start;
|
||||
}
|
||||
|
||||
.option-content .el-radio__label,
|
||||
.option-content .el-checkbox__label {
|
||||
flex: 1;
|
||||
white-space: normal;
|
||||
line-height: 1.5;
|
||||
padding-left: 10px;
|
||||
}
|
||||
|
||||
.option-tts-btn {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,17 @@
|
|||
<template>
|
||||
<div class="app-container">
|
||||
<!-- 服务状态提示 -->
|
||||
<el-alert
|
||||
v-if="!serviceAvailable"
|
||||
title="Python RAG 服务未启动"
|
||||
type="warning"
|
||||
description="请先运行 rag-service/start.bat 启动知识库服务"
|
||||
show-icon
|
||||
:closable="false"
|
||||
style="margin-bottom: 20px;"
|
||||
>
|
||||
</el-alert>
|
||||
|
||||
<el-row :gutter="20">
|
||||
<!-- 统计卡片 -->
|
||||
<el-col :span="6">
|
||||
|
|
@ -7,7 +19,7 @@
|
|||
<div class="stat-content">
|
||||
<i class="el-icon-document stat-icon"></i>
|
||||
<div class="stat-info">
|
||||
<div class="stat-value">{{ statistics.documentCount || 0 }}</div>
|
||||
<div class="stat-value">{{ statistics.total_files || 0 }}</div>
|
||||
<div class="stat-label">文档数量</div>
|
||||
</div>
|
||||
</div>
|
||||
|
|
@ -18,7 +30,7 @@
|
|||
<div class="stat-content">
|
||||
<i class="el-icon-files stat-icon"></i>
|
||||
<div class="stat-info">
|
||||
<div class="stat-value">{{ statistics.chunkCount || 0 }}</div>
|
||||
<div class="stat-value">{{ statistics.total_chunks || 0 }}</div>
|
||||
<div class="stat-label">知识片段</div>
|
||||
</div>
|
||||
</div>
|
||||
|
|
@ -27,10 +39,10 @@
|
|||
<el-col :span="6">
|
||||
<el-card class="stat-card">
|
||||
<div class="stat-content">
|
||||
<i class="el-icon-connection stat-icon"></i>
|
||||
<i class="el-icon-connection stat-icon" :style="{ color: serviceAvailable ? '#67C23A' : '#F56C6C' }"></i>
|
||||
<div class="stat-info">
|
||||
<div class="stat-value">{{ statistics.aiStatus || '未知' }}</div>
|
||||
<div class="stat-label">AI服务</div>
|
||||
<div class="stat-value">{{ serviceAvailable ? '运行中' : '未启动' }}</div>
|
||||
<div class="stat-label">RAG服务</div>
|
||||
</div>
|
||||
</div>
|
||||
</el-card>
|
||||
|
|
@ -38,10 +50,10 @@
|
|||
<el-col :span="6">
|
||||
<el-card class="stat-card">
|
||||
<div class="stat-content">
|
||||
<i class="el-icon-data-line stat-icon"></i>
|
||||
<i class="el-icon-folder-opened stat-icon"></i>
|
||||
<div class="stat-info">
|
||||
<div class="stat-value">{{ statistics.vectorStatus || '未知' }}</div>
|
||||
<div class="stat-label">向量存储</div>
|
||||
<div class="stat-value" style="font-size: 14px;">knowledge_docs</div>
|
||||
<div class="stat-label">知识库文件夹</div>
|
||||
</div>
|
||||
</div>
|
||||
</el-card>
|
||||
|
|
@ -56,8 +68,19 @@
|
|||
icon="el-icon-upload"
|
||||
size="mini"
|
||||
@click="handleUpload"
|
||||
:disabled="!serviceAvailable"
|
||||
>上传文档</el-button>
|
||||
</el-col>
|
||||
<el-col :span="1.5">
|
||||
<el-button
|
||||
type="warning"
|
||||
icon="el-icon-folder"
|
||||
size="mini"
|
||||
@click="handleScan"
|
||||
:loading="scanLoading"
|
||||
:disabled="!serviceAvailable"
|
||||
>扫描文件夹</el-button>
|
||||
</el-col>
|
||||
<el-col :span="1.5">
|
||||
<el-button
|
||||
type="success"
|
||||
|
|
@ -65,50 +88,65 @@
|
|||
size="mini"
|
||||
@click="handleRebuild"
|
||||
:loading="rebuildLoading"
|
||||
:disabled="!serviceAvailable"
|
||||
>重建索引</el-button>
|
||||
</el-col>
|
||||
<el-col :span="1.5">
|
||||
<el-button
|
||||
type="danger"
|
||||
icon="el-icon-delete"
|
||||
size="mini"
|
||||
@click="handleClearAll"
|
||||
>清空知识库</el-button>
|
||||
</el-col>
|
||||
<el-col :span="1.5">
|
||||
<el-button
|
||||
type="info"
|
||||
icon="el-icon-refresh"
|
||||
size="mini"
|
||||
@click="getList"
|
||||
>刷新</el-button>
|
||||
@click="checkStatus"
|
||||
>刷新状态</el-button>
|
||||
</el-col>
|
||||
</el-row>
|
||||
|
||||
<!-- 搜索测试 -->
|
||||
<el-card style="margin-top: 20px;" v-if="serviceAvailable">
|
||||
<div slot="header">
|
||||
<span>知识库搜索测试</span>
|
||||
</div>
|
||||
<el-row :gutter="10">
|
||||
<el-col :span="18">
|
||||
<el-input
|
||||
v-model="searchQuery"
|
||||
placeholder="输入问题测试知识库检索..."
|
||||
@keyup.enter.native="handleSearch"
|
||||
></el-input>
|
||||
</el-col>
|
||||
<el-col :span="6">
|
||||
<el-button type="primary" @click="handleSearch" :loading="searchLoading">搜索</el-button>
|
||||
</el-col>
|
||||
</el-row>
|
||||
<div v-if="searchResults.length > 0" style="margin-top: 15px;">
|
||||
<div v-for="(result, index) in searchResults" :key="index" class="search-result">
|
||||
<div class="result-header">
|
||||
<span class="result-score">相关度: {{ (result.score * 100).toFixed(1) }}%</span>
|
||||
<span class="result-source">来源: {{ getFilename(result) }}</span>
|
||||
</div>
|
||||
<div class="result-content">{{ result.content }}</div>
|
||||
</div>
|
||||
</div>
|
||||
</el-card>
|
||||
|
||||
<!-- 文档列表 -->
|
||||
<el-table v-loading="loading" :data="documentList" style="margin-top: 20px;">
|
||||
<el-table-column label="文档名称" prop="filename" min-width="200" />
|
||||
<el-table-column label="分类" prop="category" width="120">
|
||||
<el-table-column label="片段数" prop="chunks" width="100" align="center" />
|
||||
<el-table-column label="文件大小" prop="size" width="120" align="center">
|
||||
<template slot-scope="scope">
|
||||
<el-tag v-if="scope.row.category" size="small">{{ scope.row.category }}</el-tag>
|
||||
<span v-else>-</span>
|
||||
{{ formatFileSize(scope.row.size) }}
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column label="片段数" prop="chunkCount" width="100" align="center" />
|
||||
<el-table-column label="文件大小" prop="fileSize" width="120" align="center">
|
||||
<el-table-column label="状态" width="100" align="center">
|
||||
<template slot-scope="scope">
|
||||
{{ formatFileSize(scope.row.fileSize) }}
|
||||
<el-tag :type="scope.row.exists ? 'success' : 'danger'" size="small">
|
||||
{{ scope.row.exists ? '正常' : '文件丢失' }}
|
||||
</el-tag>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column label="上传时间" prop="uploadTime" width="180" align="center" />
|
||||
<el-table-column label="操作" align="center" width="180" class-name="small-padding fixed-width">
|
||||
<el-table-column label="操作" align="center" width="120" class-name="small-padding fixed-width">
|
||||
<template slot-scope="scope">
|
||||
<el-button
|
||||
size="mini"
|
||||
type="text"
|
||||
icon="el-icon-view"
|
||||
@click="handleView(scope.row)"
|
||||
>查看</el-button>
|
||||
<el-button
|
||||
size="mini"
|
||||
type="text"
|
||||
|
|
@ -120,66 +158,31 @@
|
|||
</el-table-column>
|
||||
</el-table>
|
||||
|
||||
<pagination
|
||||
v-show="total>0"
|
||||
:total="total"
|
||||
:page.sync="queryParams.pageNum"
|
||||
:limit.sync="queryParams.pageSize"
|
||||
@pagination="getList"
|
||||
/>
|
||||
|
||||
<!-- 上传对话框 -->
|
||||
<el-dialog title="上传文档" :visible.sync="uploadDialogVisible" width="600px" append-to-body>
|
||||
<el-form ref="uploadForm" :model="uploadForm" label-width="100px">
|
||||
<el-form-item label="文档分类">
|
||||
<el-select v-model="uploadForm.category" placeholder="请选择分类" style="width: 100%;">
|
||||
<el-option label="测评标准" value="assessment" />
|
||||
<el-option label="理论知识" value="theory" />
|
||||
<el-option label="案例分析" value="case" />
|
||||
<el-option label="干预方法" value="intervention" />
|
||||
<el-option label="其他" value="other" />
|
||||
</el-select>
|
||||
</el-form-item>
|
||||
<el-form-item label="选择文件">
|
||||
<el-upload
|
||||
ref="upload"
|
||||
:action="uploadUrl"
|
||||
:headers="uploadHeaders"
|
||||
:data="uploadForm"
|
||||
:on-success="handleUploadSuccess"
|
||||
:on-error="handleUploadError"
|
||||
:before-upload="beforeUpload"
|
||||
:file-list="fileList"
|
||||
:auto-upload="false"
|
||||
accept=".txt,.pdf,.docx,.md"
|
||||
drag
|
||||
>
|
||||
<i class="el-icon-upload"></i>
|
||||
<div class="el-upload__text">将文件拖到此处,或<em>点击上传</em></div>
|
||||
<div class="el-upload__tip" slot="tip">
|
||||
支持格式:txt、pdf、docx、md,单个文件不超过10MB
|
||||
</div>
|
||||
</el-upload>
|
||||
</el-form-item>
|
||||
</el-form>
|
||||
<el-upload
|
||||
ref="upload"
|
||||
:action="uploadUrl"
|
||||
:headers="uploadHeaders"
|
||||
:on-success="handleUploadSuccess"
|
||||
:on-error="handleUploadError"
|
||||
:before-upload="beforeUpload"
|
||||
:file-list="fileList"
|
||||
:auto-upload="false"
|
||||
accept=".txt,.pdf,.docx,.md"
|
||||
drag
|
||||
multiple
|
||||
>
|
||||
<i class="el-icon-upload"></i>
|
||||
<div class="el-upload__text">将文件拖到此处,或<em>点击上传</em></div>
|
||||
<div class="el-upload__tip" slot="tip">
|
||||
支持格式:txt、pdf、docx、md,单个文件不超过10MB<br>
|
||||
提示:也可以直接将文件放入 rag-service/knowledge_docs 文件夹
|
||||
</div>
|
||||
</el-upload>
|
||||
<div slot="footer" class="dialog-footer">
|
||||
<el-button @click="uploadDialogVisible = false">取 消</el-button>
|
||||
<el-button type="primary" @click="submitUpload" :loading="uploadLoading">确 定</el-button>
|
||||
</div>
|
||||
</el-dialog>
|
||||
|
||||
<!-- 查看对话框 -->
|
||||
<el-dialog title="文档详情" :visible.sync="viewDialogVisible" width="800px" append-to-body>
|
||||
<el-descriptions :column="2" border v-if="currentDocument">
|
||||
<el-descriptions-item label="文档名称">{{ currentDocument.filename }}</el-descriptions-item>
|
||||
<el-descriptions-item label="文档ID">{{ currentDocument.id }}</el-descriptions-item>
|
||||
<el-descriptions-item label="分类">{{ currentDocument.category || '-' }}</el-descriptions-item>
|
||||
<el-descriptions-item label="片段数">{{ currentDocument.chunkCount }}</el-descriptions-item>
|
||||
<el-descriptions-item label="文件大小">{{ formatFileSize(currentDocument.fileSize) }}</el-descriptions-item>
|
||||
<el-descriptions-item label="上传时间">{{ currentDocument.uploadTime }}</el-descriptions-item>
|
||||
</el-descriptions>
|
||||
<div slot="footer" class="dialog-footer">
|
||||
<el-button @click="viewDialogVisible = false">关 闭</el-button>
|
||||
<el-button type="primary" @click="submitUpload" :loading="uploadLoading">上 传</el-button>
|
||||
</div>
|
||||
</el-dialog>
|
||||
</div>
|
||||
|
|
@ -187,84 +190,99 @@
|
|||
|
||||
<script>
|
||||
import { getToken } from "@/utils/auth";
|
||||
import { listDocuments, getDocument, delDocument, searchDocuments, rebuildIndex, getStatistics, clearKnowledge } from "@/api/psychology/knowledge";
|
||||
import { checkRagStatus, listRagDocuments, delRagDocument, ragSearch, getRagStats, rebuildRagIndex, scanRagFolder } from "@/api/psychology/knowledge";
|
||||
|
||||
export default {
|
||||
name: "KnowledgeManagement",
|
||||
data() {
|
||||
return {
|
||||
// 服务状态
|
||||
serviceAvailable: false,
|
||||
// 加载状态
|
||||
loading: false,
|
||||
rebuildLoading: false,
|
||||
scanLoading: false,
|
||||
uploadLoading: false,
|
||||
searchLoading: false,
|
||||
// 统计信息
|
||||
statistics: {
|
||||
documentCount: 0,
|
||||
chunkCount: 0,
|
||||
aiStatus: '检查中...',
|
||||
vectorStatus: '检查中...'
|
||||
total_files: 0,
|
||||
total_chunks: 0
|
||||
},
|
||||
// 文档列表
|
||||
documentList: [],
|
||||
total: 0,
|
||||
// 查询参数
|
||||
queryParams: {
|
||||
pageNum: 1,
|
||||
pageSize: 10,
|
||||
category: undefined
|
||||
},
|
||||
// 搜索
|
||||
searchQuery: '',
|
||||
searchResults: [],
|
||||
// 上传相关
|
||||
uploadDialogVisible: false,
|
||||
uploadForm: {
|
||||
category: 'other'
|
||||
},
|
||||
uploadUrl: process.env.VUE_APP_BASE_API + "/psychology/knowledge/upload",
|
||||
uploadUrl: process.env.VUE_APP_BASE_API + "/psychology/rag/upload",
|
||||
uploadHeaders: {
|
||||
Authorization: "Bearer " + getToken()
|
||||
},
|
||||
fileList: [],
|
||||
// 查看相关
|
||||
viewDialogVisible: false,
|
||||
currentDocument: null
|
||||
fileList: []
|
||||
};
|
||||
},
|
||||
created() {
|
||||
this.getStatistics();
|
||||
this.getList();
|
||||
this.checkStatus();
|
||||
},
|
||||
methods: {
|
||||
/**
 * Check the RAG service status and refresh the page state.
 *
 * Calls checkRagStatus(). On a response with code 200 and a data payload it
 * copies `data.available` into `serviceAvailable`, replaces `statistics` with
 * `data.stats` when present, and reloads the document list if the service is
 * available. A rejected request marks the service as unavailable.
 */
|
||||
checkStatus() {
|
||||
this.loading = true;
|
||||
checkRagStatus().then(response => {
|
||||
if (response.code === 200 && response.data) {
|
||||
this.serviceAvailable = response.data.available || false; // missing/falsy flag is treated as "not available"
|
||||
if (response.data.stats) {
|
||||
this.statistics = response.data.stats; // template reads total_files / total_chunks from this object
|
||||
}
|
||||
if (this.serviceAvailable) {
|
||||
this.getList();
|
||||
}
|
||||
}
|
||||
this.loading = false; // NOTE(review): getList() sets loading = true itself; this reset runs before that request settles — confirm intended
|
||||
}).catch(() => {
|
||||
this.serviceAvailable = false;
|
||||
this.loading = false;
|
||||
});
|
||||
},
|
||||
/** 查询文档列表 */
|
||||
getList() {
|
||||
if (!this.serviceAvailable) return;
|
||||
|
||||
this.loading = true;
|
||||
listDocuments(this.queryParams).then(response => {
|
||||
this.documentList = response.rows || [];
|
||||
this.total = response.total || 0;
|
||||
listRagDocuments().then(response => {
|
||||
if (response.code === 200) {
|
||||
this.documentList = response.data || [];
|
||||
}
|
||||
this.loading = false;
|
||||
}).catch(() => {
|
||||
this.loading = false;
|
||||
});
|
||||
},
|
||||
/** 获取统计信息 */
|
||||
getStatistics() {
|
||||
getStatistics().then(response => {
|
||||
/** 搜索 */
|
||||
handleSearch() {
|
||||
if (!this.searchQuery.trim()) {
|
||||
this.$message.warning('请输入搜索内容');
|
||||
return;
|
||||
}
|
||||
|
||||
this.searchLoading = true;
|
||||
ragSearch(this.searchQuery, 5).then(response => {
|
||||
if (response.code === 200) {
|
||||
this.statistics = response.data;
|
||||
this.searchResults = response.data || [];
|
||||
if (this.searchResults.length === 0) {
|
||||
this.$message.info('未找到相关内容');
|
||||
}
|
||||
}
|
||||
}).catch(error => {
|
||||
console.error('获取统计信息失败:', error);
|
||||
// 设置默认值,避免页面一直加载
|
||||
this.statistics = {
|
||||
documentCount: 0,
|
||||
chunkCount: 0,
|
||||
aiStatus: '服务未启动',
|
||||
vectorStatus: '服务未启动'
|
||||
};
|
||||
this.searchLoading = false;
|
||||
}).catch(() => {
|
||||
this.searchLoading = false;
|
||||
});
|
||||
},
|
||||
/** 上传按钮操作 */
|
||||
handleUpload() {
|
||||
this.uploadDialogVisible = true;
|
||||
this.uploadForm = { category: 'other' };
|
||||
this.fileList = [];
|
||||
},
|
||||
/** 上传前校验 */
|
||||
|
|
@ -291,10 +309,9 @@ export default {
|
|||
handleUploadSuccess(response, file, fileList) {
|
||||
this.uploadLoading = false;
|
||||
if (response.code === 200) {
|
||||
this.$message.success('文档上传成功,已自动加入知识库');
|
||||
this.$message.success('文档上传成功');
|
||||
this.uploadDialogVisible = false;
|
||||
this.getList();
|
||||
this.getStatistics();
|
||||
this.checkStatus();
|
||||
} else {
|
||||
this.$message.error(response.msg || '上传失败');
|
||||
}
|
||||
|
|
@ -302,22 +319,37 @@ export default {
|
|||
/** 上传失败回调 */
|
||||
handleUploadError(err, file, fileList) {
|
||||
this.uploadLoading = false;
|
||||
this.$message.error('上传失败:' + err);
|
||||
this.$message.error('上传失败');
|
||||
},
|
||||
/**
 * Scan the knowledge folder.
 *
 * Calls scanRagFolder() while `scanLoading` is set. On code 200 it shows a
 * success message with `data.indexed` (0 when absent) and re-runs
 * checkStatus(); otherwise it shows `response.msg` or a default error text.
 */
|
||||
handleScan() {
|
||||
this.scanLoading = true;
|
||||
scanRagFolder().then(response => {
|
||||
this.scanLoading = false;
|
||||
if (response.code === 200) {
|
||||
const data = response.data || {}; // tolerate a success response without a data payload
|
||||
this.$message.success(`扫描完成,新索引 ${data.indexed || 0} 个文件`);
|
||||
this.checkStatus();
|
||||
} else {
|
||||
this.$message.error(response.msg || '扫描失败');
|
||||
}
|
||||
}).catch(() => {
|
||||
this.scanLoading = false; // no message is shown here; only the button spinner is cleared
|
||||
});
|
||||
},
|
||||
/** 重建索引 */
|
||||
handleRebuild() {
|
||||
this.$confirm('重建索引将重新处理所有文档,可能需要较长时间,是否继续?', '提示', {
|
||||
this.$confirm('重建索引将重新处理所有文档,是否继续?', '提示', {
|
||||
confirmButtonText: '确定',
|
||||
cancelButtonText: '取消',
|
||||
type: 'warning'
|
||||
}).then(() => {
|
||||
this.rebuildLoading = true;
|
||||
rebuildIndex().then(response => {
|
||||
rebuildRagIndex().then(response => {
|
||||
this.rebuildLoading = false;
|
||||
if (response.code === 200) {
|
||||
this.$message.success('索引重建成功');
|
||||
this.getList();
|
||||
this.getStatistics();
|
||||
this.checkStatus();
|
||||
} else {
|
||||
this.$message.error(response.msg || '重建失败');
|
||||
}
|
||||
|
|
@ -326,29 +358,6 @@ export default {
|
|||
});
|
||||
});
|
||||
},
|
||||
/** 清空知识库 */
|
||||
handleClearAll() {
|
||||
this.$confirm('此操作将清空所有知识库数据,是否继续?', '警告', {
|
||||
confirmButtonText: '确定',
|
||||
cancelButtonText: '取消',
|
||||
type: 'error'
|
||||
}).then(() => {
|
||||
clearKnowledge().then(response => {
|
||||
if (response.code === 200) {
|
||||
this.$message.success('知识库已清空');
|
||||
this.getList();
|
||||
this.getStatistics();
|
||||
} else {
|
||||
this.$message.error(response.msg || '清空失败');
|
||||
}
|
||||
});
|
||||
});
|
||||
},
|
||||
/** 查看详情 */
|
||||
handleView(row) {
|
||||
this.currentDocument = row;
|
||||
this.viewDialogVisible = true;
|
||||
},
|
||||
/** 删除文档 */
|
||||
handleDelete(row) {
|
||||
this.$confirm('确认删除文档 "' + row.filename + '" 吗?', '提示', {
|
||||
|
|
@ -356,11 +365,10 @@ export default {
|
|||
cancelButtonText: '取消',
|
||||
type: 'warning'
|
||||
}).then(() => {
|
||||
delDocument(row.id).then(response => {
|
||||
delRagDocument(row.filename).then(response => {
|
||||
if (response.code === 200) {
|
||||
this.$message.success('删除成功');
|
||||
this.getList();
|
||||
this.getStatistics();
|
||||
this.checkStatus();
|
||||
} else {
|
||||
this.$message.error(response.msg || '删除失败');
|
||||
}
|
||||
|
|
@ -373,6 +381,13 @@ export default {
|
|||
if (bytes < 1024) return bytes + ' B';
|
||||
if (bytes < 1024 * 1024) return (bytes / 1024).toFixed(2) + ' KB';
|
||||
return (bytes / 1024 / 1024).toFixed(2) + ' MB';
|
||||
},
|
||||
/**
 * Get the source file name of a search result.
 *
 * @param {Object} result - search result item; `result.metadata.filename` is read when present
 * @returns {string} the file name, or the placeholder '未知' when result, metadata, or filename is missing/falsy
 */
|
||||
getFilename(result) {
|
||||
if (result && result.metadata && result.metadata.filename) {
|
||||
return result.metadata.filename;
|
||||
}
|
||||
return '未知';
|
||||
}
|
||||
}
|
||||
};
|
||||
|
|
@ -409,4 +424,33 @@ export default {
|
|||
color: #909399;
|
||||
margin-top: 5px;
|
||||
}
|
||||
|
||||
.search-result {
|
||||
background: #f5f7fa;
|
||||
border-radius: 4px;
|
||||
padding: 12px;
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
|
||||
.result-header {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
margin-bottom: 8px;
|
||||
font-size: 12px;
|
||||
}
|
||||
|
||||
.result-score {
|
||||
color: #67C23A;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.result-source {
|
||||
color: #909399;
|
||||
}
|
||||
|
||||
.result-content {
|
||||
color: #606266;
|
||||
line-height: 1.6;
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
</style>
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user