From 16b8ca49b57734f1f0237eda44b095fb8c71a6a2 Mon Sep 17 00:00:00 2001 From: zstar <65890619+zstar1003@users.noreply.github.com> Date: Mon, 14 Apr 2025 10:11:02 +0800 Subject: [PATCH] =?UTF-8?q?feat(=E7=9F=A5=E8=AF=86=E5=BA=93):=20=E4=BF=AE?= =?UTF-8?q?=E6=94=B9=E6=96=87=E4=BB=B6=E7=AE=A1=E7=90=86=E6=9F=A5=E8=AF=A2?= =?UTF-8?q?=E3=80=81=E6=8F=92=E5=85=A5=E9=80=BB=E8=BE=91=EF=BC=8C=E6=96=B0?= =?UTF-8?q?=E5=A2=9E=E7=9F=A5=E8=AF=86=E5=BA=93=E7=AE=A1=E7=90=86=E5=8A=9F?= =?UTF-8?q?=E8=83=BD=20(#25)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 知识库管理功能,包括: 1. 新增知识库相关路由、服务和前端接口 2. 新增知识库文档管理功能 3. 新增知识库图标及类型定义 4. 优化文件上传和下载逻辑 5. 新增标准响应格式工具函数 --- management/server/app.py | 8 +- management/server/routes/__init__.py | 5 +- management/server/routes/files/routes.py | 97 +- .../server/routes/knowledgebases/routes.py | 160 +++ management/server/services/files/service.py | 495 ++++----- .../server/services/knowledgebases/service.py | 661 ++++++++++++ management/server/utils.py | 22 +- .../web/src/common/apis/kbs/document.ts | 128 +++ .../web/src/common/apis/kbs/knowledgebase.ts | 79 ++ management/web/src/common/apis/kbs/type.ts | 63 ++ .../web/src/common/assets/icons/file.svg | 44 +- management/web/src/common/assets/icons/kb.svg | 54 + .../common/assets/icons/team-management.svg | 26 +- .../src/common/assets/icons/user-config.svg | 18 +- management/web/src/http/axios.ts | 2 + .../web/src/pages/knowledgebase/index.vue | 955 ++++++++++++++++++ management/web/src/router/index.ts | 138 +-- management/web/types/auto/components.d.ts | 3 + .../web/types/auto/svg-component-global.d.ts | 6 +- management/web/types/auto/svg-component.d.ts | 10 +- 20 files changed, 2470 insertions(+), 504 deletions(-) create mode 100644 management/server/routes/knowledgebases/routes.py create mode 100644 management/server/services/knowledgebases/service.py create mode 100644 management/web/src/common/apis/kbs/document.ts create mode 100644 management/web/src/common/apis/kbs/knowledgebase.ts create mode 100644 management/web/src/common/apis/kbs/type.ts create mode 100644 management/web/src/common/assets/icons/kb.svg create mode 100644 management/web/src/pages/knowledgebase/index.vue diff --git a/management/server/app.py b/management/server/app.py index 00e8632..029645e 100644 --- a/management/server/app.py +++ b/management/server/app.py @@ -12,7 +12,13 @@ load_dotenv(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file_ app = Flask(__name__) # 启用CORS,允许前端访问 -CORS(app, resources={r"/api/*": {"origins": "*"}}, supports_credentials=True) +CORS(app, resources={ + r"/api/*": { + "origins": "*", + "methods": ["GET", "POST", "PUT", "DELETE", "OPTIONS"], + "allow_headers": ["Content-Type", "Authorization"] + } +}) # 注册所有路由 register_routes(app) diff --git a/management/server/routes/__init__.py b/management/server/routes/__init__.py index b12491c..89825b6 100644 --- a/management/server/routes/__init__.py +++ b/management/server/routes/__init__.py @@ -6,12 +6,14 @@ users_bp = Blueprint('users', __name__, url_prefix='/api/v1/users') teams_bp = Blueprint('teams', __name__, url_prefix='/api/v1/teams') tenants_bp = Blueprint('tenants', __name__, url_prefix='/api/v1/tenants') files_bp = Blueprint('files', __name__, url_prefix='/api/v1/files') +knowledgebase_bp = Blueprint('knowledgebases', __name__, url_prefix='/api/v1/knowledgebases') # 导入路由 from .users.routes import * from .teams.routes import * from .tenants.routes import * from .files.routes import * +from .knowledgebases.routes import * def register_routes(app): @@ -19,4 +21,5 @@ def register_routes(app): app.register_blueprint(users_bp) app.register_blueprint(teams_bp) app.register_blueprint(tenants_bp) - app.register_blueprint(files_bp) \ No newline at end of file + app.register_blueprint(files_bp) + app.register_blueprint(knowledgebase_bp) diff --git a/management/server/routes/files/routes.py b/management/server/routes/files/routes.py index 5023870..39cca4c 100644 --- a/management/server/routes/files/routes.py +++ b/management/server/routes/files/routes.py @@ -15,6 +15,7 @@ from services.files.service import ( get_minio_client, upload_files_to_server ) +from services.files.utils import FileType UPLOAD_FOLDER = '/data/uploads' ALLOWED_EXTENSIONS = {'txt', 'pdf', 'png', 'jpg', 'jpeg', 'gif', 'doc', 'docx', 'xls', 'xlsx'} @@ -26,13 +27,17 @@ def allowed_file(filename): @files_bp.route('/upload', methods=['POST']) def upload_file(): if 'files' not in request.files: - return jsonify({'code': 400, 'message': '未选择文件'}), 400 + return jsonify({'code': 400, 'message': '未选择文件', 'data': None}), 400 files = request.files.getlist('files') upload_result = upload_files_to_server(files) - return jsonify(upload_result) - + # 返回标准格式 + return jsonify({ + 'code': 0, + 'message': '上传成功', + 'data': upload_result['data'] + }) @files_bp.route('', methods=['GET', 'OPTIONS']) def get_files(): @@ -66,82 +71,56 @@ def get_files(): def download_file(file_id): try: current_app.logger.info(f"开始处理文件下载请求: {file_id}") - document, _, storage_bucket, storage_location = get_file_info(file_id) - if not document: + # 获取文件信息 + file = get_file_info(file_id) + + if not file: current_app.logger.error(f"文件不存在: {file_id}") return jsonify({ "code": 404, "message": f"文件 {file_id} 不存在", "details": "文件记录不存在或已被删除" }), 404 + + if file['type'] == FileType.FOLDER.value: + current_app.logger.error(f"不能下载文件夹: {file_id}") + return jsonify({ + "code": 400, + "message": "不能下载文件夹", + "details": "请选择一个文件进行下载" + }), 400 - current_app.logger.info(f"文件信息获取成功: {file_id}, 存储位置: {storage_bucket}/{storage_location}") + current_app.logger.info(f"文件信息获取成功: {file_id}, 存储位置: {file['parent_id']}/{file['location']}") try: - minio_client = get_minio_client() - current_app.logger.info(f"MinIO客户端创建成功, 准备检查文件: {storage_bucket}/{storage_location}") + # 从MinIO下载文件 + file_data, filename = download_file_from_minio(file_id) - obj = minio_client.stat_object(storage_bucket, storage_location) - if not obj: - current_app.logger.error(f"文件对象为空: {storage_bucket}/{storage_location}") - return jsonify({ - "code": 404, - "message": "文件内容为空", - "details": "MinIO存储桶中存在文件记录但内容为空" - }), 404 - - if obj.size == 0: - current_app.logger.error(f"文件大小为0: {storage_bucket}/{storage_location}") - return jsonify({ - "code": 404, - "message": "文件内容为空", - "details": "MinIO存储桶中文件大小为0" - }), 404 - - current_app.logger.info(f"文件检查成功, 大小: {obj.size} 字节, 准备下载") - - response = minio_client.get_object(storage_bucket, storage_location) - file_data = response.read() - - current_app.logger.info(f"文件读取成功, 大小: {len(file_data)} 字节, 准备发送") + # 创建内存文件对象 + file_stream = BytesIO(file_data) + # 返回文件 return send_file( - BytesIO(file_data), - mimetype='application/octet-stream', + file_stream, + download_name=filename, as_attachment=True, - download_name=document['name'] + mimetype='application/octet-stream' ) except Exception as e: - current_app.logger.error(f"MinIO操作异常: {str(e)}", exc_info=True) - # 检查是否是连接错误 - if "connection" in str(e).lower(): - return jsonify({ - "code": 503, - "message": "存储服务连接失败", - "details": f"无法连接到MinIO服务: {str(e)}" - }), 503 - # 检查是否是权限错误 - elif "access denied" in str(e).lower() or "permission" in str(e).lower(): - return jsonify({ - "code": 403, - "message": "存储服务访问被拒绝", - "details": f"MinIO访问权限错误: {str(e)}" - }), 403 - # 其他错误 - else: - return jsonify({ - "code": 500, - "message": "存储服务异常", - "details": str(e) - }), 500 - + current_app.logger.error(f"下载文件失败: {str(e)}") + return jsonify({ + "code": 500, + "message": "下载文件失败", + "details": str(e) + }), 500 + except Exception as e: - current_app.logger.error(f"文件下载异常: {str(e)}", exc_info=True) + current_app.logger.error(f"处理下载请求时出错: {str(e)}") return jsonify({ "code": 500, - "message": "文件下载失败", + "message": "处理下载请求时出错", "details": str(e) }), 500 diff --git a/management/server/routes/knowledgebases/routes.py b/management/server/routes/knowledgebases/routes.py new file mode 100644 index 0000000..347a437 --- /dev/null +++ b/management/server/routes/knowledgebases/routes.py @@ -0,0 +1,160 @@ +from flask import Blueprint, request +from services.knowledgebases.service import KnowledgebaseService +from utils import success_response, error_response +from .. import knowledgebase_bp + +@knowledgebase_bp.route('', methods=['GET']) +def get_knowledgebase_list(): + """获取知识库列表""" + try: + params = { + 'page': int(request.args.get('currentPage', 1)), + 'size': int(request.args.get('size', 10)), + 'name': request.args.get('name', '') + } + result = KnowledgebaseService.get_knowledgebase_list(**params) + return success_response(result) + except ValueError as e: + return error_response("参数类型错误", code=400) + except Exception as e: + return error_response(str(e)) + +@knowledgebase_bp.route('/', methods=['GET']) +def get_knowledgebase_detail(kb_id): + """获取知识库详情""" + try: + knowledgebase = KnowledgebaseService.get_knowledgebase_detail( + kb_id=kb_id + ) + if not knowledgebase: + return error_response('知识库不存在', code=404) + return success_response(knowledgebase) + except Exception as e: + return error_response(str(e)) + +@knowledgebase_bp.route('', methods=['POST']) +def create_knowledgebase(): + """创建知识库""" + try: + data = request.json + if not data.get('name'): + return error_response('知识库名称不能为空', code=400) + + # 移除 created_by 参数 + kb = KnowledgebaseService.create_knowledgebase(**data) + return success_response(kb, "创建成功", code=0) + except Exception as e: + return error_response(str(e)) + +@knowledgebase_bp.route('/', methods=['PUT']) +def update_knowledgebase(kb_id): + """更新知识库""" + try: + data = request.json + kb = KnowledgebaseService.update_knowledgebase( + kb_id=kb_id, + **data + ) + if not kb: + return error_response('知识库不存在', code=404) + return success_response(kb) + except Exception as e: + return error_response(str(e)) + +@knowledgebase_bp.route('/', methods=['DELETE']) +def delete_knowledgebase(kb_id): + """删除知识库""" + try: + result = KnowledgebaseService.delete_knowledgebase( + kb_id=kb_id + ) + if not result: + return error_response('知识库不存在', code=404) + return success_response(message='删除成功') + except Exception as e: + return error_response(str(e)) + +@knowledgebase_bp.route('/batch', methods=['DELETE']) +def batch_delete_knowledgebase(): + """批量删除知识库""" + try: + data = request.json + if not data or not data.get('ids'): + return error_response('请选择要删除的知识库', code=400) + + result = KnowledgebaseService.batch_delete_knowledgebase( + kb_ids=data['ids'] + ) + return success_response(message=f'成功删除 {result} 个知识库') + except Exception as e: + return error_response(str(e)) + +@knowledgebase_bp.route('//documents', methods=['GET']) +def get_knowledgebase_documents(kb_id): + """获取知识库下的文档列表""" + try: + params = { + 'kb_id': kb_id, + 'page': int(request.args.get('currentPage', 1)), + 'size': int(request.args.get('size', 10)), + 'name': request.args.get('name', '') + } + result = KnowledgebaseService.get_knowledgebase_documents(**params) + return success_response(result) + except ValueError as e: + return error_response("参数类型错误", code=400) + except Exception as e: + return error_response(str(e)) + +@knowledgebase_bp.route('//documents', methods=['POST']) +def add_documents_to_knowledgebase(kb_id): + """添加文档到知识库""" + try: + print(f"[DEBUG] 接收到添加文档请求,kb_id: {kb_id}") + data = request.json + if not data: + print("[ERROR] 请求数据为空") + return error_response('请求数据不能为空', code=400) + + file_ids = data.get('file_ids', []) + print(f"[DEBUG] 接收到的file_ids: {file_ids}, 类型: {type(file_ids)}") + + try: + result = KnowledgebaseService.add_documents_to_knowledgebase( + kb_id=kb_id, + file_ids=file_ids + ) + print(f"[DEBUG] 服务层处理成功,结果: {result}") + return success_response( + data=result, + message="添加成功", + code=201 + ) + except Exception as service_error: + print(f"[ERROR] 服务层错误详情: {str(service_error)}") + import traceback + traceback.print_exc() + return error_response(str(service_error), code=500) + + except Exception as e: + print(f"[ERROR] 路由层错误详情: {str(e)}") + import traceback + traceback.print_exc() + return error_response(str(e), code=500) + +@knowledgebase_bp.route('/documents/', methods=['DELETE', 'OPTIONS']) +def delete_document(doc_id): + """删除文档""" + # 处理 OPTIONS 预检请求 + if request.method == 'OPTIONS': + response = success_response({}) + # 添加 CORS 相关头 + response.headers.add('Access-Control-Allow-Methods', 'DELETE') + response.headers.add('Access-Control-Allow-Headers', 'Content-Type,Authorization') + return response + + try: + KnowledgebaseService.delete_document(doc_id) + return success_response(message="删除成功") + except Exception as e: + return error_response(str(e)) \ No newline at end of file diff --git a/management/server/services/files/service.py b/management/server/services/files/service.py index 0218684..c5200d8 100644 --- a/management/server/services/files/service.py +++ b/management/server/services/files/service.py @@ -37,7 +37,7 @@ def filename_type(filename): return FileType.EXCEL.value elif ext in ['.ppt', '.pptx']: return FileType.PPT.value - elif ext in ['.txt', '.md']: # 添加对 txt 和 md 文件的支持 + elif ext in ['.txt', '.md']: return FileType.TEXT.value return FileType.OTHER.value @@ -55,13 +55,14 @@ def get_db_connection(): """创建数据库连接""" return mysql.connector.connect(**DB_CONFIG) -def get_files_list(current_page, page_size, name_filter=""): +def get_files_list(current_page, page_size, parent_id=None, name_filter=""): """ 获取文件列表 Args: current_page: 当前页码 page_size: 每页大小 + parent_id: 父文件夹ID name_filter: 文件名过滤条件 Returns: @@ -76,17 +77,21 @@ def get_files_list(current_page, page_size, name_filter=""): cursor = conn.cursor(dictionary=True) # 构建查询条件 - where_clause = "" + where_clause = "WHERE f.type != 'folder'" # 排除文件夹类型 params = [] + if parent_id: + where_clause += " AND f.parent_id = %s" + params.append(parent_id) + if name_filter: - where_clause = "WHERE d.name LIKE %s" + where_clause += " AND f.name LIKE %s" params.append(f"%{name_filter}%") # 查询总数 count_query = f""" SELECT COUNT(*) as total - FROM document d + FROM file f {where_clause} """ cursor.execute(count_query, params) @@ -94,70 +99,19 @@ def get_files_list(current_page, page_size, name_filter=""): # 查询文件列表 query = f""" - SELECT d.id, d.name, d.kb_id, d.location, d.size, d.type, d.create_time - FROM document d + SELECT f.id, f.name, f.parent_id, f.type, f.size, f.location, f.source_type, f.create_time + FROM file f {where_clause} - ORDER BY d.create_time DESC + ORDER BY f.create_time DESC LIMIT %s OFFSET %s """ cursor.execute(query, params + [page_size, offset]) - documents = cursor.fetchall() - - # 获取文档与文件的关联信息 - doc_ids = [doc['id'] for doc in documents] - file_mappings = {} - - if doc_ids: - placeholders = ', '.join(['%s'] * len(doc_ids)) - cursor.execute(f""" - SELECT f2d.document_id, f.id as file_id, f.parent_id, f.source_type - FROM file2document f2d - JOIN file f ON f2d.file_id = f.id - WHERE f2d.document_id IN ({placeholders}) - """, doc_ids) - - for row in cursor.fetchall(): - file_mappings[row['document_id']] = { - 'file_id': row['file_id'], - 'parent_id': row['parent_id'], - 'source_type': row['source_type'] - } - - # 整合信息 - result = [] - for doc in documents: - doc_id = doc['id'] - kb_id = doc['kb_id'] - location = doc['location'] - - # 确定存储位置 - storage_bucket = kb_id - storage_location = location - - # 如果有文件映射,检查是否需要使用文件的parent_id作为bucket - if doc_ids and doc_id in file_mappings: - file_info = file_mappings[doc_id] - # 模拟File2DocumentService.get_storage_address的逻辑 - if file_info.get('source_type') is None or file_info.get('source_type') == 0: # LOCAL - storage_bucket = file_info['parent_id'] - - # 构建结果字典 - result_item = { - 'id': doc_id, - 'name': doc.get('name', ''), - 'kb_id': kb_id, - 'size': doc.get('size', 0), - 'type': doc.get('type', ''), - 'location': location, - 'create_time': doc.get('create_time', 0) - } - - result.append(result_item) + files = cursor.fetchall() cursor.close() conn.close() - return result, total + return files, total except Exception as e: raise e @@ -170,69 +124,53 @@ def get_file_info(file_id): file_id: 文件ID Returns: - tuple: (文档信息, 文件映射信息, 存储桶, 存储位置) + dict: 文件信息 """ try: # 连接数据库 conn = get_db_connection() cursor = conn.cursor(dictionary=True) - # 查询文档信息 + # 查询文件信息 cursor.execute(""" - SELECT d.id, d.name, d.kb_id, d.location, d.type - FROM document d - WHERE d.id = %s + SELECT id, name, parent_id, type, size, location, source_type + FROM file + WHERE id = %s """, (file_id,)) - document = cursor.fetchone() - if not document: - cursor.close() - conn.close() - return None, None, None, None - - # 获取文档与文件的关联信息 - cursor.execute(""" - SELECT f2d.document_id, f.id as file_id, f.parent_id, f.source_type - FROM file2document f2d - JOIN file f ON f2d.file_id = f.id - WHERE f2d.document_id = %s - """, (file_id,)) - - file_mapping = cursor.fetchone() - - # 确定存储位置 - storage_bucket = document['kb_id'] - storage_location = document['location'] - - # 如果有文件映射,检查是否需要使用文件的parent_id作为bucket - if file_mapping: - # 模拟File2DocumentService.get_storage_address的逻辑 - if file_mapping.get('source_type') is None or file_mapping.get('source_type') == 0: # LOCAL - storage_bucket = file_mapping['parent_id'] - + file = cursor.fetchone() cursor.close() conn.close() - return document, file_mapping, storage_bucket, storage_location + return file except Exception as e: raise e -def download_file_from_minio(storage_bucket, storage_location): +def download_file_from_minio(file_id): """ 从MinIO下载文件 Args: - storage_bucket: 存储桶 - storage_location: 存储位置 + file_id: 文件ID Returns: - bytes: 文件数据 + tuple: (文件数据, 文件名) """ try: + # 获取文件信息 + file = get_file_info(file_id) + + if not file: + raise Exception(f"文件 {file_id} 不存在") + # 从MinIO下载文件 minio_client = get_minio_client() + # 使用parent_id作为存储桶 + storage_bucket = file['parent_id'] + storage_location = file['location'] + # 检查bucket是否存在 if not minio_client.bucket_exists(storage_bucket): raise Exception(f"存储桶 {storage_bucket} 不存在") @@ -241,7 +179,7 @@ def download_file_from_minio(storage_bucket, storage_location): response = minio_client.get_object(storage_bucket, storage_location) file_data = response.read() - return file_data + return file_data, file['name'] except Exception as e: raise e @@ -257,56 +195,98 @@ def delete_file(file_id): bool: 是否删除成功 """ try: - # 获取文件信息 - document, file_mapping, storage_bucket, storage_location = get_file_info(file_id) - - if not document: - return False - # 连接数据库 conn = get_db_connection() cursor = conn.cursor(dictionary=True) - # 如果有文件映射,获取文件ID - file_id_to_delete = None - if file_mapping: - file_id_to_delete = file_mapping['file_id'] + # 查询文件信息 + cursor.execute(""" + SELECT id, parent_id, name, location, type + FROM file + WHERE id = %s + """, (file_id,)) + + file = cursor.fetchone() + if not file: + cursor.close() + conn.close() + return False + + # 如果是文件夹,直接返回成功(不处理文件夹) + if file['type'] == FileType.FOLDER.value: + cursor.close() + conn.close() + return True + + # 查询关联的document记录 + cursor.execute(""" + SELECT f2d.document_id, d.kb_id, d.location + FROM file2document f2d + JOIN document d ON f2d.document_id = d.id + WHERE f2d.file_id = %s + """, (file_id,)) + + document_mappings = cursor.fetchall() + + # 创建MinIO客户端(在事务外创建) + minio_client = get_minio_client() # 开始事务 - conn.start_transaction() - try: - # 1. 删除document表中的记录 - cursor.execute("DELETE FROM document WHERE id = %s", (file_id,)) + # 注意:这里不再使用conn.start_transaction(),而是使用execute直接执行事务相关命令 + cursor.execute("START TRANSACTION") - # 2. 如果有关联的file2document记录,删除它 - if file_mapping: - cursor.execute("DELETE FROM file2document WHERE document_id = %s", (file_id,)) + # 1. 先删除file表中的记录 + cursor.execute("DELETE FROM file WHERE id = %s", (file_id,)) - # 3. 如果有关联的file记录,删除它 - if file_id_to_delete: - cursor.execute("DELETE FROM file WHERE id = %s", (file_id_to_delete,)) + # 2. 删除关联的file2document记录 + cursor.execute("DELETE FROM file2document WHERE file_id = %s", (file_id,)) + + # 3. 删除关联的document记录 + for doc_mapping in document_mappings: + cursor.execute("DELETE FROM document WHERE id = %s", (doc_mapping['document_id'],)) # 提交事务 - conn.commit() + cursor.execute("COMMIT") - # 从MinIO删除文件 + # 从MinIO删除文件(在事务提交后进行) try: - minio_client = get_minio_client() + # 检查bucket是否存在,如果不存在则跳过MinIO删除操作 + parent_id = file.get('parent_id') + if parent_id and minio_client.bucket_exists(parent_id): + try: + # 删除文件,忽略文件不存在的错误 + minio_client.remove_object(parent_id, file['location']) + print(f"从MinIO删除文件成功: {parent_id}/{file['location']}") + except Exception as e: + print(f"从MinIO删除文件失败: {parent_id}/{file['location']} - {str(e)}") + else: + print(f"存储桶不存在,跳过MinIO删除操作: {parent_id}") - # 检查bucket是否存在 - if minio_client.bucket_exists(storage_bucket): - # 删除文件 - minio_client.remove_object(storage_bucket, storage_location) + # 如果有关联的document,也删除document存储的文件 + for doc_mapping in document_mappings: + kb_id = doc_mapping.get('kb_id') + doc_location = doc_mapping.get('location') + if kb_id and doc_location and minio_client.bucket_exists(kb_id): + try: + minio_client.remove_object(kb_id, doc_location) + print(f"从MinIO删除document文件成功: {kb_id}/{doc_location}") + except Exception as e: + print(f"从MinIO删除document文件失败: {kb_id}/{doc_location} - {str(e)}") + else: + print(f"document存储桶不存在或位置为空,跳过MinIO删除操作: {kb_id}/{doc_location}") except Exception as e: # 即使MinIO删除失败,也不影响数据库操作的成功 - print(f"从MinIO删除文件失败: {str(e)}") + print(f"MinIO操作失败,但不影响数据库删除: {str(e)}") return True except Exception as e: # 回滚事务 - conn.rollback() + try: + cursor.execute("ROLLBACK") + except: + pass raise e finally: @@ -314,6 +294,7 @@ def delete_file(file_id): conn.close() except Exception as e: + print(f"删除文件时发生错误: {str(e)}") raise e def batch_delete_files(file_ids): @@ -338,76 +319,93 @@ def batch_delete_files(file_ids): minio_client = get_minio_client() # 开始事务 - conn.start_transaction() - try: + cursor.execute("START TRANSACTION") + success_count = 0 for file_id in file_ids: - # 查询文档信息 + # 查询文件信息 cursor.execute(""" - SELECT d.id, d.kb_id, d.location - FROM document d - WHERE d.id = %s + SELECT id, parent_id, name, location, type + FROM file + WHERE id = %s """, (file_id,)) - document = cursor.fetchone() - if not document: + file = cursor.fetchone() + if not file: continue - # 获取文档与文件的关联信息 + # 如果是文件夹,跳过 + if file['type'] == FileType.FOLDER.value: + continue + + # 查询关联的document记录 cursor.execute(""" - SELECT f2d.id as f2d_id, f2d.document_id, f2d.file_id, f.parent_id, f.source_type + SELECT f2d.id as f2d_id, f2d.document_id, d.kb_id, d.location FROM file2document f2d - JOIN file f ON f2d.file_id = f.id - WHERE f2d.document_id = %s + JOIN document d ON f2d.document_id = d.id + WHERE f2d.file_id = %s """, (file_id,)) - file_mapping = cursor.fetchone() + document_mappings = cursor.fetchall() - # 确定存储位置 - storage_bucket = document['kb_id'] - storage_location = document['location'] + # 1. 先删除file表中的记录 + cursor.execute("DELETE FROM file WHERE id = %s", (file_id,)) - # 如果有文件映射,检查是否需要使用文件的parent_id作为bucket - file_id_to_delete = None - if file_mapping: - file_id_to_delete = file_mapping['file_id'] - # 模拟File2DocumentService.get_storage_address的逻辑 - if file_mapping.get('source_type') is None or file_mapping.get('source_type') == 0: # LOCAL - storage_bucket = file_mapping['parent_id'] + # 2. 删除关联的file2document记录 + cursor.execute("DELETE FROM file2document WHERE file_id = %s", (file_id,)) - # 1. 删除document表中的记录 - cursor.execute("DELETE FROM document WHERE id = %s", (file_id,)) - - # 2. 如果有关联的file2document记录,删除它 - if file_mapping: - cursor.execute("DELETE FROM file2document WHERE id = %s", (file_mapping['f2d_id'],)) - - # 3. 如果有关联的file记录,删除它 - if file_id_to_delete: - cursor.execute("DELETE FROM file WHERE id = %s", (file_id_to_delete,)) - - # 从MinIO删除文件 - try: - # 检查bucket是否存在 - if minio_client.bucket_exists(storage_bucket): - # 删除文件 - minio_client.remove_object(storage_bucket, storage_location) - except Exception as e: - # 即使MinIO删除失败,也不影响数据库操作的成功 - print(f"从MinIO删除文件失败: {str(e)}") + # 3. 删除关联的document记录 + for doc_mapping in document_mappings: + cursor.execute("DELETE FROM document WHERE id = %s", (doc_mapping['document_id'],)) success_count += 1 # 提交事务 - conn.commit() + cursor.execute("COMMIT") + + # 从MinIO删除文件(在事务提交后进行) + for file_id in file_ids: + try: + # 查询文件信息 + cursor.execute(""" + SELECT id, parent_id, name, location, type + FROM file + WHERE id = %s + """, (file_id,)) + + file = cursor.fetchone() + if not file and file['type'] != FileType.FOLDER.value: + # 检查bucket是否存在 + if minio_client.bucket_exists(file['parent_id']): + # 删除文件 + minio_client.remove_object(file['parent_id'], file['location']) + + # 如果有关联的document,也删除document存储的文件 + cursor.execute(""" + SELECT f2d.id as f2d_id, f2d.document_id, d.kb_id, d.location + FROM file2document f2d + JOIN document d ON f2d.document_id = d.id + WHERE f2d.file_id = %s + """, (file_id,)) + + document_mappings = cursor.fetchall() + for doc_mapping in document_mappings: + if minio_client.bucket_exists(doc_mapping['kb_id']): + minio_client.remove_object(doc_mapping['kb_id'], doc_mapping['location']) + except Exception as e: + # 即使MinIO删除失败,也不影响数据库操作的成功 + print(f"从MinIO删除文件失败: {str(e)}") return success_count except Exception as e: # 回滚事务 - conn.rollback() + try: + cursor.execute("ROLLBACK") + except: + pass raise e finally: @@ -415,9 +413,10 @@ def batch_delete_files(file_ids): conn.close() except Exception as e: + print(f"批量删除文件时发生错误: {str(e)}") raise e -def upload_files_to_server(files, kb_id=None, user_id=None, parent_id=None): +def upload_files_to_server(files, parent_id=None, user_id=None): """处理文件上传到服务器的核心逻辑""" if user_id is None: try: @@ -446,57 +445,34 @@ def upload_files_to_server(files, kb_id=None, user_id=None, parent_id=None): print(f"查询最早用户ID失败: {str(e)}") user_id = 'system' - # 如果没有指定parent_id,则获取用户的根文件夹ID + # 如果没有指定parent_id,则获取file表中的第一个记录作为parent_id if parent_id is None: try: conn = get_db_connection() cursor = conn.cursor(dictionary=True) - # 查询用户的根文件夹 - query_root_folder = """ + # 查询file表中的第一个记录 + query_first_file = """ SELECT id FROM file - WHERE tenant_id = %s AND parent_id = id LIMIT 1 """ - cursor.execute(query_root_folder, (user_id,)) - root_folder = cursor.fetchone() + cursor.execute(query_first_file) + first_file = cursor.fetchone() - if root_folder: - parent_id = root_folder['id'] - print(f"使用用户根文件夹ID: {parent_id}") + if first_file: + parent_id = first_file['id'] + print(f"使用file表中的第一个记录ID作为parent_id: {parent_id}") else: - # 如果没有找到根文件夹,创建一个 - root_id = get_uuid() - # 修改时间格式,包含时分秒 - current_time = int(datetime.now().timestamp()) - current_date = datetime.now().strftime('%Y-%m-%d %H:%M:%S') - - root_folder = { - "id": root_id, - "parent_id": root_id, # 根文件夹的parent_id指向自己 - "tenant_id": user_id, - "created_by": user_id, - "name": "/", - "type": FileType.FOLDER.value, - "size": 0, - "location": "", - "source_type": FileSource.LOCAL.value, - "create_time": current_time, - "create_date": current_date, - "update_time": current_time, - "update_date": current_date - } - - FileService.insert(root_folder) - parent_id = root_id - print(f"创建并使用新的根文件夹ID: {parent_id}") + # 如果没有找到记录,创建一个新的ID + parent_id = get_uuid() + print(f"file表中没有记录,创建新的parent_id: {parent_id}") cursor.close() conn.close() except Exception as e: - print(f"查询根文件夹ID失败: {str(e)}") - # 如果无法获取根文件夹,使用file_bucket_id作为备选 - parent_id = None + print(f"查询file表第一个记录失败: {str(e)}") + parent_id = get_uuid() # 如果无法获取,生成一个新的ID + print(f"生成新的parent_id: {parent_id}") results = [] @@ -505,8 +481,6 @@ def upload_files_to_server(files, kb_id=None, user_id=None, parent_id=None): continue if file and allowed_file(file.filename): - # 为每个文件生成独立的存储桶名称 - file_bucket_id = FileService.generate_bucket_name() original_filename = file.filename # 修复文件名处理逻辑,保留中文字符 name, ext = os.path.splitext(original_filename) @@ -526,9 +500,8 @@ def upload_files_to_server(files, kb_id=None, user_id=None, parent_id=None): os.makedirs(UPLOAD_FOLDER, exist_ok=True) file.save(filepath) print(f"文件已保存到临时目录: {filepath}") - print(f"原始文件名: {original_filename}, 处理后文件名: {filename}, 扩展名: {ext[1:]}") # 修改打印信息 - # 2. 获取文件类型 - 使用修复后的文件名 + # 2. 获取文件类型 filetype = filename_type(filename) if filetype == FileType.OTHER.value: raise RuntimeError("不支持的文件类型") @@ -537,103 +510,59 @@ def upload_files_to_server(files, kb_id=None, user_id=None, parent_id=None): minio_client = get_minio_client() location = filename - # 确保bucket存在(使用文件独立的bucket) - if not minio_client.bucket_exists(file_bucket_id): - minio_client.make_bucket(file_bucket_id) - print(f"创建MinIO存储桶: {file_bucket_id}") + # 确保bucket存在 + if not minio_client.bucket_exists(parent_id): + minio_client.make_bucket(parent_id) + print(f"创建MinIO存储桶: {parent_id}") - # 4. 上传到MinIO(使用文件独立的bucket) + # 4. 上传到MinIO with open(filepath, 'rb') as file_data: minio_client.put_object( - bucket_name=file_bucket_id, + bucket_name=parent_id, object_name=location, data=file_data, length=os.path.getsize(filepath) ) - print(f"文件已上传到MinIO: {file_bucket_id}/{location}") + print(f"文件已上传到MinIO: {parent_id}/{location}") - # 5. 创建缩略图(如果是图片/PDF等) - thumbnail_location = '' - if filetype in [FileType.VISUAL.value, FileType.PDF.value]: - try: - thumbnail_location = f'thumbnail_{get_uuid()}.png' - except Exception as e: - print(f"生成缩略图失败: {str(e)}") - - # 6. 创建数据库记录 - doc_id = get_uuid() - # 修改时间格式,包含时分秒 + # 5. 创建文件记录 + file_id = get_uuid() current_time = int(datetime.now().timestamp()) current_date = datetime.now().strftime('%Y-%m-%d %H:%M:%S') - doc = { - "id": doc_id, - "kb_id": file_bucket_id, # 使用文件独立的bucket_id - "parser_id": FileService.get_parser(filetype, filename, ""), - "parser_config": {"pages": [[1, 1000000]]}, - "source_type": "local", - "created_by": user_id or 'system', - "type": filetype, + file_record = { + "id": file_id, + "parent_id": parent_id, + "tenant_id": user_id, + "created_by": user_id, "name": filename, - "location": location, + "type": filetype, "size": os.path.getsize(filepath), - "thumbnail": thumbnail_location, - "token_num": 0, - "chunk_num": 0, - "progress": 0, - "progress_msg": "", - "run": "0", - "status": StatusEnum.VALID.value, + "location": location, + "source_type": FileSource.LOCAL.value, "create_time": current_time, "create_date": current_date, "update_time": current_time, "update_date": current_date } - # 7. 保存文档记录 (添加事务处理) + # 保存文件记录 conn = get_db_connection() try: cursor = conn.cursor() - DocumentService.insert(doc) - print(f"文档记录已保存到MySQL: {doc_id}") - # 8. 创建文件记录和关联 - file_record = { - "id": get_uuid(), - "parent_id": parent_id or file_bucket_id, # 优先使用指定的parent_id - "tenant_id": user_id or 'system', - "created_by": user_id or 'system', - "name": filename, - "type": filetype, - "size": doc["size"], - "location": location, - "source_type": FileSource.KNOWLEDGEBASE.value, - "create_time": current_time, - "create_date": current_date, - "update_time": current_time, - "update_date": current_date - } - FileService.insert(file_record) - print(f"文件记录已保存到MySQL: {file_record['id']}") - - # 9. 创建文件-文档关联 - File2DocumentService.insert({ - "id": get_uuid(), - "file_id": file_record["id"], - "document_id": doc_id, - "create_time": current_time, - "create_date": current_date, - "update_time": current_time, - "update_date": current_date - }) - print(f"关联记录已保存到MySQL: {file_record['id']} -> {doc_id}") + # 插入文件记录 + columns = ', '.join(file_record.keys()) + placeholders = ', '.join(['%s'] * len(file_record)) + query = f"INSERT INTO file ({columns}) VALUES ({placeholders})" + cursor.execute(query, list(file_record.values())) conn.commit() results.append({ - 'id': doc_id, + 'id': file_id, 'name': filename, - 'size': doc["size"], + 'size': file_record["size"], 'type': filetype, 'status': 'success' }) diff --git a/management/server/services/knowledgebases/service.py b/management/server/services/knowledgebases/service.py new file mode 100644 index 0000000..eb6452d --- /dev/null +++ b/management/server/services/knowledgebases/service.py @@ -0,0 +1,661 @@ +import mysql.connector +import json +from flask import current_app +from datetime import datetime +from utils import generate_uuid +from database import DB_CONFIG + +class KnowledgebaseService: + + @classmethod + def _get_db_connection(cls): + """Get database connection""" + return mysql.connector.connect(**DB_CONFIG) + + @classmethod + def get_knowledgebase_list(cls, page=1, size=10, name=''): + """获取知识库列表""" + conn = cls._get_db_connection() + cursor = conn.cursor(dictionary=True) + + query = """ + SELECT + k.id, + k.name, + k.description, + k.create_date, + k.update_date, + k.doc_num, + k.language, + k.permission + FROM knowledgebase k + """ + params = [] + + if name: + query += " WHERE k.name LIKE %s" + params.append(f"%{name}%") + + query += " LIMIT %s OFFSET %s" + params.extend([size, (page-1)*size]) + + cursor.execute(query, params) + results = cursor.fetchall() + + # 处理结果 + for result in results: + # 处理空描述 + if not result.get('description'): + result['description'] = "暂无描述" + # 处理时间格式 + if result.get('create_date'): + if isinstance(result['create_date'], datetime): + result['create_date'] = result['create_date'].strftime('%Y-%m-%d %H:%M:%S') + elif isinstance(result['create_date'], str): + try: + # 尝试解析已有字符串格式 + datetime.strptime(result['create_date'], '%Y-%m-%d %H:%M:%S') + except ValueError: + result['create_date'] = "" + + # 获取总数 + count_query = "SELECT COUNT(*) as total FROM knowledgebase" + if name: + count_query += " WHERE name LIKE %s" + cursor.execute(count_query, params[:1] if name else []) + total = cursor.fetchone()['total'] + + cursor.close() + conn.close() + + return { + 'list': results, + 'total': total + } + + @classmethod + def get_knowledgebase_detail(cls, kb_id): + """获取知识库详情""" + conn = cls._get_db_connection() + cursor = conn.cursor(dictionary=True) + + query = """ + SELECT + k.id, + k.name, + k.description, + k.create_date, + k.update_date, + k.doc_num + FROM knowledgebase k + WHERE k.id = %s + """ + cursor.execute(query, (kb_id,)) + result = cursor.fetchone() + + if result: + # 处理空描述 + if not result.get('description'): + result['description'] = "暂无描述" + # 处理时间格式 + if result.get('create_date'): + if isinstance(result['create_date'], datetime): + result['create_date'] = result['create_date'].strftime('%Y-%m-%d %H:%M:%S') + elif isinstance(result['create_date'], str): + try: + datetime.strptime(result['create_date'], '%Y-%m-%d %H:%M:%S') + except ValueError: + result['create_date'] = "" + + cursor.close() + conn.close() + + return result + + @classmethod + def _check_name_exists(cls, name): + """检查知识库名称是否已存在""" + conn = cls._get_db_connection() + cursor = conn.cursor() + + query = """ + SELECT COUNT(*) as count + FROM knowledgebase + WHERE name = %s + """ + cursor.execute(query, (name,)) + result = cursor.fetchone() + + cursor.close() + conn.close() + + return result[0] > 0 + + @classmethod + def create_knowledgebase(cls, **data): + """创建知识库""" + try: + # 检查知识库名称是否已存在 + exists = cls._check_name_exists(data['name']) + if exists: + raise Exception("知识库名称已存在") + + conn = cls._get_db_connection() + cursor = conn.cursor(dictionary=True) + + # 获取最早的用户ID作为tenant_id和created_by + tenant_id = None + created_by = None + try: + query_earliest_user = """ + SELECT id FROM user + WHERE create_time = (SELECT MIN(create_time) FROM user) + LIMIT 1 + """ + cursor.execute(query_earliest_user) + earliest_user = cursor.fetchone() + + if earliest_user: + tenant_id = earliest_user['id'] + created_by = earliest_user['id'] # 使用最早用户ID作为created_by + print(f"使用创建时间最早的用户ID作为tenant_id和created_by: {tenant_id}") + else: + # 如果找不到用户,使用默认值 + tenant_id = "system" + created_by = "system" + print(f"未找到用户, 使用默认值作为tenant_id和created_by: {tenant_id}") + except Exception as e: + print(f"获取用户ID失败: {str(e)},使用默认值") + tenant_id = "system" + created_by = "system" + + current_time = datetime.now() + create_date = current_time.strftime('%Y-%m-%d %H:%M:%S') + create_time = int(current_time.timestamp() * 1000) # 毫秒级时间戳 + update_date = create_date + update_time = create_time + + # 完整的字段列表 + query = """ + INSERT INTO knowledgebase ( + id, create_time, create_date, update_time, update_date, + avatar, tenant_id, name, language, description, + embd_id, permission, created_by, doc_num, token_num, + chunk_num, similarity_threshold, vector_similarity_weight, parser_id, parser_config, + pagerank, status + ) VALUES ( + %s, %s, %s, %s, %s, + %s, %s, %s, %s, %s, + %s, %s, %s, %s, %s, + %s, %s, %s, %s, %s, + %s, %s + ) + """ + + # 设置默认值 + default_parser_config = json.dumps({ + "layout_recognize": "DeepDOC", + "chunk_token_num": 512, + "delimiter": "\n!?;。;!?", + "auto_keywords": 0, + "auto_questions": 0, + "html4excel": False, + "raptor": {"use_raptor": False}, + "graphrag": {"use_graphrag": False} + }) + + kb_id = generate_uuid() + cursor.execute(query, ( + kb_id, # id + create_time, # create_time + create_date, # create_date + update_time, # update_time + update_date, # update_date + None, # avatar + tenant_id, # tenant_id + data['name'], # name + data.get('language', 'Chinese'), # language + data.get('description', ''), # description + 'bge-m3:latest@Ollama', # embd_id + data.get('permission', 'me'), # permission + created_by, # created_by - 使用内部获取的值 + 0, # doc_num + 0, # token_num + 0, # chunk_num + 0.7, # similarity_threshold + 0.3, # vector_similarity_weight + 'naive', # parser_id + default_parser_config, # parser_config + 0, # pagerank + '1' # status + )) + conn.commit() + + cursor.close() + conn.close() + + # 返回创建后的知识库详情 + return cls.get_knowledgebase_detail(kb_id) + + except Exception as e: + current_app.logger.error(f"创建知识库失败: {str(e)}") + raise Exception(f"创建知识库失败: {str(e)}") + + @classmethod + def update_knowledgebase(cls, kb_id, **data): + """更新知识库""" + try: + # 直接通过ID检查知识库是否存在 + kb = cls.get_knowledgebase_detail(kb_id) + if not kb: + return None + + conn = cls._get_db_connection() + cursor = conn.cursor() + + # 如果要更新名称,先检查名称是否已存在 + if data.get('name') and data['name'] != kb['name']: + exists = cls._check_name_exists(data['name']) + if exists: + raise Exception("知识库名称已存在") + + # 构建更新语句 + update_fields = [] + params = [] + + if data.get('name'): + update_fields.append("name = %s") + params.append(data['name']) + + if 'description' in data: + update_fields.append("description = %s") + params.append(data['description']) + + # 更新时间 + current_time = datetime.now() + update_date = current_time.strftime('%Y-%m-%d %H:%M:%S') + update_fields.append("update_date = %s") + params.append(update_date) + + # 如果没有要更新的字段,直接返回 + if not update_fields: + return kb_id + + # 构建并执行更新语句 + query = f""" + UPDATE knowledgebase + SET {', '.join(update_fields)} + WHERE id = %s + """ + params.append(kb_id) + + cursor.execute(query, params) + conn.commit() + + cursor.close() + conn.close() + + # 返回更新后的知识库详情 + return cls.get_knowledgebase_detail(kb_id) + + except Exception as e: + print(f"更新知识库失败: {str(e)}") + raise Exception(f"更新知识库失败: {str(e)}") + + @classmethod + def delete_knowledgebase(cls, kb_id): + """删除知识库""" + try: + conn = cls._get_db_connection() + cursor = conn.cursor() + + # 先检查知识库是否存在 + check_query = "SELECT id FROM knowledgebase WHERE id = %s" + cursor.execute(check_query, (kb_id,)) + if not cursor.fetchone(): + raise Exception("知识库不存在") + + # 执行删除 + delete_query = "DELETE FROM knowledgebase WHERE id = %s" + cursor.execute(delete_query, (kb_id,)) + conn.commit() + + cursor.close() + conn.close() + + return True + except Exception as e: + current_app.logger.error(f"删除知识库失败: {str(e)}") + raise Exception(f"删除知识库失败: {str(e)}") + + @classmethod + def batch_delete_knowledgebase(cls, kb_ids): + """批量删除知识库""" + try: + conn = cls._get_db_connection() + cursor = conn.cursor() + + # 检查所有ID是否存在 + check_query = "SELECT id FROM knowledgebase WHERE id IN (%s)" % \ + ','.join(['%s'] * len(kb_ids)) + cursor.execute(check_query, kb_ids) + existing_ids = [row[0] for row in cursor.fetchall()] + + if len(existing_ids) != len(kb_ids): + missing_ids = set(kb_ids) - set(existing_ids) + raise Exception(f"以下知识库不存在: {', '.join(missing_ids)}") + + # 执行批量删除 + delete_query = "DELETE FROM knowledgebase WHERE id IN (%s)" % \ + ','.join(['%s'] * len(kb_ids)) + cursor.execute(delete_query, kb_ids) + conn.commit() + + cursor.close() + conn.close() + + return len(kb_ids) + except Exception as e: + current_app.logger.error(f"批量删除知识库失败: {str(e)}") + raise Exception(f"批量删除知识库失败: {str(e)}") + + @classmethod + def get_knowledgebase_documents(cls, kb_id, page=1, size=10, name=''): + """获取知识库下的文档列表""" + try: + conn = cls._get_db_connection() + cursor = conn.cursor(dictionary=True) + + # 先检查知识库是否存在 + check_query = "SELECT id FROM knowledgebase WHERE id = %s" + cursor.execute(check_query, (kb_id,)) + if not cursor.fetchone(): + raise Exception("知识库不存在") + + # 查询文档列表 + query = """ + SELECT + d.id, + d.name, + d.chunk_num, + d.create_date, + d.status, + d.run, + d.progress, + d.parser_id, + d.parser_config, + d.meta_fields + FROM document d + WHERE d.kb_id = %s + """ + params = [kb_id] + + if name: + query += " AND d.name LIKE %s" + params.append(f"%{name}%") + + query += " ORDER BY d.create_date DESC LIMIT %s OFFSET %s" + params.extend([size, (page-1)*size]) + + cursor.execute(query, params) + results = cursor.fetchall() + + # 处理日期时间格式 + for result in results: + if result.get('create_date'): + result['create_date'] = result['create_date'].strftime('%Y-%m-%d %H:%M:%S') + + # 获取总数 + count_query = "SELECT COUNT(*) as total FROM document WHERE kb_id = %s" + count_params = [kb_id] + if name: + count_query += " AND name LIKE %s" + count_params.append(f"%{name}%") + + cursor.execute(count_query, count_params) + total = cursor.fetchone()['total'] + + cursor.close() + conn.close() + + print(results) + return { + 'list': results, + 'total': total + } + + except Exception as e: + current_app.logger.error(f"获取知识库文档列表失败: {str(e)}") + raise Exception(f"获取知识库文档列表失败: {str(e)}") + + @classmethod + def add_documents_to_knowledgebase(cls, kb_id, file_ids, created_by=None): + """添加文档到知识库""" + try: + print(f"[DEBUG] 开始添加文档,参数: kb_id={kb_id}, file_ids={file_ids}") + + # 如果没有传入created_by,则获取最早的用户ID + if created_by is None: + conn = cls._get_db_connection() + cursor = conn.cursor(dictionary=True) + + # 查询创建时间最早的用户ID + query_earliest_user = """ + SELECT id FROM user + WHERE create_time = (SELECT MIN(create_time) FROM user) + LIMIT 1 + """ + cursor.execute(query_earliest_user) + earliest_user = cursor.fetchone() + + if earliest_user: + created_by = earliest_user['id'] + print(f"使用创建时间最早的用户ID: {created_by}") + else: + created_by = 'system' + print("未找到用户, 使用默认用户ID: system") + + cursor.close() + conn.close() + + # 检查知识库是否存在 + kb = cls.get_knowledgebase_detail(kb_id) + print(f"[DEBUG] 知识库检查结果: {kb}") + if not kb: + print(f"[ERROR] 知识库不存在: {kb_id}") + raise Exception("知识库不存在") + + conn = cls._get_db_connection() + cursor = conn.cursor() + + # 获取文件信息 + file_query = """ + SELECT id, name, location, size, type + FROM file + WHERE id IN (%s) + """ % ','.join(['%s'] * len(file_ids)) + + print(f"[DEBUG] 执行文件查询SQL: {file_query}") + print(f"[DEBUG] 查询参数: {file_ids}") + + try: + cursor.execute(file_query, file_ids) + files = cursor.fetchall() + print(f"[DEBUG] 查询到的文件数据: {files}") + except Exception as e: + print(f"[ERROR] 文件查询失败: {str(e)}") + raise + + if len(files) != len(file_ids): + print(f"部分文件不存在: 期望={len(file_ids)}, 实际={len(files)}") + raise Exception("部分文件不存在") + + # 添加文档记录 + added_count = 0 + for file in files: + file_id = file[0] + file_name = file[1] + print(f"处理文件: id={file_id}, name={file_name}") + + file_location = file[2] + file_size = file[3] + file_type = file[4] + + # 检查文档是否已存在于知识库 + check_query = """ + SELECT COUNT(*) + FROM document d + JOIN file2document f2d ON d.id = f2d.document_id + WHERE d.kb_id = %s AND f2d.file_id = %s + """ + cursor.execute(check_query, (kb_id, file_id)) + exists = cursor.fetchone()[0] > 0 + + if exists: + continue # 跳过已存在的文档 + + # 创建文档记录 + doc_id = generate_uuid() + current_datetime = datetime.now() + create_time = int(current_datetime.timestamp() * 1000) # 毫秒级时间戳 + current_date = current_datetime.strftime("%Y-%m-%d %H:%M:%S") # 格式化日期字符串 + + # 设置默认值 + default_parser_id = "naive" + default_parser_config = json.dumps({ + "layout_recognize": "DeepDOC", + "chunk_token_num": 512, + "delimiter": "\n!?;。;!?", + "auto_keywords": 0, + "auto_questions": 0, + "html4excel": False, + "raptor": { + "use_raptor": False + }, + "graphrag": { + "use_graphrag": False + } + }) + default_source_type = "local" + + # 插入document表 + doc_query = """ + INSERT INTO document ( + id, create_time, create_date, update_time, update_date, + thumbnail, kb_id, parser_id, parser_config, source_type, + type, created_by, name, location, size, + token_num, chunk_num, progress, progress_msg, process_begin_at, + process_duation, meta_fields, run, status + ) VALUES ( + %s, %s, %s, %s, %s, + %s, %s, %s, %s, %s, + %s, %s, %s, %s, %s, + %s, %s, %s, %s, %s, + %s, %s, %s, %s + ) + """ + + doc_params = [ + doc_id, create_time, current_date, create_time, current_date, # ID和时间 + None, kb_id, default_parser_id, default_parser_config, default_source_type, # thumbnail到source_type + file_type, created_by, file_name, file_location, file_size, # type到size + 0, 0, 0.0, None, None, # token_num到process_begin_at + 0.0, None, '0', '1' # process_duation到status + ] + + cursor.execute(doc_query, doc_params) + + # 创建文件到文档的映射 + f2d_id = generate_uuid() + f2d_query = """ + INSERT INTO file2document ( + id, create_time, create_date, update_time, update_date, + file_id, document_id + ) VALUES ( + %s, %s, %s, %s, %s, + %s, %s + ) + """ + + f2d_params = [ + f2d_id, create_time, current_date, create_time, current_date, + file_id, doc_id + ] + + cursor.execute(f2d_query, f2d_params) + + added_count += 1 + + # 更新知识库文档数量 + if added_count > 0: + try: + update_query = """ + UPDATE knowledgebase + SET doc_num = doc_num + %s, + update_date = %s + WHERE id = %s + """ + cursor.execute(update_query, (added_count, current_date, kb_id)) + conn.commit() # 先提交更新操作 + except Exception as e: + print(f"[WARNING] 更新知识库文档数量失败,但文档已添加: {str(e)}") + # 这里不抛出异常,因为文档已经添加成功 + + cursor.close() + conn.close() + + return { + "added_count": added_count + } + + except Exception as e: + print(f"[ERROR] 添加文档失败: {str(e)}") + print(f"[ERROR] 错误类型: {type(e)}") + import traceback + print(f"[ERROR] 堆栈信息: {traceback.format_exc()}") + raise Exception(f"添加文档到知识库失败: {str(e)}") + + @classmethod + def delete_document(cls, doc_id): + """删除文档""" + try: + conn = cls._get_db_connection() + cursor = conn.cursor() + + # 先检查文档是否存在 + check_query = "SELECT kb_id FROM document WHERE id = %s" + cursor.execute(check_query, (doc_id,)) + result = cursor.fetchone() + + if not result: + raise Exception("文档不存在") + + kb_id = result[0] + + # 删除文件到文档的映射 + f2d_query = "DELETE FROM file2document WHERE document_id = %s" + cursor.execute(f2d_query, (doc_id,)) + + # 删除文档 + doc_query = "DELETE FROM document WHERE id = %s" + cursor.execute(doc_query, (doc_id,)) + + # 更新知识库文档数量 + update_query = """ + UPDATE knowledgebase + SET doc_num = doc_num - 1, + update_date = %s + WHERE id = %s + """ + current_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + cursor.execute(update_query, (current_date, kb_id)) + + conn.commit() + cursor.close() + conn.close() + + return True + + except Exception as e: + print(f"[ERROR] 删除文档失败: {str(e)}") + raise Exception(f"删除文档失败: {str(e)}") \ No newline at end of file diff --git a/management/server/utils.py b/management/server/utils.py index 0ccac3a..0240f32 100644 --- a/management/server/utils.py +++ b/management/server/utils.py @@ -1,5 +1,6 @@ import uuid import base64 +from flask import jsonify from Cryptodome.PublicKey import RSA from Cryptodome.Cipher import PKCS1_v1_5 from werkzeug.security import generate_password_hash @@ -23,4 +24,23 @@ def rsa_psw(password: str) -> str: # 加密密码 def encrypt_password(raw_password: str) -> str: base64_password = base64.b64encode(raw_password.encode()).decode() - return generate_password_hash(base64_password) \ No newline at end of file + return generate_password_hash(base64_password) + +# 标准响应格式 +def success_response(data=None, message="操作成功", code=0): + return jsonify({ + "code": code, + "message": message, + "data": data + }) + +# 错误响应格式 +def error_response(message="操作失败", code=500, details=None): + """标准错误响应格式""" + response = { + "code": code, + "message": message + } + if details: + response["details"] = details + return jsonify(response), code if code >= 400 else 500 \ No newline at end of file diff --git a/management/web/src/common/apis/kbs/document.ts b/management/web/src/common/apis/kbs/document.ts new file mode 100644 index 0000000..f4ab16d --- /dev/null +++ b/management/web/src/common/apis/kbs/document.ts @@ -0,0 +1,128 @@ +import { request } from "@/http/axios" + +interface UploadResponse { + code: number + message?: string + data: any +} + +// 获取文档列表 +export function getDocumentListApi(params: { + kb_id: string + currentPage: number + size: number + name?: string +}) { + return request({ + url: `/api/v1/knowledgebases/${params.kb_id}/documents`, + method: "get", + params: { + currentPage: params.currentPage, + size: params.size, + name: params.name + } + }) +} + +// 获取文档详情 +export function getDocumentDetailApi(id: string) { + return request({ + url: `/api/v1/documents/${id}`, + method: "get" + }) +} + +// 上传文档 +export function uploadDocumentApi(formData: FormData): Promise { + return request({ + url: "/api/v1/knowledgebases/documents/upload", + method: "post", + data: formData, + headers: { + "Content-Type": "multipart/form-data" + } + }).then((response) => { + if (response.code !== 0) { + throw new Error(response.message || "上传失败") + } + return response.data + }) +} + +// 删除文档 +export function deleteDocumentApi(docId: string) { + return request({ + url: `/api/v1/knowledgebases/documents/${docId}`, + method: "delete" + }) +} + +// 批量删除文档 +export function batchDeleteDocumentsApi(ids: string[]) { + return request({ + url: "/api/v1/knowledgebases/documents/batch", + method: "delete", + data: { ids } + }) +} + +// 更改文档状态(启用/禁用) +export function changeDocumentStatusApi(id: string, status: string) { + return request({ + url: `/api/v1/knowledgebases/documents/${id}/status`, + method: "put", + data: { status } + }) +} + +// 运行文档解析 +export function runDocumentParseApi(id: string) { + return request({ + url: `/api/v1/knowledgebases/documents/${id}/parse`, + method: "post" + }) +} + +// 获取文档分块列表 +export function getDocumentChunksApi(params: { + doc_id: string + currentPage: number + size: number + content?: string +}) { + return request({ + url: "/api/v1/chunks", + method: "get", + params + }) +} + +// 获取文件列表 +export function getFileListApi(params: { + currentPage: number + size: number + name?: string +}) { + return request({ + url: "/api/v1/files", + method: "get", + params + }) +} + +// 添加文档到知识库 +export function addDocumentToKnowledgeBaseApi(data: { + kb_id: string + file_ids: string[] +}) { + return request<{ code: number, message?: string, data?: any }>({ + url: `/api/v1/knowledgebases/${data.kb_id}/documents`, + method: "post", + data: { file_ids: data.file_ids } + }).then((response) => { + if (response.code === 0 || response.code === 201) { + return response.data || { added_count: data.file_ids.length } + } + throw new Error(response.message || "添加文档失败") + }) +} diff --git a/management/web/src/common/apis/kbs/knowledgebase.ts b/management/web/src/common/apis/kbs/knowledgebase.ts new file mode 100644 index 0000000..e5b9c5a --- /dev/null +++ b/management/web/src/common/apis/kbs/knowledgebase.ts @@ -0,0 +1,79 @@ +import { request } from "@/http/axios" + +// 获取知识库列表 +export function getKnowledgeBaseListApi(params: { + currentPage: number + size: number + name?: string +}) { + return request({ + url: "/api/v1/knowledgebases", + method: "get", + params + }) +} + +// 获取知识库详情 +export function getKnowledgeBaseDetailApi(id: string) { + return request({ + url: `/api/v1/knowledgebases/${id}`, + method: "get" + }) +} + +// 创建知识库 +export function createKnowledgeBaseApi(data: { + name: string + description?: string + language?: string + permission?: string +}) { + return request({ + url: "/api/v1/knowledgebases", + method: "post", + data + }) +} + +// 更新知识库 +export function updateKnowledgeBaseApi(id: string, data: { + name?: string + description?: string + language?: string + permission?: string +}) { + return request({ + url: `/api/v1/knowledgebases/${id}`, + method: "put", + data + }) +} + +// 删除知识库 +export function deleteKnowledgeBaseApi(id: string) { + return request({ + url: `/api/v1/knowledgebases/${id}`, + method: "delete" + }) +} + +// 批量删除知识库 +export function batchDeleteKnowledgeBaseApi(ids: string[]) { + return request({ + url: "/api/v1/knowledgebases/batch", + method: "delete", + data: { ids } + }) +} + +// 添加文档到知识库 +export function addDocumentToKnowledgeBaseApi(data: { + kb_id: string + file_ids: string[] +}) { + return request({ + url: `/api/v1/knowledgebases/${data.kb_id}/documents`, + method: "post", + data: { file_ids: data.file_ids } + }) +} diff --git a/management/web/src/common/apis/kbs/type.ts b/management/web/src/common/apis/kbs/type.ts new file mode 100644 index 0000000..7556682 --- /dev/null +++ b/management/web/src/common/apis/kbs/type.ts @@ -0,0 +1,63 @@ +/** + * 文件数据类型 + */ +export interface FileData { + /** 文件ID */ + id: string + /** 文件名称 */ + name: string + /** 文件大小(字节) */ + size: number + /** 文件类型 */ + type: string + /** 知识库ID */ + kb_id: string + /** 存储位置 */ + location: string + /** 创建时间 */ + create_time?: number + /** 更新时间 */ + update_time?: number +} + +/** + * 文件列表结果 + */ +export interface FileListResult { + /** 文件列表 */ + list: FileData[] + /** 总条数 */ + total: number +} + +/** + * 分页查询参数 + */ +export interface PageQuery { + /** 当前页码 */ + currentPage: number + /** 每页条数 */ + size: number +} + +/** + * 分页结果 + */ +export interface PageResult { + /** 数据列表 */ + list: T[] + /** 总条数 */ + total: number +} + +/** + * 通用响应结构 + */ +export interface ApiResponse { + /** 状态码 */ + code: number + /** 响应数据 */ + data: T + /** 响应消息 */ + message: string +} diff --git a/management/web/src/common/assets/icons/file.svg b/management/web/src/common/assets/icons/file.svg index e42afcd..a5af49c 100644 --- a/management/web/src/common/assets/icons/file.svg +++ b/management/web/src/common/assets/icons/file.svg @@ -1,25 +1,35 @@ - - - - + - - + - - + - - + - - + - - + - - - + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/management/web/src/common/assets/icons/kb.svg b/management/web/src/common/assets/icons/kb.svg new file mode 100644 index 0000000..bf529b8 --- /dev/null +++ b/management/web/src/common/assets/icons/kb.svg @@ -0,0 +1,54 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/management/web/src/common/assets/icons/team-management.svg b/management/web/src/common/assets/icons/team-management.svg index ccaa694..b6e5715 100644 --- a/management/web/src/common/assets/icons/team-management.svg +++ b/management/web/src/common/assets/icons/team-management.svg @@ -1,9 +1,21 @@ - - - - - - - + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/management/web/src/common/assets/icons/user-config.svg b/management/web/src/common/assets/icons/user-config.svg index 2d417dc..7f3cf79 100644 --- a/management/web/src/common/assets/icons/user-config.svg +++ b/management/web/src/common/assets/icons/user-config.svg @@ -1,9 +1,13 @@ - - - - - - - + + + + \ No newline at end of file diff --git a/management/web/src/http/axios.ts b/management/web/src/http/axios.ts index 8970ba5..f0cb95e 100644 --- a/management/web/src/http/axios.ts +++ b/management/web/src/http/axios.ts @@ -24,6 +24,7 @@ function createInstance() { // 响应拦截器(可根据具体业务作出相应的调整) instance.interceptors.response.use( (response) => { + // console.log("API Response:", response) // apiData 是 api 返回的数据 const apiData = response.data // 二进制数据则直接返回 @@ -101,6 +102,7 @@ function createInstance() { function createRequest(instance: AxiosInstance) { return (config: AxiosRequestConfig): Promise => { const token = getToken() + // console.log("Request config:", config) // 默认配置 const defaultConfig: AxiosRequestConfig = { // 接口地址 diff --git a/management/web/src/pages/knowledgebase/index.vue b/management/web/src/pages/knowledgebase/index.vue new file mode 100644 index 0000000..ccb2fb9 --- /dev/null +++ b/management/web/src/pages/knowledgebase/index.vue @@ -0,0 +1,955 @@ + + + + + diff --git a/management/web/src/router/index.ts b/management/web/src/router/index.ts index a19716f..9a5ef14 100644 --- a/management/web/src/router/index.ts +++ b/management/web/src/router/index.ts @@ -116,127 +116,25 @@ export const constantRoutes: RouteRecordRaw[] = [ } } ] + }, + { + path: "/kb", + component: Layouts, + redirect: "/kb/index", + children: [ + { + path: "index", + component: () => import("@/pages/knowledgebase/index.vue"), + name: "KB", + meta: { + title: "知识库管理", + svgIcon: "kb", + affix: false, + keepAlive: true + } + } + ] } - // { - // path: "/", - // component: () => import("@/pages/demo/element-plus/index.vue"), - // name: "ElementPlus", - // meta: { - // title: "Element Plus", - // keepAlive: true - // } - // } - - // { - // path: "/demo", - // component: Layouts, - // redirect: "/demo/unocss", - // name: "Demo", - // meta: { - // title: "示例集合", - // elIcon: "DataBoard" - // }, - // children: [ - // { - // path: "unocss", - // component: () => import("@/pages/demo/unocss/index.vue"), - // name: "UnoCSS", - // meta: { - // title: "UnoCSS" - // } - // }, - - // { - // path: "vxe-table", - // component: () => import("@/pages/demo/vxe-table/index.vue"), - // name: "VxeTable", - // meta: { - // title: "Vxe Table", - // keepAlive: true - // } - // }, - // { - // path: "level2", - // component: () => import("@/pages/demo/level2/index.vue"), - // redirect: "/demo/level2/level3", - // name: "Level2", - // meta: { - // title: "二级路由", - // alwaysShow: true - // }, - // children: [ - // { - // path: "level3", - // component: () => import("@/pages/demo/level2/level3/index.vue"), - // name: "Level3", - // meta: { - // title: "三级路由", - // keepAlive: true - // } - // } - // ] - // }, - // { - // path: "composable-demo", - // redirect: "/demo/composable-demo/use-fetch-select", - // name: "ComposableDemo", - // meta: { - // title: "组合式函数" - // }, - // children: [ - // { - // path: "use-fetch-select", - // component: () => import("@/pages/demo/composable-demo/use-fetch-select.vue"), - // name: "UseFetchSelect", - // meta: { - // title: "useFetchSelect" - // } - // }, - // { - // path: "use-fullscreen-loading", - // component: () => import("@/pages/demo/composable-demo/use-fullscreen-loading.vue"), - // name: "UseFullscreenLoading", - // meta: { - // title: "useFullscreenLoading" - // } - // }, - // { - // path: "use-watermark", - // component: () => import("@/pages/demo/composable-demo/use-watermark.vue"), - // name: "UseWatermark", - // meta: { - // title: "useWatermark" - // } - // } - // ] - // } - // ] - // }, - // { - // path: "/link", - // meta: { - // title: "文档链接", - // elIcon: "Link" - // }, - // children: [ - // { - // path: "https://juejin.cn/post/7445151895121543209", - // component: () => {}, - // name: "Link1", - // meta: { - // title: "中文文档" - // } - // }, - // { - // path: "https://juejin.cn/column/7207659644487139387", - // component: () => {}, - // name: "Link2", - // meta: { - // title: "新手教程" - // } - // } - // ] - // } ] /** diff --git a/management/web/types/auto/components.d.ts b/management/web/types/auto/components.d.ts index f5bfc3d..7bd0ea5 100644 --- a/management/web/types/auto/components.d.ts +++ b/management/web/types/auto/components.d.ts @@ -8,6 +8,7 @@ export {} /* prettier-ignore */ declare module 'vue' { export interface GlobalComponents { + ConfirmDialog: typeof import('./../../src/components/ConfirmDialog.vue')['default'] ElAside: typeof import('element-plus/es')['ElAside'] ElAvatar: typeof import('element-plus/es')['ElAvatar'] ElBacktop: typeof import('element-plus/es')['ElBacktop'] @@ -18,6 +19,8 @@ declare module 'vue' { ElCard: typeof import('element-plus/es')['ElCard'] ElConfigProvider: typeof import('element-plus/es')['ElConfigProvider'] ElContainer: typeof import('element-plus/es')['ElContainer'] + ElDescriptions: typeof import('element-plus/es')['ElDescriptions'] + ElDescriptionsItem: typeof import('element-plus/es')['ElDescriptionsItem'] ElDialog: typeof import('element-plus/es')['ElDialog'] ElDivider: typeof import('element-plus/es')['ElDivider'] ElDrawer: typeof import('element-plus/es')['ElDrawer'] diff --git a/management/web/types/auto/svg-component-global.d.ts b/management/web/types/auto/svg-component-global.d.ts index e358aa6..cd5da82 100644 --- a/management/web/types/auto/svg-component-global.d.ts +++ b/management/web/types/auto/svg-component-global.d.ts @@ -9,18 +9,18 @@ declare module 'vue' { export interface GlobalComponents { SvgIcon: import("vue").DefineComponent<{ name: { - type: import("vue").PropType<"dashboard" | "file" | "fullscreen-exit" | "fullscreen" | "keyboard-down" | "keyboard-enter" | "keyboard-esc" | "keyboard-up" | "search" | "team-management" | "user-config" | "user-management">; + type: import("vue").PropType<"dashboard" | "file" | "fullscreen-exit" | "fullscreen" | "kb" | "keyboard-down" | "keyboard-enter" | "keyboard-esc" | "keyboard-up" | "search" | "team-management" | "user-config" | "user-management">; default: string; required: true; }; }, {}, unknown, {}, {}, import("vue").ComponentOptionsMixin, import("vue").ComponentOptionsMixin, {}, string, import("vue").VNodeProps & import("vue").AllowedComponentProps & import("vue").ComponentCustomProps, Readonly; + type: import("vue").PropType<"dashboard" | "file" | "fullscreen-exit" | "fullscreen" | "kb" | "keyboard-down" | "keyboard-enter" | "keyboard-esc" | "keyboard-up" | "search" | "team-management" | "user-config" | "user-management">; default: string; required: true; }; }>>, { - name: "dashboard" | "file" | "fullscreen-exit" | "fullscreen" | "keyboard-down" | "keyboard-enter" | "keyboard-esc" | "keyboard-up" | "search" | "team-management" | "user-config" | "user-management"; + name: "dashboard" | "file" | "fullscreen-exit" | "fullscreen" | "kb" | "keyboard-down" | "keyboard-enter" | "keyboard-esc" | "keyboard-up" | "search" | "team-management" | "user-config" | "user-management"; }>; } } diff --git a/management/web/types/auto/svg-component.d.ts b/management/web/types/auto/svg-component.d.ts index 7c0d45c..4dc999b 100644 --- a/management/web/types/auto/svg-component.d.ts +++ b/management/web/types/auto/svg-component.d.ts @@ -7,20 +7,20 @@ declare module '~virtual/svg-component' { const SvgIcon: import("vue").DefineComponent<{ name: { - type: import("vue").PropType<"dashboard" | "file" | "fullscreen-exit" | "fullscreen" | "keyboard-down" | "keyboard-enter" | "keyboard-esc" | "keyboard-up" | "search" | "team-management" | "user-config" | "user-management">; + type: import("vue").PropType<"dashboard" | "file" | "fullscreen-exit" | "fullscreen" | "kb" | "keyboard-down" | "keyboard-enter" | "keyboard-esc" | "keyboard-up" | "search" | "team-management" | "user-config" | "user-management">; default: string; required: true; }; }, {}, unknown, {}, {}, import("vue").ComponentOptionsMixin, import("vue").ComponentOptionsMixin, {}, string, import("vue").VNodeProps & import("vue").AllowedComponentProps & import("vue").ComponentCustomProps, Readonly; + type: import("vue").PropType<"dashboard" | "file" | "fullscreen-exit" | "fullscreen" | "kb" | "keyboard-down" | "keyboard-enter" | "keyboard-esc" | "keyboard-up" | "search" | "team-management" | "user-config" | "user-management">; default: string; required: true; }; }>>, { - name: "dashboard" | "file" | "fullscreen-exit" | "fullscreen" | "keyboard-down" | "keyboard-enter" | "keyboard-esc" | "keyboard-up" | "search" | "team-management" | "user-config" | "user-management"; + name: "dashboard" | "file" | "fullscreen-exit" | "fullscreen" | "kb" | "keyboard-down" | "keyboard-enter" | "keyboard-esc" | "keyboard-up" | "search" | "team-management" | "user-config" | "user-management"; }>; - export const svgNames: ["dashboard", "file", "fullscreen-exit", "fullscreen", "keyboard-down", "keyboard-enter", "keyboard-esc", "keyboard-up", "search", "team-management", "user-config", "user-management"]; - export type SvgName = "dashboard" | "file" | "fullscreen-exit" | "fullscreen" | "keyboard-down" | "keyboard-enter" | "keyboard-esc" | "keyboard-up" | "search" | "team-management" | "user-config" | "user-management"; + export const svgNames: ["dashboard", "file", "fullscreen-exit", "fullscreen", "kb", "keyboard-down", "keyboard-enter", "keyboard-esc", "keyboard-up", "search", "team-management", "user-config", "user-management"]; + export type SvgName = "dashboard" | "file" | "fullscreen-exit" | "fullscreen" | "kb" | "keyboard-down" | "keyboard-enter" | "keyboard-esc" | "keyboard-up" | "search" | "team-management" | "user-config" | "user-management"; export default SvgIcon; }