From 8cfc4bba4c2a3d642abd00aac4c9db653ec56603 Mon Sep 17 00:00:00 2001 From: zstar <65890619+zstar1003@users.noreply.github.com> Date: Thu, 5 Jun 2025 11:34:45 +0800 Subject: [PATCH] =?UTF-8?q?fix:=E9=80=82=E9=85=8D=E6=96=B0=E7=89=88?= =?UTF-8?q?=E6=9C=ACtask=E7=9A=84=E8=A1=A8=E7=BB=93=E6=9E=84=EF=BC=8C?= =?UTF-8?q?=E8=A7=A3=E5=86=B3Unknown=20column=20'priority'in=20'field=20li?= =?UTF-8?q?st'=20=E6=8A=A5=E9=94=99=E4=BF=A1=E6=81=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../knowledgebases/document_parser.py | 48 ++++++++++++++----- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/management/server/services/knowledgebases/document_parser.py b/management/server/services/knowledgebases/document_parser.py index ddb1ddd..3b92a1a 100644 --- a/management/server/services/knowledgebases/document_parser.py +++ b/management/server/services/knowledgebases/document_parser.py @@ -123,12 +123,17 @@ def _update_kb_chunk_count(kb_id, count_delta): def _create_task_record(doc_id, chunk_ids_list): - """创建task记录""" + """创建task记录,兼容无 priority 字段的新表结构""" conn = None cursor = None try: conn = get_db_connection() cursor = conn.cursor() + + # 检查 task 表中是否有 priority 字段 + cursor.execute("SHOW COLUMNS FROM task LIKE 'priority'") + has_priority = cursor.fetchone() is not None + task_id = generate_uuid() current_datetime = datetime.now() current_timestamp = int(current_datetime.timestamp() * 1000) @@ -136,17 +141,38 @@ def _create_task_record(doc_id, chunk_ids_list): digest = f"{doc_id}_{0}_{1}" # 假设 from_page=0, to_page=1 chunk_ids_str = " ".join(chunk_ids_list) - task_insert = """ - INSERT INTO task ( - id, create_time, create_date, update_time, update_date, - doc_id, from_page, to_page, begin_at, process_duation, - progress, progress_msg, retry_count, digest, chunk_ids, task_type, priority - ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) - """ - task_params = [task_id, current_timestamp, current_date_only, current_timestamp, current_date_only, doc_id, 0, 1, None, 0.0, 1.0, "MinerU解析完成", 1, digest, chunk_ids_str, "", 0] - cursor.execute(task_insert, task_params) + common_fields = [ + "id", + "create_time", + "create_date", + "update_time", + "update_date", + "doc_id", + "from_page", + "to_page", + "begin_at", + "process_duation", + "progress", + "progress_msg", + "retry_count", + "digest", + "chunk_ids", + "task_type", + ] + common_values = [task_id, current_timestamp, current_date_only, current_timestamp, current_date_only, doc_id, 0, 1, None, 0.0, 1.0, "MinerU解析完成", 1, digest, chunk_ids_str, ""] + + if has_priority: + common_fields.append("priority") + common_values.append(0) + + fields_sql = ", ".join(common_fields) + placeholders = ", ".join(["%s"] * len(common_values)) + + task_insert = f"INSERT INTO task ({fields_sql}) VALUES ({placeholders})" + cursor.execute(task_insert, common_values) conn.commit() print(f"[Parser-INFO] Task记录创建成功,Task ID: {task_id}") + except Exception as e: print(f"[Parser-ERROR] 创建Task记录失败: {e}") finally: @@ -284,7 +310,7 @@ def perform_parse(doc_id, doc_info, file_info, embedding_config, kb_info): update_progress(0.3, "分析PDF类型") is_ocr = ds.classify() == SupportedPdfParseMethod.OCR mode_msg = "OCR模式" if is_ocr else "文本模式" - update_progress(0.4, f"使用{mode_msg}处理PDF,处理中,具体进度可查看日志") + update_progress(0.4, f"使用{mode_msg}处理PDF,处理中,具体进度可查看容器日志") infer_result = ds.apply(doc_analyze, ocr=is_ocr)