fix:适配新版本task的表结构,解决Unknown column 'priority'in 'field list' 报错信息
This commit is contained in:
parent
1fbb0d58c0
commit
8cfc4bba4c
|
@ -123,12 +123,17 @@ def _update_kb_chunk_count(kb_id, count_delta):
|
||||||
|
|
||||||
|
|
||||||
def _create_task_record(doc_id, chunk_ids_list):
|
def _create_task_record(doc_id, chunk_ids_list):
|
||||||
"""创建task记录"""
|
"""创建task记录,兼容无 priority 字段的新表结构"""
|
||||||
conn = None
|
conn = None
|
||||||
cursor = None
|
cursor = None
|
||||||
try:
|
try:
|
||||||
conn = get_db_connection()
|
conn = get_db_connection()
|
||||||
cursor = conn.cursor()
|
cursor = conn.cursor()
|
||||||
|
|
||||||
|
# 检查 task 表中是否有 priority 字段
|
||||||
|
cursor.execute("SHOW COLUMNS FROM task LIKE 'priority'")
|
||||||
|
has_priority = cursor.fetchone() is not None
|
||||||
|
|
||||||
task_id = generate_uuid()
|
task_id = generate_uuid()
|
||||||
current_datetime = datetime.now()
|
current_datetime = datetime.now()
|
||||||
current_timestamp = int(current_datetime.timestamp() * 1000)
|
current_timestamp = int(current_datetime.timestamp() * 1000)
|
||||||
|
@ -136,17 +141,38 @@ def _create_task_record(doc_id, chunk_ids_list):
|
||||||
digest = f"{doc_id}_{0}_{1}" # 假设 from_page=0, to_page=1
|
digest = f"{doc_id}_{0}_{1}" # 假设 from_page=0, to_page=1
|
||||||
chunk_ids_str = " ".join(chunk_ids_list)
|
chunk_ids_str = " ".join(chunk_ids_list)
|
||||||
|
|
||||||
task_insert = """
|
common_fields = [
|
||||||
INSERT INTO task (
|
"id",
|
||||||
id, create_time, create_date, update_time, update_date,
|
"create_time",
|
||||||
doc_id, from_page, to_page, begin_at, process_duation,
|
"create_date",
|
||||||
progress, progress_msg, retry_count, digest, chunk_ids, task_type, priority
|
"update_time",
|
||||||
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
|
"update_date",
|
||||||
"""
|
"doc_id",
|
||||||
task_params = [task_id, current_timestamp, current_date_only, current_timestamp, current_date_only, doc_id, 0, 1, None, 0.0, 1.0, "MinerU解析完成", 1, digest, chunk_ids_str, "", 0]
|
"from_page",
|
||||||
cursor.execute(task_insert, task_params)
|
"to_page",
|
||||||
|
"begin_at",
|
||||||
|
"process_duation",
|
||||||
|
"progress",
|
||||||
|
"progress_msg",
|
||||||
|
"retry_count",
|
||||||
|
"digest",
|
||||||
|
"chunk_ids",
|
||||||
|
"task_type",
|
||||||
|
]
|
||||||
|
common_values = [task_id, current_timestamp, current_date_only, current_timestamp, current_date_only, doc_id, 0, 1, None, 0.0, 1.0, "MinerU解析完成", 1, digest, chunk_ids_str, ""]
|
||||||
|
|
||||||
|
if has_priority:
|
||||||
|
common_fields.append("priority")
|
||||||
|
common_values.append(0)
|
||||||
|
|
||||||
|
fields_sql = ", ".join(common_fields)
|
||||||
|
placeholders = ", ".join(["%s"] * len(common_values))
|
||||||
|
|
||||||
|
task_insert = f"INSERT INTO task ({fields_sql}) VALUES ({placeholders})"
|
||||||
|
cursor.execute(task_insert, common_values)
|
||||||
conn.commit()
|
conn.commit()
|
||||||
print(f"[Parser-INFO] Task记录创建成功,Task ID: {task_id}")
|
print(f"[Parser-INFO] Task记录创建成功,Task ID: {task_id}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[Parser-ERROR] 创建Task记录失败: {e}")
|
print(f"[Parser-ERROR] 创建Task记录失败: {e}")
|
||||||
finally:
|
finally:
|
||||||
|
@ -284,7 +310,7 @@ def perform_parse(doc_id, doc_info, file_info, embedding_config, kb_info):
|
||||||
update_progress(0.3, "分析PDF类型")
|
update_progress(0.3, "分析PDF类型")
|
||||||
is_ocr = ds.classify() == SupportedPdfParseMethod.OCR
|
is_ocr = ds.classify() == SupportedPdfParseMethod.OCR
|
||||||
mode_msg = "OCR模式" if is_ocr else "文本模式"
|
mode_msg = "OCR模式" if is_ocr else "文本模式"
|
||||||
update_progress(0.4, f"使用{mode_msg}处理PDF,处理中,具体进度可查看日志")
|
update_progress(0.4, f"使用{mode_msg}处理PDF,处理中,具体进度可查看容器日志")
|
||||||
|
|
||||||
infer_result = ds.apply(doc_analyze, ocr=is_ocr)
|
infer_result = ds.apply(doc_analyze, ocr=is_ocr)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue