fix: 移除文档时删除 ES 中的记录,修复移除文档后知识残留的问题
This commit is contained in:
parent
0b1126b1c8
commit
d99c9e5c42
|
@ -400,14 +400,6 @@ def perform_parse(doc_id, doc_info, file_info, embedding_config, kb_info):
|
||||||
chunk_id = generate_uuid()
|
chunk_id = generate_uuid()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 上传文本块到 MinIO
|
|
||||||
minio_client.put_object(
|
|
||||||
bucket_name=output_bucket,
|
|
||||||
object_name=chunk_id,
|
|
||||||
data=BytesIO(content.encode("utf-8", errors="replace")),
|
|
||||||
length=len(content.encode("utf-8", errors="replace")),
|
|
||||||
)
|
|
||||||
|
|
||||||
# 准备ES文档
|
# 准备ES文档
|
||||||
current_time_es = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
current_time_es = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
current_timestamp_es = datetime.now().timestamp()
|
current_timestamp_es = datetime.now().timestamp()
|
||||||
|
@ -513,7 +505,7 @@ def perform_parse(doc_id, doc_info, file_info, embedding_config, kb_info):
|
||||||
|
|
||||||
# 如果找到了最近的图片,则更新文本块的img_id
|
# 如果找到了最近的图片,则更新文本块的img_id
|
||||||
if nearest_image:
|
if nearest_image:
|
||||||
# v0.4.1更新,改成存储提取其相对路径部分
|
# 存储相对路径部分
|
||||||
parsed_url = urlparse(nearest_image["url"])
|
parsed_url = urlparse(nearest_image["url"])
|
||||||
relative_path = parsed_url.path.lstrip("/") # 去掉开头的斜杠
|
relative_path = parsed_url.path.lstrip("/") # 去掉开头的斜杠
|
||||||
# 更新ES中的文档
|
# 更新ES中的文档
|
||||||
|
|
|
@ -6,7 +6,7 @@ from datetime import datetime
|
||||||
|
|
||||||
import mysql.connector
|
import mysql.connector
|
||||||
import requests
|
import requests
|
||||||
from database import DB_CONFIG
|
from database import DB_CONFIG, get_es_client
|
||||||
from utils import generate_uuid
|
from utils import generate_uuid
|
||||||
|
|
||||||
# 解析相关模块
|
# 解析相关模块
|
||||||
|
@ -700,17 +700,25 @@ class KnowledgebaseService:
|
||||||
"""删除文档"""
|
"""删除文档"""
|
||||||
try:
|
try:
|
||||||
conn = cls._get_db_connection()
|
conn = cls._get_db_connection()
|
||||||
cursor = conn.cursor()
|
cursor = conn.cursor(dictionary=True)
|
||||||
|
|
||||||
# 先检查文档是否存在
|
# 先检查文档是否存在
|
||||||
check_query = "SELECT kb_id FROM document WHERE id = %s"
|
check_query = """
|
||||||
|
SELECT
|
||||||
|
d.kb_id,
|
||||||
|
kb.created_by AS tenant_id -- 获取 tenant_id (knowledgebase的创建者)
|
||||||
|
FROM document d
|
||||||
|
JOIN knowledgebase kb ON d.kb_id = kb.id -- JOIN knowledgebase 表
|
||||||
|
WHERE d.id = %s
|
||||||
|
"""
|
||||||
cursor.execute(check_query, (doc_id,))
|
cursor.execute(check_query, (doc_id,))
|
||||||
result = cursor.fetchone()
|
doc_data = cursor.fetchone()
|
||||||
|
|
||||||
if not result:
|
if not doc_data:
|
||||||
raise Exception("文档不存在")
|
print(f"[INFO] 文档 {doc_id} 在数据库中未找到。")
|
||||||
|
return False
|
||||||
|
|
||||||
kb_id = result[0]
|
kb_id = doc_data["kb_id"]
|
||||||
|
|
||||||
# 删除文件到文档的映射
|
# 删除文件到文档的映射
|
||||||
f2d_query = "DELETE FROM file2document WHERE document_id = %s"
|
f2d_query = "DELETE FROM file2document WHERE document_id = %s"
|
||||||
|
@ -734,6 +742,28 @@ class KnowledgebaseService:
|
||||||
cursor.close()
|
cursor.close()
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
|
es_client = get_es_client()
|
||||||
|
tenant_id_for_cleanup = doc_data["tenant_id"]
|
||||||
|
|
||||||
|
# 删除 Elasticsearch 中的相关文档块
|
||||||
|
if es_client and tenant_id_for_cleanup:
|
||||||
|
es_index_name = f"ragflow_{tenant_id_for_cleanup}"
|
||||||
|
try:
|
||||||
|
if es_client.indices.exists(index=es_index_name):
|
||||||
|
query_body = {"query": {"term": {"doc_id": doc_id}}}
|
||||||
|
resp = es_client.delete_by_query(
|
||||||
|
index=es_index_name,
|
||||||
|
body=query_body,
|
||||||
|
refresh=True, # 确保立即生效
|
||||||
|
ignore_unavailable=True, # 如果索引在此期间被删除
|
||||||
|
)
|
||||||
|
deleted_count = resp.get("deleted", 0)
|
||||||
|
print(f"[ES-SUCCESS] 从索引 {es_index_name} 中删除 {deleted_count} 个与 doc_id {doc_id} 相关的块。")
|
||||||
|
else:
|
||||||
|
print(f"[ES-INFO] 索引 {es_index_name} 不存在,跳过 ES 清理 for doc_id {doc_id}。")
|
||||||
|
except Exception as es_err:
|
||||||
|
print(f"[ES-ERROR] 清理 ES 块 for doc_id {doc_id} (index {es_index_name}) 失败: {str(es_err)}")
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
Loading…
Reference in New Issue