400 lines
12 KiB
Python
400 lines
12 KiB
Python
|
import os
|
|||
|
import mysql.connector
|
|||
|
from io import BytesIO
|
|||
|
from minio import Minio
|
|||
|
from dotenv import load_dotenv
|
|||
|
|
|||
|
# 加载环境变量
|
|||
|
load_dotenv("../../docker/.env")
|
|||
|
|
|||
|
# 数据库连接配置
|
|||
|
DB_CONFIG = {
|
|||
|
"host": "localhost", # 如果在Docker外运行,使用localhost
|
|||
|
"port": int(os.getenv("MYSQL_PORT", "5455")),
|
|||
|
"user": "root",
|
|||
|
"password": os.getenv("MYSQL_PASSWORD", "infini_rag_flow"),
|
|||
|
"database": "rag_flow"
|
|||
|
}
|
|||
|
|
|||
|
# MinIO连接配置
|
|||
|
MINIO_CONFIG = {
|
|||
|
"endpoint": "localhost:" + os.getenv("MINIO_PORT", "9000"),
|
|||
|
"access_key": os.getenv("MINIO_USER", "rag_flow"),
|
|||
|
"secret_key": os.getenv("MINIO_PASSWORD", "infini_rag_flow"),
|
|||
|
"secure": False
|
|||
|
}
|
|||
|
|
|||
|
def get_minio_client():
|
|||
|
"""创建MinIO客户端"""
|
|||
|
return Minio(
|
|||
|
endpoint=MINIO_CONFIG["endpoint"],
|
|||
|
access_key=MINIO_CONFIG["access_key"],
|
|||
|
secret_key=MINIO_CONFIG["secret_key"],
|
|||
|
secure=MINIO_CONFIG["secure"]
|
|||
|
)
|
|||
|
|
|||
|
def get_db_connection():
|
|||
|
"""创建数据库连接"""
|
|||
|
return mysql.connector.connect(**DB_CONFIG)
|
|||
|
|
|||
|
def get_files_list(current_page, page_size, name_filter=""):
|
|||
|
"""
|
|||
|
获取文件列表
|
|||
|
|
|||
|
Args:
|
|||
|
current_page: 当前页码
|
|||
|
page_size: 每页大小
|
|||
|
name_filter: 文件名过滤条件
|
|||
|
|
|||
|
Returns:
|
|||
|
tuple: (文件列表, 总数)
|
|||
|
"""
|
|||
|
try:
|
|||
|
# 计算偏移量
|
|||
|
offset = (current_page - 1) * page_size
|
|||
|
|
|||
|
# 连接数据库
|
|||
|
conn = get_db_connection()
|
|||
|
cursor = conn.cursor(dictionary=True)
|
|||
|
|
|||
|
# 构建查询条件
|
|||
|
where_clause = ""
|
|||
|
params = []
|
|||
|
|
|||
|
if name_filter:
|
|||
|
where_clause = "WHERE d.name LIKE %s"
|
|||
|
params.append(f"%{name_filter}%")
|
|||
|
|
|||
|
# 查询总数
|
|||
|
count_query = f"""
|
|||
|
SELECT COUNT(*) as total
|
|||
|
FROM document d
|
|||
|
{where_clause}
|
|||
|
"""
|
|||
|
cursor.execute(count_query, params)
|
|||
|
total = cursor.fetchone()['total']
|
|||
|
|
|||
|
# 查询文件列表
|
|||
|
query = f"""
|
|||
|
SELECT d.id, d.name, d.kb_id, d.location, d.size, d.type, d.create_time
|
|||
|
FROM document d
|
|||
|
{where_clause}
|
|||
|
ORDER BY d.create_time DESC
|
|||
|
LIMIT %s OFFSET %s
|
|||
|
"""
|
|||
|
cursor.execute(query, params + [page_size, offset])
|
|||
|
documents = cursor.fetchall()
|
|||
|
|
|||
|
# 获取文档与文件的关联信息
|
|||
|
doc_ids = [doc['id'] for doc in documents]
|
|||
|
file_mappings = {}
|
|||
|
|
|||
|
if doc_ids:
|
|||
|
placeholders = ', '.join(['%s'] * len(doc_ids))
|
|||
|
cursor.execute(f"""
|
|||
|
SELECT f2d.document_id, f.id as file_id, f.parent_id, f.source_type
|
|||
|
FROM file2document f2d
|
|||
|
JOIN file f ON f2d.file_id = f.id
|
|||
|
WHERE f2d.document_id IN ({placeholders})
|
|||
|
""", doc_ids)
|
|||
|
|
|||
|
for row in cursor.fetchall():
|
|||
|
file_mappings[row['document_id']] = {
|
|||
|
'file_id': row['file_id'],
|
|||
|
'parent_id': row['parent_id'],
|
|||
|
'source_type': row['source_type']
|
|||
|
}
|
|||
|
|
|||
|
# 整合信息
|
|||
|
result = []
|
|||
|
for doc in documents:
|
|||
|
doc_id = doc['id']
|
|||
|
kb_id = doc['kb_id']
|
|||
|
location = doc['location']
|
|||
|
|
|||
|
# 确定存储位置
|
|||
|
storage_bucket = kb_id
|
|||
|
storage_location = location
|
|||
|
|
|||
|
# 如果有文件映射,检查是否需要使用文件的parent_id作为bucket
|
|||
|
if doc_ids and doc_id in file_mappings:
|
|||
|
file_info = file_mappings[doc_id]
|
|||
|
# 模拟File2DocumentService.get_storage_address的逻辑
|
|||
|
if file_info.get('source_type') is None or file_info.get('source_type') == 0: # LOCAL
|
|||
|
storage_bucket = file_info['parent_id']
|
|||
|
|
|||
|
# 构建结果字典
|
|||
|
result_item = {
|
|||
|
'id': doc_id,
|
|||
|
'name': doc.get('name', ''),
|
|||
|
'kb_id': kb_id,
|
|||
|
'size': doc.get('size', 0),
|
|||
|
'type': doc.get('type', ''),
|
|||
|
'location': location,
|
|||
|
'create_time': doc.get('create_time', 0)
|
|||
|
}
|
|||
|
|
|||
|
result.append(result_item)
|
|||
|
|
|||
|
cursor.close()
|
|||
|
conn.close()
|
|||
|
|
|||
|
return result, total
|
|||
|
|
|||
|
except Exception as e:
|
|||
|
raise e
|
|||
|
|
|||
|
def get_file_info(file_id):
|
|||
|
"""
|
|||
|
获取文件信息
|
|||
|
|
|||
|
Args:
|
|||
|
file_id: 文件ID
|
|||
|
|
|||
|
Returns:
|
|||
|
tuple: (文档信息, 文件映射信息, 存储桶, 存储位置)
|
|||
|
"""
|
|||
|
try:
|
|||
|
# 连接数据库
|
|||
|
conn = get_db_connection()
|
|||
|
cursor = conn.cursor(dictionary=True)
|
|||
|
|
|||
|
# 查询文档信息
|
|||
|
cursor.execute("""
|
|||
|
SELECT d.id, d.name, d.kb_id, d.location, d.type
|
|||
|
FROM document d
|
|||
|
WHERE d.id = %s
|
|||
|
""", (file_id,))
|
|||
|
|
|||
|
document = cursor.fetchone()
|
|||
|
if not document:
|
|||
|
cursor.close()
|
|||
|
conn.close()
|
|||
|
return None, None, None, None
|
|||
|
|
|||
|
# 获取文档与文件的关联信息
|
|||
|
cursor.execute("""
|
|||
|
SELECT f2d.document_id, f.id as file_id, f.parent_id, f.source_type
|
|||
|
FROM file2document f2d
|
|||
|
JOIN file f ON f2d.file_id = f.id
|
|||
|
WHERE f2d.document_id = %s
|
|||
|
""", (file_id,))
|
|||
|
|
|||
|
file_mapping = cursor.fetchone()
|
|||
|
|
|||
|
# 确定存储位置
|
|||
|
storage_bucket = document['kb_id']
|
|||
|
storage_location = document['location']
|
|||
|
|
|||
|
# 如果有文件映射,检查是否需要使用文件的parent_id作为bucket
|
|||
|
if file_mapping:
|
|||
|
# 模拟File2DocumentService.get_storage_address的逻辑
|
|||
|
if file_mapping.get('source_type') is None or file_mapping.get('source_type') == 0: # LOCAL
|
|||
|
storage_bucket = file_mapping['parent_id']
|
|||
|
|
|||
|
cursor.close()
|
|||
|
conn.close()
|
|||
|
|
|||
|
return document, file_mapping, storage_bucket, storage_location
|
|||
|
|
|||
|
except Exception as e:
|
|||
|
raise e
|
|||
|
|
|||
|
def download_file_from_minio(storage_bucket, storage_location):
|
|||
|
"""
|
|||
|
从MinIO下载文件
|
|||
|
|
|||
|
Args:
|
|||
|
storage_bucket: 存储桶
|
|||
|
storage_location: 存储位置
|
|||
|
|
|||
|
Returns:
|
|||
|
bytes: 文件数据
|
|||
|
"""
|
|||
|
try:
|
|||
|
# 从MinIO下载文件
|
|||
|
minio_client = get_minio_client()
|
|||
|
|
|||
|
# 检查bucket是否存在
|
|||
|
if not minio_client.bucket_exists(storage_bucket):
|
|||
|
raise Exception(f"存储桶 {storage_bucket} 不存在")
|
|||
|
|
|||
|
# 下载文件
|
|||
|
response = minio_client.get_object(storage_bucket, storage_location)
|
|||
|
file_data = response.read()
|
|||
|
|
|||
|
return file_data
|
|||
|
|
|||
|
except Exception as e:
|
|||
|
raise e
|
|||
|
|
|||
|
def delete_file(file_id):
|
|||
|
"""
|
|||
|
删除文件
|
|||
|
|
|||
|
Args:
|
|||
|
file_id: 文件ID
|
|||
|
|
|||
|
Returns:
|
|||
|
bool: 是否删除成功
|
|||
|
"""
|
|||
|
try:
|
|||
|
# 获取文件信息
|
|||
|
document, file_mapping, storage_bucket, storage_location = get_file_info(file_id)
|
|||
|
|
|||
|
if not document:
|
|||
|
return False
|
|||
|
|
|||
|
# 连接数据库
|
|||
|
conn = get_db_connection()
|
|||
|
cursor = conn.cursor(dictionary=True)
|
|||
|
|
|||
|
# 如果有文件映射,获取文件ID
|
|||
|
file_id_to_delete = None
|
|||
|
if file_mapping:
|
|||
|
file_id_to_delete = file_mapping['file_id']
|
|||
|
|
|||
|
# 开始事务
|
|||
|
conn.start_transaction()
|
|||
|
|
|||
|
try:
|
|||
|
# 1. 删除document表中的记录
|
|||
|
cursor.execute("DELETE FROM document WHERE id = %s", (file_id,))
|
|||
|
|
|||
|
# 2. 如果有关联的file2document记录,删除它
|
|||
|
if file_mapping:
|
|||
|
cursor.execute("DELETE FROM file2document WHERE document_id = %s", (file_id,))
|
|||
|
|
|||
|
# 3. 如果有关联的file记录,删除它
|
|||
|
if file_id_to_delete:
|
|||
|
cursor.execute("DELETE FROM file WHERE id = %s", (file_id_to_delete,))
|
|||
|
|
|||
|
# 提交事务
|
|||
|
conn.commit()
|
|||
|
|
|||
|
# 从MinIO删除文件
|
|||
|
try:
|
|||
|
minio_client = get_minio_client()
|
|||
|
|
|||
|
# 检查bucket是否存在
|
|||
|
if minio_client.bucket_exists(storage_bucket):
|
|||
|
# 删除文件
|
|||
|
minio_client.remove_object(storage_bucket, storage_location)
|
|||
|
except Exception as e:
|
|||
|
# 即使MinIO删除失败,也不影响数据库操作的成功
|
|||
|
print(f"从MinIO删除文件失败: {str(e)}")
|
|||
|
|
|||
|
return True
|
|||
|
|
|||
|
except Exception as e:
|
|||
|
# 回滚事务
|
|||
|
conn.rollback()
|
|||
|
raise e
|
|||
|
|
|||
|
finally:
|
|||
|
cursor.close()
|
|||
|
conn.close()
|
|||
|
|
|||
|
except Exception as e:
|
|||
|
raise e
|
|||
|
|
|||
|
def batch_delete_files(file_ids):
|
|||
|
"""
|
|||
|
批量删除文件
|
|||
|
|
|||
|
Args:
|
|||
|
file_ids: 文件ID列表
|
|||
|
|
|||
|
Returns:
|
|||
|
int: 成功删除的文件数量
|
|||
|
"""
|
|||
|
if not file_ids:
|
|||
|
return 0
|
|||
|
|
|||
|
try:
|
|||
|
# 连接数据库
|
|||
|
conn = get_db_connection()
|
|||
|
cursor = conn.cursor(dictionary=True)
|
|||
|
|
|||
|
# 创建MinIO客户端
|
|||
|
minio_client = get_minio_client()
|
|||
|
|
|||
|
# 开始事务
|
|||
|
conn.start_transaction()
|
|||
|
|
|||
|
try:
|
|||
|
success_count = 0
|
|||
|
|
|||
|
for file_id in file_ids:
|
|||
|
# 查询文档信息
|
|||
|
cursor.execute("""
|
|||
|
SELECT d.id, d.kb_id, d.location
|
|||
|
FROM document d
|
|||
|
WHERE d.id = %s
|
|||
|
""", (file_id,))
|
|||
|
|
|||
|
document = cursor.fetchone()
|
|||
|
if not document:
|
|||
|
continue
|
|||
|
|
|||
|
# 获取文档与文件的关联信息
|
|||
|
cursor.execute("""
|
|||
|
SELECT f2d.id as f2d_id, f2d.document_id, f2d.file_id, f.parent_id, f.source_type
|
|||
|
FROM file2document f2d
|
|||
|
JOIN file f ON f2d.file_id = f.id
|
|||
|
WHERE f2d.document_id = %s
|
|||
|
""", (file_id,))
|
|||
|
|
|||
|
file_mapping = cursor.fetchone()
|
|||
|
|
|||
|
# 确定存储位置
|
|||
|
storage_bucket = document['kb_id']
|
|||
|
storage_location = document['location']
|
|||
|
|
|||
|
# 如果有文件映射,检查是否需要使用文件的parent_id作为bucket
|
|||
|
file_id_to_delete = None
|
|||
|
if file_mapping:
|
|||
|
file_id_to_delete = file_mapping['file_id']
|
|||
|
# 模拟File2DocumentService.get_storage_address的逻辑
|
|||
|
if file_mapping.get('source_type') is None or file_mapping.get('source_type') == 0: # LOCAL
|
|||
|
storage_bucket = file_mapping['parent_id']
|
|||
|
|
|||
|
# 1. 删除document表中的记录
|
|||
|
cursor.execute("DELETE FROM document WHERE id = %s", (file_id,))
|
|||
|
|
|||
|
# 2. 如果有关联的file2document记录,删除它
|
|||
|
if file_mapping:
|
|||
|
cursor.execute("DELETE FROM file2document WHERE id = %s", (file_mapping['f2d_id'],))
|
|||
|
|
|||
|
# 3. 如果有关联的file记录,删除它
|
|||
|
if file_id_to_delete:
|
|||
|
cursor.execute("DELETE FROM file WHERE id = %s", (file_id_to_delete,))
|
|||
|
|
|||
|
# 从MinIO删除文件
|
|||
|
try:
|
|||
|
# 检查bucket是否存在
|
|||
|
if minio_client.bucket_exists(storage_bucket):
|
|||
|
# 删除文件
|
|||
|
minio_client.remove_object(storage_bucket, storage_location)
|
|||
|
except Exception as e:
|
|||
|
# 即使MinIO删除失败,也不影响数据库操作的成功
|
|||
|
print(f"从MinIO删除文件失败: {str(e)}")
|
|||
|
|
|||
|
success_count += 1
|
|||
|
|
|||
|
# 提交事务
|
|||
|
conn.commit()
|
|||
|
|
|||
|
return success_count
|
|||
|
|
|||
|
except Exception as e:
|
|||
|
# 回滚事务
|
|||
|
conn.rollback()
|
|||
|
raise e
|
|||
|
|
|||
|
finally:
|
|||
|
cursor.close()
|
|||
|
conn.close()
|
|||
|
|
|||
|
except Exception as e:
|
|||
|
raise e
|