2025-04-11 16:33:28 +08:00
|
|
|
|
import os
|
|
|
|
|
import mysql.connector
|
2025-04-12 00:42:19 +08:00
|
|
|
|
import re
|
2025-04-11 16:33:28 +08:00
|
|
|
|
from io import BytesIO
|
|
|
|
|
from minio import Minio
|
|
|
|
|
from dotenv import load_dotenv
|
2025-04-12 00:42:19 +08:00
|
|
|
|
from werkzeug.utils import secure_filename
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
from .utils import FileType, FileSource, StatusEnum, get_uuid
|
|
|
|
|
from .document_service import DocumentService
|
|
|
|
|
from .file_service import FileService
|
|
|
|
|
from .file2document_service import File2DocumentService
|
2025-04-12 16:40:35 +08:00
|
|
|
|
from database import DB_CONFIG, MINIO_CONFIG
|
2025-04-11 16:33:28 +08:00
|
|
|
|
|
|
|
|
|
# Load environment variables from the docker .env file
load_dotenv("../../docker/.env")

# Local directory where uploaded files are staged before being pushed to MinIO
UPLOAD_FOLDER = '/data/uploads'

# Extensions (lower-case, without the leading dot) accepted by allowed_file()
ALLOWED_EXTENSIONS = {
    'pdf',
    'doc', 'docx',
    'ppt', 'pptx',
    'xls', 'xlsx',
    'jpg', 'jpeg', 'png',
    'txt', 'md',
}
|
|
|
|
|
|
|
|
|
|
def allowed_file(filename):
    """Return True when the extension of *filename* is in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot and is compared
    case-insensitively. A name that contains no dot is rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS
|
|
|
|
|
|
|
|
|
|
def filename_type(filename):
    """Map a file name to a FileType value based on its extension.

    The extension is taken with os.path.splitext and lower-cased. Any
    extension that is not listed below maps to FileType.OTHER.
    """
    extension = os.path.splitext(filename)[1].lower()

    # Extension -> name of the FileType member. The member itself is looked
    # up only for the matching entry.
    member_by_extension = {
        '.jpg': 'VISUAL',
        '.jpeg': 'VISUAL',
        '.png': 'VISUAL',
        '.gif': 'VISUAL',
        '.bmp': 'VISUAL',
        '.pdf': 'PDF',
        '.doc': 'WORD',
        '.docx': 'WORD',
        '.xls': 'EXCEL',
        '.xlsx': 'EXCEL',
        '.ppt': 'PPT',
        '.pptx': 'PPT',
        '.txt': 'TEXT',
        '.md': 'TEXT',
    }

    member_name = member_by_extension.get(extension, 'OTHER')
    return getattr(FileType, member_name).value
|
|
|
|
|
|
2025-04-11 16:33:28 +08:00
|
|
|
|
def get_minio_client():
    """Build a MinIO client from the values in MINIO_CONFIG."""
    option_names = ("endpoint", "access_key", "secret_key", "secure")
    options = {option: MINIO_CONFIG[option] for option in option_names}
    return Minio(**options)
|
|
|
|
|
|
|
|
|
|
def get_db_connection():
    """Open and return a new MySQL connection configured by DB_CONFIG."""
    connection = mysql.connector.connect(**DB_CONFIG)
    return connection
|
|
|
|
|
|
|
|
|
|
def get_files_list(current_page, page_size, name_filter=""):
    """
    Fetch one page of rows from the document table, newest first.

    Args:
        current_page: 1-based page number; the row offset is
            (current_page - 1) * page_size.
        page_size: Maximum number of rows to return.
        name_filter: Optional substring. When non-empty, only documents whose
            name matches ``LIKE %name_filter%`` are counted and returned.

    Returns:
        tuple: (documents, total) where documents is a list of dicts with the
        keys id, name, kb_id, size, type, location and create_time, and total
        is the number of rows matching the filter across all pages.

    Raises:
        Exception: Any error raised by the database driver is propagated.
    """
    offset = (current_page - 1) * page_size

    # Build the optional filter; the value is always passed as a bound
    # parameter, never interpolated into the SQL text.
    where_clause = ""
    params = []
    if name_filter:
        where_clause = "WHERE d.name LIKE %s"
        params.append(f"%{name_filter}%")

    count_query = f"""
        SELECT COUNT(*) as total
        FROM document d
        {where_clause}
    """
    page_query = f"""
        SELECT d.id, d.name, d.kb_id, d.location, d.size, d.type, d.create_time
        FROM document d
        {where_clause}
        ORDER BY d.create_time DESC
        LIMIT %s OFFSET %s
    """

    conn = get_db_connection()
    try:
        cursor = conn.cursor(dictionary=True)
        try:
            cursor.execute(count_query, params)
            total = cursor.fetchone()['total']

            cursor.execute(page_query, params + [page_size, offset])
            documents = cursor.fetchall()
        finally:
            # Release the cursor even when a query fails.
            cursor.close()
    finally:
        # Always return the connection, on success and on error alike.
        conn.close()

    result = [
        {
            'id': doc['id'],
            'name': doc.get('name', ''),
            'kb_id': doc['kb_id'],
            'size': doc.get('size', 0),
            'type': doc.get('type', ''),
            'location': doc['location'],
            'create_time': doc.get('create_time', 0),
        }
        for doc in documents
    ]

    return result, total
|
|
|
|
|
|
|
|
|
|
def get_file_info(file_id):
    """
    Look up a document and the storage address of its underlying object.

    Args:
        file_id: Primary key of the row in the document table.

    Returns:
        tuple: (document, file_mapping, storage_bucket, storage_location).
        document is the row from the document table, file_mapping is the first
        joined file2document/file row for it (or None when there is none).
        All four values are None when the document does not exist.

    Raises:
        Exception: Any error raised by the database driver is propagated.
    """
    conn = get_db_connection()
    try:
        cursor = conn.cursor(dictionary=True)
        try:
            cursor.execute("""
                SELECT d.id, d.name, d.kb_id, d.location, d.type
                FROM document d
                WHERE d.id = %s
            """, (file_id,))
            document = cursor.fetchone()
            if not document:
                return None, None, None, None

            cursor.execute("""
                SELECT f2d.document_id, f.id as file_id, f.parent_id, f.source_type
                FROM file2document f2d
                JOIN file f ON f2d.file_id = f.id
                WHERE f2d.document_id = %s
            """, (file_id,))
            file_mapping = cursor.fetchone()
        finally:
            # Release the cursor even when a query fails.
            cursor.close()
    finally:
        # Always return the connection, on success and on error alike.
        conn.close()

    # Default storage address comes from the document row itself.
    storage_bucket = document['kb_id']
    storage_location = document['location']

    # Mirrors File2DocumentService.get_storage_address: when the linked file
    # has source_type None or 0 (LOCAL), the bucket is the file's parent_id.
    if file_mapping:
        source_type = file_mapping.get('source_type')
        if source_type is None or source_type == 0:
            storage_bucket = file_mapping['parent_id']

    return document, file_mapping, storage_bucket, storage_location
|
|
|
|
|
|
|
|
|
|
def download_file_from_minio(storage_bucket, storage_location):
    """
    Download an object from MinIO and return its full content.

    Args:
        storage_bucket: Name of the bucket that holds the object.
        storage_location: Object name inside the bucket.

    Returns:
        bytes: The object's content, read fully into memory.

    Raises:
        Exception: When the bucket does not exist, or when the MinIO client
            raises while checking the bucket or fetching the object.
    """
    minio_client = get_minio_client()

    if not minio_client.bucket_exists(storage_bucket):
        raise Exception(f"存储桶 {storage_bucket} 不存在")

    response = minio_client.get_object(storage_bucket, storage_location)
    try:
        return response.read()
    finally:
        # get_object hands back an open HTTP response; the caller has to
        # close it and release the connection back to the pool.
        response.close()
        response.release_conn()
|
|
|
|
|
|
|
|
|
|
def delete_file(file_id):
    """
    Delete a document, its file link, its file row and its stored object.

    The three database deletes run in one transaction. Removing the object
    from MinIO is best-effort and happens only after the transaction has been
    committed; a MinIO failure is printed and does not change the result.

    Args:
        file_id: Primary key of the row in the document table.

    Returns:
        bool: True when the database records were deleted, False when no
        document with this id exists.

    Raises:
        Exception: Any database error is propagated after the transaction has
            been rolled back.
    """
    document, file_mapping, storage_bucket, storage_location = get_file_info(file_id)
    if not document:
        return False

    file_id_to_delete = file_mapping['file_id'] if file_mapping else None

    conn = get_db_connection()
    try:
        cursor = conn.cursor(dictionary=True)
        try:
            # Started inside the try so the connection is closed even when
            # opening the transaction fails.
            conn.start_transaction()

            # 1. Remove the document row.
            cursor.execute("DELETE FROM document WHERE id = %s", (file_id,))

            # 2. Remove the file2document link, if any.
            if file_mapping:
                cursor.execute("DELETE FROM file2document WHERE document_id = %s", (file_id,))

            # 3. Remove the linked file row, if any.
            if file_id_to_delete:
                cursor.execute("DELETE FROM file WHERE id = %s", (file_id_to_delete,))

            conn.commit()
        except Exception:
            conn.rollback()
            raise
        finally:
            cursor.close()
    finally:
        conn.close()

    # Best-effort cleanup of the stored object; the database state is already
    # committed, so a failure here is reported but not raised.
    try:
        minio_client = get_minio_client()
        if minio_client.bucket_exists(storage_bucket):
            minio_client.remove_object(storage_bucket, storage_location)
    except Exception as e:
        print(f"从MinIO删除文件失败: {str(e)}")

    return True
|
|
|
|
|
|
|
|
|
|
def batch_delete_files(file_ids):
    """
    Delete several documents together with their file rows and stored objects.

    All database deletes run in a single transaction. Stored objects are
    removed from MinIO only after that transaction has been committed, so a
    rollback never leaves database rows pointing at objects that were already
    removed. MinIO failures are printed and do not affect the result.

    Args:
        file_ids: Iterable of document ids. Ids with no matching document are
            skipped.

    Returns:
        int: Number of documents whose database records were deleted
        (0 when file_ids is empty).

    Raises:
        Exception: Errors creating the MinIO client or talking to the database
            are propagated; the transaction is rolled back first.
    """
    if not file_ids:
        return 0

    # Created before the DB connection so a client failure cannot leak one.
    minio_client = get_minio_client()

    objects_to_remove = []  # (bucket, object name) pairs, removed after commit
    success_count = 0

    conn = get_db_connection()
    try:
        cursor = conn.cursor(dictionary=True)
        try:
            conn.start_transaction()

            for file_id in file_ids:
                cursor.execute("""
                    SELECT d.id, d.kb_id, d.location
                    FROM document d
                    WHERE d.id = %s
                """, (file_id,))
                document = cursor.fetchone()
                if not document:
                    continue

                cursor.execute("""
                    SELECT f2d.id as f2d_id, f2d.document_id, f2d.file_id, f.parent_id, f.source_type
                    FROM file2document f2d
                    JOIN file f ON f2d.file_id = f.id
                    WHERE f2d.document_id = %s
                """, (file_id,))
                file_mapping = cursor.fetchone()

                # Default storage address comes from the document row.
                storage_bucket = document['kb_id']
                storage_location = document['location']

                file_id_to_delete = None
                if file_mapping:
                    file_id_to_delete = file_mapping['file_id']
                    # Mirrors File2DocumentService.get_storage_address: for
                    # source_type None or 0 (LOCAL) the bucket is parent_id.
                    source_type = file_mapping.get('source_type')
                    if source_type is None or source_type == 0:
                        storage_bucket = file_mapping['parent_id']

                # 1. Remove the document row.
                cursor.execute("DELETE FROM document WHERE id = %s", (file_id,))

                # 2. Remove the file2document link, if any.
                if file_mapping:
                    cursor.execute("DELETE FROM file2document WHERE id = %s", (file_mapping['f2d_id'],))

                # 3. Remove the linked file row, if any.
                if file_id_to_delete:
                    cursor.execute("DELETE FROM file WHERE id = %s", (file_id_to_delete,))

                objects_to_remove.append((storage_bucket, storage_location))
                success_count += 1

            conn.commit()
        except Exception:
            conn.rollback()
            raise
        finally:
            cursor.close()
    finally:
        conn.close()

    # Best-effort object cleanup, only once the database changes are durable.
    for storage_bucket, storage_location in objects_to_remove:
        try:
            if minio_client.bucket_exists(storage_bucket):
                minio_client.remove_object(storage_bucket, storage_location)
        except Exception as e:
            print(f"从MinIO删除文件失败: {str(e)}")

    return success_count
|
|
|
|
|
|
2025-04-12 16:40:35 +08:00
|
|
|
|
def _lookup_default_user_id():
    """Return the id of the earliest-created user, or 'system' when unavailable."""
    try:
        conn = get_db_connection()
        try:
            cursor = conn.cursor(dictionary=True)
            try:
                cursor.execute("""
                    SELECT id FROM user
                    WHERE create_time = (SELECT MIN(create_time) FROM user)
                    LIMIT 1
                """)
                earliest_user = cursor.fetchone()
            finally:
                cursor.close()
        finally:
            conn.close()

        if earliest_user:
            user_id = earliest_user['id']
            print(f"使用创建时间最早的用户ID: {user_id}")
            return user_id

        print("未找到用户, 使用默认用户ID: system")
        return 'system'
    except Exception as e:
        print(f"查询最早用户ID失败: {str(e)}")
        return 'system'


def _lookup_or_create_root_folder_id(user_id):
    """Return the id of the user's root folder, creating it if missing; None on error."""
    try:
        conn = get_db_connection()
        try:
            cursor = conn.cursor(dictionary=True)
            try:
                # A root folder is a file row whose parent_id equals its own id.
                cursor.execute("""
                    SELECT id FROM file
                    WHERE tenant_id = %s AND parent_id = id
                    LIMIT 1
                """, (user_id,))
                root_folder = cursor.fetchone()
            finally:
                cursor.close()
        finally:
            conn.close()

        if root_folder:
            parent_id = root_folder['id']
            print(f"使用用户根文件夹ID: {parent_id}")
            return parent_id

        # No root folder yet: create one that points at itself.
        root_id = get_uuid()
        now = datetime.now()
        current_time = int(now.timestamp())
        current_date = now.strftime('%Y-%m-%d %H:%M:%S')

        FileService.insert({
            "id": root_id,
            "parent_id": root_id,
            "tenant_id": user_id,
            "created_by": user_id,
            "name": "/",
            "type": FileType.FOLDER.value,
            "size": 0,
            "location": "",
            "source_type": FileSource.LOCAL.value,
            "create_time": current_time,
            "create_date": current_date,
            "update_time": current_time,
            "update_date": current_date,
        })
        print(f"创建并使用新的根文件夹ID: {root_id}")
        return root_id
    except Exception as e:
        print(f"查询根文件夹ID失败: {str(e)}")
        # The caller falls back to the per-file bucket id as parent.
        return None


def upload_files_to_server(files, kb_id=None, user_id=None, parent_id=None):
    """
    Store uploaded files in MinIO and register them in the database.

    For every accepted file the function stages it under UPLOAD_FOLDER,
    uploads it into a freshly named bucket, inserts a document row, a file row
    and a file2document link, and finally removes the staged copy.

    Args:
        files: Sized collection of uploaded file objects; each must expose
            ``filename`` and ``save(path)``. Entries with an empty filename or
            a disallowed extension are skipped without a result entry.
        kb_id: Accepted for interface compatibility; not used by this function.
        user_id: Owner of the created records. When None, the earliest-created
            user is looked up, falling back to 'system'.
        parent_id: Folder id for the created file row. When None, the user's
            root folder is looked up (and created if missing); if that fails
            the per-file bucket id is used instead.

    Returns:
        dict: ``{'code': 0, 'data': results, 'message': ...}`` where results
        holds one dict per processed file with ``status`` 'success' or
        'failed' (failed entries carry an ``error`` string).
    """
    if user_id is None:
        user_id = _lookup_default_user_id()

    if parent_id is None:
        parent_id = _lookup_or_create_root_folder_id(user_id)

    results = []

    for file in files:
        if file.filename == '':
            continue
        if not (file and allowed_file(file.filename)):
            continue

        # Every file gets its own bucket.
        file_bucket_id = FileService.generate_bucket_name()
        original_filename = file.filename
        name, ext = os.path.splitext(original_filename)

        # Replace only characters that are unsafe in file names; Chinese and
        # other Unicode characters are kept.
        safe_name = re.sub(r'[\\/:*?"<>|]', '_', name)
        if not safe_name.strip():
            safe_name = f"file_{get_uuid()[:8]}"

        filename = safe_name + ext.lower()
        filepath = os.path.join(UPLOAD_FOLDER, filename)

        try:
            # 1. Stage the upload on local disk.
            os.makedirs(UPLOAD_FOLDER, exist_ok=True)
            file.save(filepath)
            print(f"文件已保存到临时目录: {filepath}")
            print(f"原始文件名: {original_filename}, 处理后文件名: {filename}, 扩展名: {ext[1:]}")

            # 2. Classify the file by its sanitized name.
            filetype = filename_type(filename)
            if filetype == FileType.OTHER.value:
                raise RuntimeError("不支持的文件类型")

            # 3. The object name inside the bucket is the sanitized file name.
            minio_client = get_minio_client()
            location = filename

            if not minio_client.bucket_exists(file_bucket_id):
                minio_client.make_bucket(file_bucket_id)
                print(f"创建MinIO存储桶: {file_bucket_id}")

            # 4. Upload the staged file.
            file_size = os.path.getsize(filepath)
            with open(filepath, 'rb') as file_data:
                minio_client.put_object(
                    bucket_name=file_bucket_id,
                    object_name=location,
                    data=file_data,
                    length=file_size,
                )
            print(f"文件已上传到MinIO: {file_bucket_id}/{location}")

            # 5. Reserve a thumbnail object name for images and PDFs. Only the
            #    name is recorded; no thumbnail is generated in this function.
            thumbnail_location = ''
            if filetype in [FileType.VISUAL.value, FileType.PDF.value]:
                try:
                    thumbnail_location = f'thumbnail_{get_uuid()}.png'
                except Exception as e:
                    print(f"生成缩略图失败: {str(e)}")

            # 6. Build the document record.
            doc_id = get_uuid()
            current_time = int(datetime.now().timestamp())
            current_date = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

            doc = {
                "id": doc_id,
                "kb_id": file_bucket_id,
                "parser_id": FileService.get_parser(filetype, filename, ""),
                "parser_config": {"pages": [[1, 1000000]]},
                "source_type": "local",
                "created_by": user_id or 'system',
                "type": filetype,
                "name": filename,
                "location": location,
                "size": file_size,
                "thumbnail": thumbnail_location,
                "token_num": 0,
                "chunk_num": 0,
                "progress": 0,
                "progress_msg": "",
                "run": "0",
                "status": StatusEnum.VALID.value,
                "create_time": current_time,
                "create_date": current_date,
                "update_time": current_time,
                "update_date": current_date,
            }

            # 7. Persist the records.
            # NOTE(review): the service inserts below are not handed this
            # connection, so whether commit/rollback here covers them depends
            # on how the services obtain their connections - confirm.
            conn = get_db_connection()
            cursor = None
            try:
                cursor = conn.cursor()
                DocumentService.insert(doc)
                print(f"文档记录已保存到MySQL: {doc_id}")

                # 8. File row; a caller-supplied or resolved parent_id wins
                #    over the per-file bucket id.
                file_record = {
                    "id": get_uuid(),
                    "parent_id": parent_id or file_bucket_id,
                    "tenant_id": user_id or 'system',
                    "created_by": user_id or 'system',
                    "name": filename,
                    "type": filetype,
                    "size": doc["size"],
                    "location": location,
                    "source_type": FileSource.KNOWLEDGEBASE.value,
                    "create_time": current_time,
                    "create_date": current_date,
                    "update_time": current_time,
                    "update_date": current_date,
                }
                FileService.insert(file_record)
                print(f"文件记录已保存到MySQL: {file_record['id']}")

                # 9. Link the file row to the document row.
                File2DocumentService.insert({
                    "id": get_uuid(),
                    "file_id": file_record["id"],
                    "document_id": doc_id,
                    "create_time": current_time,
                    "create_date": current_date,
                    "update_time": current_time,
                    "update_date": current_date,
                })
                print(f"关联记录已保存到MySQL: {file_record['id']} -> {doc_id}")

                conn.commit()

                results.append({
                    'id': doc_id,
                    'name': filename,
                    'size': doc["size"],
                    'type': filetype,
                    'status': 'success',
                })
            except Exception as e:
                conn.rollback()
                print(f"数据库操作失败: {str(e)}")
                raise
            finally:
                # cursor stays None when conn.cursor() itself failed.
                try:
                    if cursor is not None:
                        cursor.close()
                finally:
                    conn.close()

        except Exception as e:
            results.append({
                'name': filename,
                'error': str(e),
                'status': 'failed',
            })
            print(f"文件上传过程中出错: {filename}, 错误: {str(e)}")
        finally:
            # Remove the staged copy whether or not the upload succeeded.
            if os.path.exists(filepath):
                os.remove(filepath)

    succeeded = sum(1 for r in results if r["status"] == "success")
    return {
        'code': 0,
        'data': results,
        'message': f'成功上传 {succeeded}/{len(files)} 个文件',
    }
|