feat(文件管理): 添加文件列表排序功能 (#79)

在文件管理模块中，新增了文件列表的排序功能。用户可以根据文件名、大小、类型和创建时间等字段进行升序或降序排序。同时，前端页面也增加了排序相关的 UI 交互，后端接口支持传递排序字段和排序方式参数。
This commit is contained in:
zstar 2025-05-12 00:17:28 +08:00 committed by GitHub
parent e6c18119da
commit 79360c48bd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 249 additions and 206 deletions

View File

@ -3,7 +3,7 @@ from io import BytesIO
from .. import files_bp from .. import files_bp
from services.files.service import get_files_list, get_file_info, download_file_from_minio, delete_file, batch_delete_files, get_minio_client, upload_files_to_server from services.files.service import get_files_list, get_file_info, download_file_from_minio, delete_file, batch_delete_files, upload_files_to_server
from services.files.utils import FileType from services.files.utils import FileType
UPLOAD_FOLDER = "/data/uploads" UPLOAD_FOLDER = "/data/uploads"
@ -36,8 +36,10 @@ def get_files():
current_page = int(request.args.get("currentPage", 1)) current_page = int(request.args.get("currentPage", 1))
page_size = int(request.args.get("size", 10)) page_size = int(request.args.get("size", 10))
name_filter = request.args.get("name", "") name_filter = request.args.get("name", "")
sort_by = request.args.get("sort_by", "create_time")
sort_order = request.args.get("sort_order", "desc")
result, total = get_files_list(current_page, page_size, name_filter) result, total = get_files_list(current_page, page_size, name_filter, sort_by, sort_order)
return jsonify({"code": 0, "data": {"list": result, "total": total}, "message": "获取文件列表成功"}) return jsonify({"code": 0, "data": {"list": result, "total": total}, "message": "获取文件列表成功"})

View File

@ -2,94 +2,92 @@ import os
import mysql.connector import mysql.connector
import re import re
import tempfile import tempfile
from io import BytesIO
from minio import Minio from minio import Minio
from dotenv import load_dotenv from dotenv import load_dotenv
from werkzeug.utils import secure_filename from datetime import datetime
from datetime import datetime from .utils import FileType, FileSource, get_uuid
from .utils import FileType, FileSource, StatusEnum, get_uuid
from .document_service import DocumentService
from .file_service import FileService
from .file2document_service import File2DocumentService
from database import DB_CONFIG, MINIO_CONFIG from database import DB_CONFIG, MINIO_CONFIG
# 加载环境变量 # 加载环境变量
load_dotenv("../../docker/.env") load_dotenv("../../docker/.env")
temp_dir = tempfile.gettempdir() temp_dir = tempfile.gettempdir()
UPLOAD_FOLDER = os.path.join(temp_dir, "uploads") UPLOAD_FOLDER = os.path.join(temp_dir, "uploads")
ALLOWED_EXTENSIONS = {'pdf', 'doc', 'docx', 'ppt', 'pptx', 'xls', 'xlsx', 'jpg', 'jpeg', 'png', 'txt', 'md'} ALLOWED_EXTENSIONS = {"pdf", "doc", "docx", "ppt", "pptx", "xls", "xlsx", "jpg", "jpeg", "png", "txt", "md"}
def allowed_file(filename):
    """Return True when *filename* carries an extension listed in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot, compared case-insensitively.
    A name without any dot is rejected.
    """
    if "." not in filename:
        return False
    extension = filename.rsplit(".", 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS
def filename_type(filename):
    """Determine the FileType value for a file name from its extension.

    The extension is matched case-insensitively; anything not in the table
    below maps to FileType.OTHER.
    """
    # Extension -> name of the FileType member it belongs to. The member is
    # looked up only for the matched entry.
    member_by_suffix = {
        ".jpg": "VISUAL",
        ".jpeg": "VISUAL",
        ".png": "VISUAL",
        ".gif": "VISUAL",
        ".bmp": "VISUAL",
        ".pdf": "PDF",
        ".doc": "WORD",
        ".docx": "WORD",
        ".xls": "EXCEL",
        ".xlsx": "EXCEL",
        ".ppt": "PPT",
        ".pptx": "PPT",
        ".txt": "TEXT",
        ".md": "TEXT",
    }
    suffix = os.path.splitext(filename)[1].lower()
    member_name = member_by_suffix.get(suffix, "OTHER")
    return getattr(FileType, member_name).value
def get_minio_client():
    """Create a MinIO client from the connection settings in MINIO_CONFIG."""
    option_names = ("endpoint", "access_key", "secret_key", "secure")
    client_options = {name: MINIO_CONFIG[name] for name in option_names}
    return Minio(**client_options)
def get_db_connection():
    """Open and return a new MySQL connection configured by DB_CONFIG."""
    connection = mysql.connector.connect(**DB_CONFIG)
    return connection
def get_files_list(current_page, page_size, name_filter="", sort_by="create_time", sort_order="desc"):
    """
    Return one page of non-folder entries from the ``file`` table.

    Args:
        current_page: 1-based page number
        page_size: number of rows per page
        name_filter: when non-empty, only files whose name contains this text
            are returned
        sort_by: column to sort on; one of name, size, type, create_time,
            create_date. Any other value falls back to create_time.
        sort_order: "asc" or "desc" (case-insensitive). Any other value falls
            back to "desc".

    Returns:
        tuple: (list of file dicts, total number of matching files)
    """
    # ORDER BY terms cannot be passed as bound parameters, so both pieces are
    # interpolated into the SQL text and must come from fixed whitelists.
    valid_sort_fields = ("name", "size", "type", "create_time", "create_date")
    if sort_by not in valid_sort_fields:
        sort_by = "create_time"
    sort_direction = str(sort_order).upper()
    if sort_direction not in ("ASC", "DESC"):
        sort_direction = "DESC"
    sort_clause = f"ORDER BY f.{sort_by} {sort_direction}"

    # Folder rows are never listed.
    where_clause = "WHERE f.type != 'folder'"
    params = []
    if name_filter:
        where_clause += " AND f.name LIKE %s"
        params.append(f"%{name_filter}%")

    offset = (current_page - 1) * page_size

    conn = get_db_connection()
    try:
        cursor = conn.cursor(dictionary=True)
        try:
            # Total number of matching rows, independent of paging.
            count_query = f"""
                SELECT COUNT(*) as total
                FROM file f
                {where_clause}
            """
            cursor.execute(count_query, params)
            total = cursor.fetchone()["total"]

            # The requested page of rows.
            query = f"""
                SELECT f.id, f.name, f.parent_id, f.type, f.size, f.location, f.source_type, f.create_time, f.create_date
                FROM file f
                {where_clause}
                {sort_clause}
                LIMIT %s OFFSET %s
            """
            cursor.execute(query, params + [page_size, offset])
            files = cursor.fetchall()
        finally:
            # Release the cursor and connection even when a query fails.
            cursor.close()
    finally:
        conn.close()

    # Render create_date as a plain string when the driver returned a datetime.
    for file_item in files:
        if isinstance(file_item.get("create_date"), datetime):
            file_item["create_date"] = file_item["create_date"].strftime("%Y-%m-%d %H:%M:%S")

    return files, total
def get_file_info(file_id): def get_file_info(file_id):
""" """
获取文件信息 获取文件信息
Args: Args:
file_id: 文件ID file_id: 文件ID
Returns: Returns:
dict: 文件信息 dict: 文件信息
""" """
@ -132,67 +136,72 @@ def get_file_info(file_id):
# 连接数据库 # 连接数据库
conn = get_db_connection() conn = get_db_connection()
cursor = conn.cursor(dictionary=True) cursor = conn.cursor(dictionary=True)
# 查询文件信息 # 查询文件信息
cursor.execute(""" cursor.execute(
"""
SELECT id, name, parent_id, type, size, location, source_type SELECT id, name, parent_id, type, size, location, source_type
FROM file FROM file
WHERE id = %s WHERE id = %s
""", (file_id,)) """,
(file_id,),
)
file = cursor.fetchone() file = cursor.fetchone()
cursor.close() cursor.close()
conn.close() conn.close()
return file return file
except Exception as e: except Exception as e:
raise e raise e
def download_file_from_minio(file_id):
    """
    Download a file's content from MinIO.

    The file's ``parent_id`` is used as the bucket name and its ``location``
    as the object name.

    Args:
        file_id: ID of the file record

    Returns:
        tuple: (file content as bytes, file name)

    Raises:
        Exception: if the file record or its bucket does not exist, or the
            download itself fails.
    """
    file = get_file_info(file_id)
    if not file:
        raise Exception(f"文件 {file_id} 不存在")

    minio_client = get_minio_client()
    storage_bucket = file["parent_id"]
    storage_location = file["location"]

    if not minio_client.bucket_exists(storage_bucket):
        raise Exception(f"存储桶 {storage_bucket} 不存在")

    response = minio_client.get_object(storage_bucket, storage_location)
    try:
        file_data = response.read()
    finally:
        # The HTTP response must be closed and its connection returned to the
        # pool, otherwise every download leaks a connection.
        response.close()
        response.release_conn()

    return file_data, file["name"]
def delete_file(file_id): def delete_file(file_id):
""" """
删除文件 删除文件
Args: Args:
file_id: 文件ID file_id: 文件ID
Returns: Returns:
bool: 是否删除成功 bool: 是否删除成功
""" """
@ -200,75 +209,81 @@ def delete_file(file_id):
# 连接数据库 # 连接数据库
conn = get_db_connection() conn = get_db_connection()
cursor = conn.cursor(dictionary=True) cursor = conn.cursor(dictionary=True)
# 查询文件信息 # 查询文件信息
cursor.execute(""" cursor.execute(
"""
SELECT id, parent_id, name, location, type SELECT id, parent_id, name, location, type
FROM file FROM file
WHERE id = %s WHERE id = %s
""", (file_id,)) """,
(file_id,),
)
file = cursor.fetchone() file = cursor.fetchone()
if not file: if not file:
cursor.close() cursor.close()
conn.close() conn.close()
return False return False
# 如果是文件夹,直接返回成功(不处理文件夹) # 如果是文件夹,直接返回成功(不处理文件夹)
if file['type'] == FileType.FOLDER.value: if file["type"] == FileType.FOLDER.value:
cursor.close() cursor.close()
conn.close() conn.close()
return True return True
# 查询关联的document记录 # 查询关联的document记录
cursor.execute(""" cursor.execute(
"""
SELECT f2d.document_id, d.kb_id, d.location SELECT f2d.document_id, d.kb_id, d.location
FROM file2document f2d FROM file2document f2d
JOIN document d ON f2d.document_id = d.id JOIN document d ON f2d.document_id = d.id
WHERE f2d.file_id = %s WHERE f2d.file_id = %s
""", (file_id,)) """,
(file_id,),
)
document_mappings = cursor.fetchall() document_mappings = cursor.fetchall()
# 创建MinIO客户端在事务外创建 # 创建MinIO客户端在事务外创建
minio_client = get_minio_client() minio_client = get_minio_client()
# 开始事务 # 开始事务
try: try:
# 注意这里不再使用conn.start_transaction()而是使用execute直接执行事务相关命令 # 注意这里不再使用conn.start_transaction()而是使用execute直接执行事务相关命令
cursor.execute("START TRANSACTION") cursor.execute("START TRANSACTION")
# 1. 先删除file表中的记录 # 1. 先删除file表中的记录
cursor.execute("DELETE FROM file WHERE id = %s", (file_id,)) cursor.execute("DELETE FROM file WHERE id = %s", (file_id,))
# 2. 删除关联的file2document记录 # 2. 删除关联的file2document记录
cursor.execute("DELETE FROM file2document WHERE file_id = %s", (file_id,)) cursor.execute("DELETE FROM file2document WHERE file_id = %s", (file_id,))
# 3. 删除关联的document记录 # 3. 删除关联的document记录
for doc_mapping in document_mappings: for doc_mapping in document_mappings:
cursor.execute("DELETE FROM document WHERE id = %s", (doc_mapping['document_id'],)) cursor.execute("DELETE FROM document WHERE id = %s", (doc_mapping["document_id"],))
# 提交事务 # 提交事务
cursor.execute("COMMIT") cursor.execute("COMMIT")
# 从MinIO删除文件在事务提交后进行 # 从MinIO删除文件在事务提交后进行
try: try:
# 检查bucket是否存在如果不存在则跳过MinIO删除操作 # 检查bucket是否存在如果不存在则跳过MinIO删除操作
parent_id = file.get('parent_id') parent_id = file.get("parent_id")
if parent_id and minio_client.bucket_exists(parent_id): if parent_id and minio_client.bucket_exists(parent_id):
try: try:
# 删除文件,忽略文件不存在的错误 # 删除文件,忽略文件不存在的错误
minio_client.remove_object(parent_id, file['location']) minio_client.remove_object(parent_id, file["location"])
print(f"从MinIO删除文件成功: {parent_id}/{file['location']}") print(f"从MinIO删除文件成功: {parent_id}/{file['location']}")
except Exception as e: except Exception as e:
print(f"从MinIO删除文件失败: {parent_id}/{file['location']} - {str(e)}") print(f"从MinIO删除文件失败: {parent_id}/{file['location']} - {str(e)}")
else: else:
print(f"存储桶不存在跳过MinIO删除操作: {parent_id}") print(f"存储桶不存在跳过MinIO删除操作: {parent_id}")
# 如果有关联的document也删除document存储的文件 # 如果有关联的document也删除document存储的文件
for doc_mapping in document_mappings: for doc_mapping in document_mappings:
kb_id = doc_mapping.get('kb_id') kb_id = doc_mapping.get("kb_id")
doc_location = doc_mapping.get('location') doc_location = doc_mapping.get("location")
if kb_id and doc_location and minio_client.bucket_exists(kb_id): if kb_id and doc_location and minio_client.bucket_exists(kb_id):
try: try:
minio_client.remove_object(kb_id, doc_location) minio_client.remove_object(kb_id, doc_location)
@ -280,151 +295,165 @@ def delete_file(file_id):
except Exception as e: except Exception as e:
# 即使MinIO删除失败也不影响数据库操作的成功 # 即使MinIO删除失败也不影响数据库操作的成功
print(f"MinIO操作失败但不影响数据库删除: {str(e)}") print(f"MinIO操作失败但不影响数据库删除: {str(e)}")
return True return True
except Exception as e: except Exception as e:
# 回滚事务 # 回滚事务
try: try:
cursor.execute("ROLLBACK") cursor.execute("ROLLBACK")
except: except: # noqa: E722
pass pass
raise e raise e
finally: finally:
cursor.close() cursor.close()
conn.close() conn.close()
except Exception as e: except Exception as e:
print(f"删除文件时发生错误: {str(e)}") print(f"删除文件时发生错误: {str(e)}")
raise e raise e
def _delete_file_rows(cursor, file_ids):
    """Delete the DB rows for *file_ids* in a single transaction.

    Returns:
        tuple: (number of files deleted, list of (bucket, object_name) pairs
        for the MinIO objects that belonged to the deleted rows)
    """
    removed_objects = []
    deleted_count = 0
    try:
        # Transaction control goes through plain SQL statements on the cursor.
        cursor.execute("START TRANSACTION")

        for file_id in file_ids:
            cursor.execute(
                """
                SELECT id, parent_id, name, location, type
                FROM file
                WHERE id = %s
                """,
                (file_id,),
            )
            file = cursor.fetchone()

            # Unknown IDs and folders are skipped, not counted.
            if not file or file["type"] == FileType.FOLDER.value:
                continue

            cursor.execute(
                """
                SELECT f2d.id as f2d_id, f2d.document_id, d.kb_id, d.location
                FROM file2document f2d
                JOIN document d ON f2d.document_id = d.id
                WHERE f2d.file_id = %s
                """,
                (file_id,),
            )
            document_mappings = cursor.fetchall()

            # 1. the file row, 2. its file2document links, 3. the linked documents
            cursor.execute("DELETE FROM file WHERE id = %s", (file_id,))
            cursor.execute("DELETE FROM file2document WHERE file_id = %s", (file_id,))
            for doc_mapping in document_mappings:
                cursor.execute("DELETE FROM document WHERE id = %s", (doc_mapping["document_id"],))

            # Remember the storage locations now: once the rows are deleted
            # they can no longer be looked up.
            removed_objects.append((file["parent_id"], file["location"]))
            removed_objects.extend((doc_mapping["kb_id"], doc_mapping["location"]) for doc_mapping in document_mappings)
            deleted_count += 1

        cursor.execute("COMMIT")
    except Exception:
        try:
            cursor.execute("ROLLBACK")
        except Exception:
            # Best effort: a failing rollback must not mask the original error.
            pass
        raise
    return deleted_count, removed_objects


def batch_delete_files(file_ids):
    """
    Delete several files at once.

    The database rows (file, file2document, document) are removed in one
    transaction. After the commit, the corresponding MinIO objects are removed
    on a best-effort basis: a MinIO failure is logged and does not change the
    result.

    Args:
        file_ids: list of file IDs

    Returns:
        int: number of files deleted from the database

    Raises:
        Exception: if the database work fails; the transaction is rolled back.
    """
    if not file_ids:
        return 0

    try:
        minio_client = get_minio_client()

        conn = get_db_connection()
        try:
            cursor = conn.cursor(dictionary=True)
            try:
                success_count, removed_objects = _delete_file_rows(cursor, file_ids)
            finally:
                cursor.close()
        finally:
            conn.close()

        # Storage cleanup happens only after the commit succeeded.
        for bucket, object_name in removed_objects:
            try:
                if bucket and object_name and minio_client.bucket_exists(bucket):
                    minio_client.remove_object(bucket, object_name)
            except Exception as e:
                # A MinIO failure does not undo the successful DB deletion.
                print(f"从MinIO删除文件失败: {str(e)}")

        return success_count

    except Exception as e:
        print(f"批量删除文件时发生错误: {str(e)}")
        raise
def upload_files_to_server(files, parent_id=None, user_id=None): def upload_files_to_server(files, parent_id=None, user_id=None):
"""处理文件上传到服务器的核心逻辑""" """处理文件上传到服务器的核心逻辑"""
if user_id is None: if user_id is None:
try: try:
conn = get_db_connection() conn = get_db_connection()
cursor = conn.cursor(dictionary=True) cursor = conn.cursor(dictionary=True)
# 查询创建时间最早的用户ID # 查询创建时间最早的用户ID
query_earliest_user = """ query_earliest_user = """
SELECT id FROM user SELECT id FROM user
@ -433,26 +462,26 @@ def upload_files_to_server(files, parent_id=None, user_id=None):
""" """
cursor.execute(query_earliest_user) cursor.execute(query_earliest_user)
earliest_user = cursor.fetchone() earliest_user = cursor.fetchone()
if earliest_user: if earliest_user:
user_id = earliest_user['id'] user_id = earliest_user["id"]
print(f"使用创建时间最早的用户ID: {user_id}") print(f"使用创建时间最早的用户ID: {user_id}")
else: else:
user_id = 'system' user_id = "system"
print("未找到用户, 使用默认用户ID: system") print("未找到用户, 使用默认用户ID: system")
cursor.close() cursor.close()
conn.close() conn.close()
except Exception as e: except Exception as e:
print(f"查询最早用户ID失败: {str(e)}") print(f"查询最早用户ID失败: {str(e)}")
user_id = 'system' user_id = "system"
# 如果没有指定parent_id则获取file表中的第一个记录作为parent_id # 如果没有指定parent_id则获取file表中的第一个记录作为parent_id
if parent_id is None: if parent_id is None:
try: try:
conn = get_db_connection() conn = get_db_connection()
cursor = conn.cursor(dictionary=True) cursor = conn.cursor(dictionary=True)
# 查询file表中的第一个记录 # 查询file表中的第一个记录
query_first_file = """ query_first_file = """
SELECT id FROM file SELECT id FROM file
@ -460,15 +489,15 @@ def upload_files_to_server(files, parent_id=None, user_id=None):
""" """
cursor.execute(query_first_file) cursor.execute(query_first_file)
first_file = cursor.fetchone() first_file = cursor.fetchone()
if first_file: if first_file:
parent_id = first_file['id'] parent_id = first_file["id"]
print(f"使用file表中的第一个记录ID作为parent_id: {parent_id}") print(f"使用file表中的第一个记录ID作为parent_id: {parent_id}")
else: else:
# 如果没有找到记录创建一个新的ID # 如果没有找到记录创建一个新的ID
parent_id = get_uuid() parent_id = get_uuid()
print(f"file表中没有记录创建新的parent_id: {parent_id}") print(f"file表中没有记录创建新的parent_id: {parent_id}")
cursor.close() cursor.close()
conn.close() conn.close()
except Exception as e: except Exception as e:
@ -479,59 +508,54 @@ def upload_files_to_server(files, parent_id=None, user_id=None):
results = [] results = []
for file in files: for file in files:
if file.filename == '': if file.filename == "":
continue continue
if file and allowed_file(file.filename): if file and allowed_file(file.filename):
original_filename = file.filename original_filename = file.filename
# 修复文件名处理逻辑,保留中文字符 # 修复文件名处理逻辑,保留中文字符
name, ext = os.path.splitext(original_filename) name, ext = os.path.splitext(original_filename)
# 只替换文件系统不安全的字符保留中文和其他Unicode字符 # 只替换文件系统不安全的字符保留中文和其他Unicode字符
safe_name = re.sub(r'[\\/:*?"<>|]', '_', name) safe_name = re.sub(r'[\\/:*?"<>|]', "_", name)
# 如果处理后文件名为空,则使用随机字符串 # 如果处理后文件名为空,则使用随机字符串
if not safe_name or safe_name.strip() == '': if not safe_name or safe_name.strip() == "":
safe_name = f"file_{get_uuid()[:8]}" safe_name = f"file_{get_uuid()[:8]}"
filename = safe_name + ext.lower() filename = safe_name + ext.lower()
filepath = os.path.join(UPLOAD_FOLDER, filename) filepath = os.path.join(UPLOAD_FOLDER, filename)
try: try:
# 1. 保存文件到本地临时目录 # 1. 保存文件到本地临时目录
os.makedirs(UPLOAD_FOLDER, exist_ok=True) os.makedirs(UPLOAD_FOLDER, exist_ok=True)
file.save(filepath) file.save(filepath)
print(f"文件已保存到临时目录: {filepath}") print(f"文件已保存到临时目录: {filepath}")
# 2. 获取文件类型 # 2. 获取文件类型
filetype = filename_type(filename) filetype = filename_type(filename)
if filetype == FileType.OTHER.value: if filetype == FileType.OTHER.value:
raise RuntimeError("不支持的文件类型") raise RuntimeError("不支持的文件类型")
# 3. 生成唯一存储位置 # 3. 生成唯一存储位置
minio_client = get_minio_client() minio_client = get_minio_client()
location = filename location = filename
# 确保bucket存在 # 确保bucket存在
if not minio_client.bucket_exists(parent_id): if not minio_client.bucket_exists(parent_id):
minio_client.make_bucket(parent_id) minio_client.make_bucket(parent_id)
print(f"创建MinIO存储桶: {parent_id}") print(f"创建MinIO存储桶: {parent_id}")
# 4. 上传到MinIO # 4. 上传到MinIO
with open(filepath, 'rb') as file_data: with open(filepath, "rb") as file_data:
minio_client.put_object( minio_client.put_object(bucket_name=parent_id, object_name=location, data=file_data, length=os.path.getsize(filepath))
bucket_name=parent_id,
object_name=location,
data=file_data,
length=os.path.getsize(filepath)
)
print(f"文件已上传到MinIO: {parent_id}/{location}") print(f"文件已上传到MinIO: {parent_id}/{location}")
# 5. 创建文件记录 # 5. 创建文件记录
file_id = get_uuid() file_id = get_uuid()
current_time = int(datetime.now().timestamp()) current_time = int(datetime.now().timestamp())
current_date = datetime.now().strftime('%Y-%m-%d %H:%M:%S') current_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
file_record = { file_record = {
"id": file_id, "id": file_id,
"parent_id": parent_id, "parent_id": parent_id,
@ -545,30 +569,24 @@ def upload_files_to_server(files, parent_id=None, user_id=None):
"create_time": current_time, "create_time": current_time,
"create_date": current_date, "create_date": current_date,
"update_time": current_time, "update_time": current_time,
"update_date": current_date "update_date": current_date,
} }
# 保存文件记录 # 保存文件记录
conn = get_db_connection() conn = get_db_connection()
try: try:
cursor = conn.cursor() cursor = conn.cursor()
# 插入文件记录 # 插入文件记录
columns = ', '.join(file_record.keys()) columns = ", ".join(file_record.keys())
placeholders = ', '.join(['%s'] * len(file_record)) placeholders = ", ".join(["%s"] * len(file_record))
query = f"INSERT INTO file ({columns}) VALUES ({placeholders})" query = f"INSERT INTO file ({columns}) VALUES ({placeholders})"
cursor.execute(query, list(file_record.values())) cursor.execute(query, list(file_record.values()))
conn.commit() conn.commit()
results.append({ results.append({"id": file_id, "name": filename, "size": file_record["size"], "type": filetype, "status": "success"})
'id': file_id,
'name': filename,
'size': file_record["size"],
'type': filetype,
'status': 'success'
})
except Exception as e: except Exception as e:
conn.rollback() conn.rollback()
print(f"数据库操作失败: {str(e)}") print(f"数据库操作失败: {str(e)}")
@ -576,21 +594,13 @@ def upload_files_to_server(files, parent_id=None, user_id=None):
finally: finally:
cursor.close() cursor.close()
conn.close() conn.close()
except Exception as e: except Exception as e:
results.append({ results.append({"name": filename, "error": str(e), "status": "failed"})
'name': filename,
'error': str(e),
'status': 'failed'
})
print(f"文件上传过程中出错: {filename}, 错误: {str(e)}") print(f"文件上传过程中出错: {filename}, 错误: {str(e)}")
finally: finally:
# 删除临时文件 # 删除临时文件
if os.path.exists(filepath): if os.path.exists(filepath):
os.remove(filepath) os.remove(filepath)
return { return {"code": 0, "data": results, "message": f"成功上传 {len([r for r in results if r['status'] == 'success'])}/{len(files)} 个文件"}
'code': 0,
'data': results,
'message': f'成功上传 {len([r for r in results if r["status"] == "success"])}/{len(files)} 个文件'
}

View File

@ -9,8 +9,7 @@ import re
import requests import requests
from io import BytesIO from io import BytesIO
from datetime import datetime from datetime import datetime
from elasticsearch import Elasticsearch from database import MINIO_CONFIG, DB_CONFIG, get_minio_client, get_es_client
from database import MINIO_CONFIG, ES_CONFIG, DB_CONFIG, get_minio_client, get_es_client
from magic_pdf.data.data_reader_writer import FileBasedDataWriter, FileBasedDataReader from magic_pdf.data.data_reader_writer import FileBasedDataWriter, FileBasedDataReader
from magic_pdf.data.dataset import PymuDocDataset from magic_pdf.data.dataset import PymuDocDataset
from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze
@ -37,7 +36,6 @@ def merge_chunks(sections, chunk_token_num=128, delimiter="\n。"):
for section in sections: for section in sections:
# 计算当前部分的token数量 # 计算当前部分的token数量
text = section[0] if isinstance(section, tuple) else section text = section[0] if isinstance(section, tuple) else section
position = section[1] if isinstance(section, tuple) and len(section) > 1 else ""
# 简单估算token数量 # 简单估算token数量
token_count = len(text.split()) token_count = len(text.split())
@ -139,7 +137,6 @@ def _create_task_record(doc_id, chunk_ids_list):
task_id = generate_uuid() task_id = generate_uuid()
current_datetime = datetime.now() current_datetime = datetime.now()
current_timestamp = int(current_datetime.timestamp() * 1000) current_timestamp = int(current_datetime.timestamp() * 1000)
current_time_str = current_datetime.strftime("%Y-%m-%d %H:%M:%S")
current_date_only = current_datetime.strftime("%Y-%m-%d") current_date_only = current_datetime.strftime("%Y-%m-%d")
digest = f"{doc_id}_{0}_{1}" # 假设 from_page=0, to_page=1 digest = f"{doc_id}_{0}_{1}" # 假设 from_page=0, to_page=1
chunk_ids_str = " ".join(chunk_ids_list) chunk_ids_str = " ".join(chunk_ids_list)

View File

@ -18,6 +18,8 @@ export interface FileData {
create_time?: number create_time?: number
/** 更新时间 */ /** 更新时间 */
update_time?: number update_time?: number
/** 创建日期 */
create_date?: string
} }
/** /**
@ -38,6 +40,10 @@ export interface PageQuery {
currentPage: number currentPage: number
/** 每页条数 */ /** 每页条数 */
size: number size: number
/** 排序字段 */
sort_by: string
/** 排序方式 */
sort_order: string
} }
/** /**

View File

@ -3,8 +3,8 @@ import type { FormInstance, UploadUserFile } from "element-plus"
import { batchDeleteFilesApi, deleteFileApi, getFileListApi, uploadFileApi } from "@@/apis/files" import { batchDeleteFilesApi, deleteFileApi, getFileListApi, uploadFileApi } from "@@/apis/files"
import { usePagination } from "@@/composables/usePagination" import { usePagination } from "@@/composables/usePagination"
import { Delete, Download, Refresh, Search, Upload } from "@element-plus/icons-vue" import { Delete, Download, Refresh, Search, Upload } from "@element-plus/icons-vue"
import { ElMessage, ElMessageBox } from "element-plus" import { ElLoading, ElMessage, ElMessageBox } from "element-plus"
import { ref } from "vue" import { reactive, ref } from "vue"
import "element-plus/dist/index.css" import "element-plus/dist/index.css"
import "element-plus/theme-chalk/el-message-box.css" import "element-plus/theme-chalk/el-message-box.css"
import "element-plus/theme-chalk/el-message.css" import "element-plus/theme-chalk/el-message.css"
@ -38,6 +38,12 @@ const searchData = reactive({
name: "" name: ""
}) })
// Current sort state of the file table; sent to getFileListApi as sort_by / sort_order.
const sortData = reactive({
sortBy: "create_date",
sortOrder: "desc" // "asc" or "desc"; the list starts in descending order
})
// //
const multipleSelection = ref<FileData[]>([]) const multipleSelection = ref<FileData[]>([])
@ -48,7 +54,9 @@ function getTableData() {
getFileListApi({ getFileListApi({
currentPage: paginationData.currentPage, currentPage: paginationData.currentPage,
size: paginationData.pageSize, size: paginationData.pageSize,
name: searchData.name name: searchData.name,
sort_by: sortData.sortBy,
sort_order: sortData.sortOrder
}).then(({ data }) => { }).then(({ data }) => {
paginationData.total = data.total paginationData.total = data.total
tableData.value = data.list tableData.value = data.list
@ -275,6 +283,25 @@ function formatFileSize(size: number) {
} }
} }
/**
 * @description Handle the table's sort-change event, switching only between ascending and descending.
 * The order reported by the table is not used: clicking the column that is already active flips the
 * direction, clicking a different column starts at ascending.
 * @param {object} sortInfo Sort information object containing prop and order
 * @param {string} sortInfo.prop Field name of the column to sort by
 * @param {string | null} sortInfo.order Order reported by the table ('ascending', 'descending', null); unused
 */
function handleSortChange({ prop }: { prop: string, order: string | null }) {
  // A column declared without a `prop` reports no field name; ignore the event
  // instead of storing an empty sort field and sending it to the backend.
  if (!prop) {
    return
  }
  if (sortData.sortBy === prop) {
    // Same column clicked again: flip the direction.
    sortData.sortOrder = sortData.sortOrder === "asc" ? "desc" : "asc"
  } else {
    // Different column: switch to it and start with ascending order.
    sortData.sortBy = prop
    sortData.sortOrder = "asc"
  }
  getTableData()
}
// //
watch([() => paginationData.currentPage, () => paginationData.pageSize], getTableData, { immediate: true }) watch([() => paginationData.currentPage, () => paginationData.pageSize], getTableData, { immediate: true })
@ -359,20 +386,21 @@ onActivated(() => {
</template> </template>
</el-dialog> </el-dialog>
<div class="table-wrapper"> <div class="table-wrapper">
<el-table :data="tableData" @selection-change="handleSelectionChange"> <el-table :data="tableData" @selection-change="handleSelectionChange" @sort-change="handleSortChange">
<el-table-column type="selection" width="50" align="center" /> <el-table-column type="selection" width="50" align="center" />
<el-table-column label="序号" align="center" width="80"> <el-table-column label="序号" align="center" width="80">
<template #default="scope"> <template #default="scope">
{{ (paginationData.currentPage - 1) * paginationData.pageSize + scope.$index + 1 }} {{ (paginationData.currentPage - 1) * paginationData.pageSize + scope.$index + 1 }}
</template> </template>
</el-table-column> </el-table-column>
<el-table-column prop="name" label="文档名" align="center" /> <el-table-column prop="name" label="文档名" align="center" sortable="custom" />
<el-table-column label="大小" align="center" width="120"> <el-table-column label="大小" align="center" width="120" sortable="custom">
<template #default="scope"> <template #default="scope">
{{ formatFileSize(scope.row.size) }} {{ formatFileSize(scope.row.size) }}
</template> </template>
</el-table-column> </el-table-column>
<el-table-column prop="type" label="类型" align="center" width="120" /> <el-table-column prop="type" label="类型" align="center" width="120" sortable="custom" />
<el-table-column prop="create_date" label="创建时间" align="center" width="180" sortable="custom" />
<el-table-column fixed="right" label="操作" width="180" align="center"> <el-table-column fixed="right" label="操作" width="180" align="center">
<template #default="scope"> <template #default="scope">
<el-button type="primary" text bg size="small" :icon="Download" @click="handleDownload(scope.row)"> <el-button type="primary" text bg size="small" :icon="Download" @click="handleDownload(scope.row)">