56 lines
1.6 KiB
Python
56 lines
1.6 KiB
Python
from peewee import * # noqa: F403
|
|
|
|
from .base_service import BaseService
|
|
from .models import Document
|
|
from .utils import StatusEnum, get_uuid
|
|
|
|
|
|
class DocumentService(BaseService):
    """Service helpers for creating and looking up ``Document`` records.

    Persistence is delegated to the ``insert`` and ``query`` classmethods,
    which are not defined in this class (presumably inherited from
    ``BaseService`` -- confirm against that class).
    """

    model = Document

    @classmethod
    def create_document(
        cls,
        kb_id: str,
        name: str,
        location: str,
        size: int,
        file_type: str,
        created_by: str = None,
        parser_id: str = None,
        parser_config: dict = None,
    ) -> Document:
        """Create a document record.

        Args:
            kb_id: Knowledge base ID.
            name: File name.
            location: Storage location.
            size: File size.
            file_type: File type; stored under the ``"type"`` key.
            created_by: Creator ID. Any falsy value is stored as ``"system"``.
            parser_id: Parser ID. Any falsy value is stored as ``""``.
            parser_config: Parser configuration. Any falsy value (including
                an empty dict) is replaced by ``{"pages": [[1, 1000000]]}``.

        Returns:
            Document: The created document object, i.e. whatever
            ``cls.insert`` returns for the assembled record.
        """
        new_id = get_uuid()

        # Optional arguments: substitute the fixed fallbacks for falsy input.
        if not created_by:
            created_by = "system"
        if not parser_id:
            parser_id = ""
        if not parser_config:
            parser_config = {"pages": [[1, 1000000]]}

        # Assemble the base document data; counters and progress start empty.
        record = {
            "id": new_id,
            "kb_id": kb_id,
            "name": name,
            "location": location,
            "size": size,
            "type": file_type,
            "created_by": created_by,
            "parser_id": parser_id,
            "parser_config": parser_config,
            "source_type": "local",
            "token_num": 0,
            "chunk_num": 0,
            "progress": 0,
            "progress_msg": "",
            "run": "0",  # parsing has not started yet
            "status": StatusEnum.VALID.value,
        }

        created = cls.insert(record)
        return created

    @classmethod
    def get_by_kb_id(cls, kb_id: str) -> list[Document]:
        """Return the result of ``cls.query`` filtered by ``kb_id``.

        Args:
            kb_id: Knowledge base ID to filter on.

        Returns:
            list[Document]: The value produced by ``cls.query(kb_id=kb_id)``.
        """
        matches = cls.query(kb_id=kb_id)
        return matches
|