feat(文件管理): 新增支持HTML文件类型解析并完善许可证说明 (#95)
This commit is contained in:
parent
b79293b9ae
commit
3ec47d8955
12
README.md
12
README.md
|
@ -142,6 +142,18 @@ pnpm dev
|
||||||
|
|
||||||
目前该项目仍在持续更新中,更新日志会在我的微信公众号[我有一计]上发布,欢迎关注。
|
目前该项目仍在持续更新中,更新日志会在我的微信公众号[我有一计]上发布,欢迎关注。
|
||||||
|
|
||||||
|
## 📜 许可证与使用限制
|
||||||
|
1. **本仓库基于AGPLv3许可证**
|
||||||
|
由于包含第三方AGPLv3代码,本项目必须遵循AGPLv3的全部条款。这意味着:
|
||||||
|
- 任何**衍生作品**(包括修改或组合代码)必须继续使用AGPLv3并公开源代码。
|
||||||
|
- 若通过**网络服务**提供本软件,用户有权获取对应源码。
|
||||||
|
|
||||||
|
2. **商用限制说明**
|
||||||
|
- 本项目**不允许闭源商用**,除非获得所有代码版权持有人的书面授权(包括上游AGPLv3代码作者)。
|
||||||
|
|
||||||
|
3. **免责声明**
|
||||||
|
本项目不提供任何担保,使用者需自行承担合规风险。若需法律建议,请咨询专业律师。
|
||||||
|
|
||||||
## ✨ Star History
|
## ✨ Star History
|
||||||
|
|
||||||

|

|
13
README_EN.md
13
README_EN.md
|
@ -109,6 +109,19 @@ This project is developed based on the following open-source projects:
|
||||||
|
|
||||||
This project is under continuous development. Update logs will be posted on my WeChat public account **[我有一计]** — feel free to follow!
|
This project is under continuous development. Update logs will be posted on my WeChat public account **[我有一计]** — feel free to follow!
|
||||||
|
|
||||||
|
## 📜 License and Usage Restrictions
|
||||||
|
|
||||||
|
1. **This Repository is Licensed Under AGPLv3**
|
||||||
|
As it incorporates third-party AGPLv3 code, this project must fully comply with AGPLv3 terms. This means:
|
||||||
|
- Any **derivative works** (including modifications or combined code) must remain under AGPLv3 with source code publicly available.
|
||||||
|
- If this software is provided as a **network service**, its users are entitled to obtain the corresponding source code.
|
||||||
|
|
||||||
|
2. **Commercial Use Restrictions**
|
||||||
|
- **Closed-source commercial use is prohibited** unless written authorization is obtained from all copyright holders (including the authors of the upstream AGPLv3 code).
|
||||||
|
|
||||||
|
3. **Disclaimer**
|
||||||
|
This project comes with no warranties. Users shall bear all compliance risks. Consult legal professionals for legal advice.
|
||||||
|
|
||||||
## ✨ Star History
|
## ✨ Star History
|
||||||
|
|
||||||

|

|
|
@ -1,4 +1,4 @@
|
||||||
from peewee import *
|
from peewee import * # noqa: F403
|
||||||
from .base_service import BaseService
|
from .base_service import BaseService
|
||||||
from .models import File
|
from .models import File
|
||||||
from .utils import FileType, get_uuid
|
from .utils import FileType, get_uuid
|
||||||
|
|
|
@ -13,7 +13,7 @@ load_dotenv("../../docker/.env")
|
||||||
|
|
||||||
temp_dir = tempfile.gettempdir()
|
temp_dir = tempfile.gettempdir()
|
||||||
UPLOAD_FOLDER = os.path.join(temp_dir, "uploads")
|
UPLOAD_FOLDER = os.path.join(temp_dir, "uploads")
|
||||||
ALLOWED_EXTENSIONS = {"pdf", "doc", "docx", "ppt", "pptx", "xls", "xlsx", "jpg", "jpeg", "png", "txt", "md"}
|
ALLOWED_EXTENSIONS = {"pdf", "doc", "docx", "ppt", "pptx", "xls", "xlsx", "jpg", "jpeg", "png", "bmp", "txt", "md", "html"}
|
||||||
|
|
||||||
|
|
||||||
def allowed_file(filename):
|
def allowed_file(filename):
|
||||||
|
@ -37,6 +37,8 @@ def filename_type(filename):
|
||||||
return FileType.PPT.value
|
return FileType.PPT.value
|
||||||
elif ext in [".txt", ".md"]:
|
elif ext in [".txt", ".md"]:
|
||||||
return FileType.TEXT.value
|
return FileType.TEXT.value
|
||||||
|
elif ext in [".html"]:
|
||||||
|
return FileType.HTML.value
|
||||||
|
|
||||||
return FileType.OTHER.value
|
return FileType.OTHER.value
|
||||||
|
|
||||||
|
@ -602,5 +604,7 @@ def upload_files_to_server(files, parent_id=None, user_id=None):
|
||||||
# 删除临时文件
|
# 删除临时文件
|
||||||
if os.path.exists(filepath):
|
if os.path.exists(filepath):
|
||||||
os.remove(filepath)
|
os.remove(filepath)
|
||||||
|
else:
|
||||||
|
raise RuntimeError({"name": filename, "error": "不支持的文件类型", "status": "failed"})
|
||||||
|
|
||||||
return {"code": 0, "data": results, "message": f"成功上传 {len([r for r in results if r['status'] == 'success'])}/{len(files)} 个文件"}
|
return {"code": 0, "data": results, "message": f"成功上传 {len([r for r in results if r['status'] == 'success'])}/{len(files)} 个文件"}
|
||||||
|
|
|
@ -12,6 +12,7 @@ class FileType(StrEnum):
|
||||||
PPT = "ppt"
|
PPT = "ppt"
|
||||||
VISUAL = "visual"
|
VISUAL = "visual"
|
||||||
TEXT = "txt"
|
TEXT = "txt"
|
||||||
|
HTML = "html"
|
||||||
OTHER = "other"
|
OTHER = "other"
|
||||||
|
|
||||||
class FileSource(StrEnum):
|
class FileSource(StrEnum):
|
||||||
|
|
|
@ -361,7 +361,7 @@ def perform_parse(doc_id, doc_info, file_info, embedding_config):
|
||||||
middle_content = pipe_result.get_middle_json()
|
middle_content = pipe_result.get_middle_json()
|
||||||
middle_json_content = json.loads(middle_content)
|
middle_json_content = json.loads(middle_content)
|
||||||
|
|
||||||
elif file_type.endswith("word") or file_type.endswith("ppt") or file_type.endswith("txt") or file_type.endswith("md"):
|
elif file_type.endswith("word") or file_type.endswith("ppt") or file_type.endswith("txt") or file_type.endswith("md") or file_type.endswith("html"):
|
||||||
update_progress(0.3, "使用MinerU解析器")
|
update_progress(0.3, "使用MinerU解析器")
|
||||||
# 创建临时文件保存文件内容
|
# 创建临时文件保存文件内容
|
||||||
temp_dir = tempfile.gettempdir()
|
temp_dir = tempfile.gettempdir()
|
||||||
|
|
Loading…
Reference in New Issue