From 3ec47d8955734b4d5f89c3953dd952effd1a38f4 Mon Sep 17 00:00:00 2001 From: zstar <65890619+zstar1003@users.noreply.github.com> Date: Thu, 15 May 2025 00:31:41 +0800 Subject: [PATCH] =?UTF-8?q?feat(=E6=96=87=E4=BB=B6=E7=AE=A1=E7=90=86):=20?= =?UTF-8?q?=E6=96=B0=E5=A2=9E=E6=94=AF=E6=8C=81HTML=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E7=B1=BB=E5=9E=8B=E8=A7=A3=E6=9E=90=E5=B9=B6=E5=AE=8C=E5=96=84?= =?UTF-8?q?=E8=AE=B8=E5=8F=AF=E8=AF=81=E8=AF=B4=E6=98=8E=20(#95)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 12 ++++++++++++ README_EN.md | 13 +++++++++++++ management/server/services/files/file_service.py | 2 +- management/server/services/files/service.py | 6 +++++- management/server/services/files/utils.py | 1 + .../services/knowledgebases/document_parser.py | 2 +- 6 files changed, 33 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 3b390f5..8ca12ff 100644 --- a/README.md +++ b/README.md @@ -142,6 +142,18 @@ pnpm dev 目前该项目仍在持续更新中,更新日志会在我的微信公众号[我有一计]上发布,欢迎关注。 +## 📜 许可证与使用限制 +1. **本仓库基于AGPLv3许可证** + 由于包含第三方AGPLv3代码,本项目必须遵循AGPLv3的全部条款。这意味着: + - 任何**衍生作品**(包括修改或组合代码)必须继续使用AGPLv3并公开源代码。 + - 若通过**网络服务**提供本软件,用户有权获取对应源码。 + +2. **商用限制说明** + - 本项目**不允许闭源商用**,除非获得所有代码版权持有人的书面授权(包括上游AGPLv3代码作者)。 + +3. **免责声明** + 本项目不提供任何担保,使用者需自行承担合规风险。若需法律建议,请咨询专业律师。 + ## ✨ Star History ![Stargazers over time](https://starchart.cc/zstar1003/ragflow-plus.svg) \ No newline at end of file diff --git a/README_EN.md b/README_EN.md index c445d3f..04dfaa9 100644 --- a/README_EN.md +++ b/README_EN.md @@ -109,6 +109,19 @@ This project is developed based on the following open-source projects: This project is under continuous development. Update logs will be posted on my WeChat public account **[我有一计]** — feel free to follow! +## 📜 License and Usage Restrictions + +1. **This Repository is Licensed Under AGPLv3** + As it incorporates third-party AGPLv3 code, this project must fully comply with AGPLv3 terms. 
This means: + - Any **derivative works** (including modifications or combined code) must remain under AGPLv3 with source code publicly available. + - If provided as a **network service**, users are entitled to obtain the corresponding source code. + +2. **Commercial Use Restrictions** + - **Closed-source commercial use is prohibited** unless written authorization is obtained from all copyright holders (including the authors of the upstream AGPLv3 code). + +3. **Disclaimer** + This project comes with no warranties. Users shall bear all compliance risks. Consult legal professionals for legal advice. + ## ✨ Star History ![Stargazers over time](https://starchart.cc/zstar1003/ragflow-plus.svg) \ No newline at end of file diff --git a/management/server/services/files/file_service.py b/management/server/services/files/file_service.py index 98a25b0..866f264 100644 --- a/management/server/services/files/file_service.py +++ b/management/server/services/files/file_service.py @@ -1,4 +1,4 @@ -from peewee import * +from peewee import * # noqa: F403 from .base_service import BaseService from .models import File from .utils import FileType, get_uuid diff --git a/management/server/services/files/service.py b/management/server/services/files/service.py index 5eed82a..ccd43a2 100644 --- a/management/server/services/files/service.py +++ b/management/server/services/files/service.py @@ -13,7 +13,7 @@ load_dotenv("../../docker/.env") temp_dir = tempfile.gettempdir() UPLOAD_FOLDER = os.path.join(temp_dir, "uploads") -ALLOWED_EXTENSIONS = {"pdf", "doc", "docx", "ppt", "pptx", "xls", "xlsx", "jpg", "jpeg", "png", "txt", "md"} +ALLOWED_EXTENSIONS = {"pdf", "doc", "docx", "ppt", "pptx", "xls", "xlsx", "jpg", "jpeg", "png", "bmp", "txt", "md", "html"} def allowed_file(filename): @@ -37,6 +37,8 @@ def filename_type(filename): return FileType.PPT.value elif ext in [".txt", ".md"]: return FileType.TEXT.value + elif ext in [".html"]: + return FileType.HTML.value return FileType.OTHER.value @@ -602,5 +604,7
@@ def upload_files_to_server(files, parent_id=None, user_id=None): # 删除临时文件 if os.path.exists(filepath): os.remove(filepath) + else: + raise RuntimeError({"name": filename, "error": "不支持的文件类型", "status": "failed"}) return {"code": 0, "data": results, "message": f"成功上传 {len([r for r in results if r['status'] == 'success'])}/{len(files)} 个文件"} diff --git a/management/server/services/files/utils.py b/management/server/services/files/utils.py index aea4d1c..ec55a54 100644 --- a/management/server/services/files/utils.py +++ b/management/server/services/files/utils.py @@ -12,6 +12,7 @@ class FileType(StrEnum): PPT = "ppt" VISUAL = "visual" TEXT = "txt" + HTML = "html" OTHER = "other" class FileSource(StrEnum): diff --git a/management/server/services/knowledgebases/document_parser.py b/management/server/services/knowledgebases/document_parser.py index b884999..6a78da8 100644 --- a/management/server/services/knowledgebases/document_parser.py +++ b/management/server/services/knowledgebases/document_parser.py @@ -361,7 +361,7 @@ def perform_parse(doc_id, doc_info, file_info, embedding_config): middle_content = pipe_result.get_middle_json() middle_json_content = json.loads(middle_content) - elif file_type.endswith("word") or file_type.endswith("ppt") or file_type.endswith("txt") or file_type.endswith("md"): + elif file_type.endswith("word") or file_type.endswith("ppt") or file_type.endswith("txt") or file_type.endswith("md") or file_type.endswith("html"): update_progress(0.3, "使用MinerU解析器") # 创建临时文件保存文件内容 temp_dir = tempfile.gettempdir()