feat(文件管理): 新增支持HTML文件类型解析并完善许可证说明 (#95)

This commit is contained in:
zstar 2025-05-15 00:31:41 +08:00 committed by GitHub
parent b79293b9ae
commit 3ec47d8955
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 33 additions and 3 deletions

View File

@ -142,6 +142,18 @@ pnpm dev
目前该项目仍在持续更新中,更新日志会在我的微信公众号[我有一计]上发布,欢迎关注。
## 📜 许可证与使用限制
1. **本仓库基于AGPLv3许可证**
由于包含第三方AGPLv3代码,本项目必须遵循AGPLv3的全部条款。这意味着:
- 任何**衍生作品**(包括修改或组合代码)必须继续使用AGPLv3并公开源代码。
- 若通过**网络服务**提供本软件,用户有权获取对应源码。
2. **商用限制说明**
- 本项目**不允许闭源商用**,除非获得所有代码版权持有人的书面授权(包括上游AGPLv3代码作者)。
3. **免责声明**
本项目不提供任何担保,使用者需自行承担合规风险。若需法律建议,请咨询专业律师。
## ✨ Star History
![Stargazers over time](https://starchart.cc/zstar1003/ragflow-plus.svg)

View File

@ -109,6 +109,19 @@ This project is developed based on the following open-source projects:
This project is under continuous development. Update logs will be posted on my WeChat public account **[我有一计]** — feel free to follow!
## 📜 License and Usage Restrictions
1. **This Repository is Licensed Under AGPLv3**
As it incorporates third-party AGPLv3 code, this project must fully comply with AGPLv3 terms. This means:
- Any **derivative works** (including modifications or combined code) must remain under AGPLv3 with source code publicly available.
- If provided as a **network service**, users are entitled to obtain the corresponding source code.
2. **Commercial Use Restrictions**
- **Closed-source commercial use is prohibited** unless written authorization is obtained from all copyright holders (including the authors of the upstream AGPLv3 code).
3. **Disclaimer**
This project comes with no warranties. Users shall bear all compliance risks. Consult legal professionals for legal advice.
## ✨ Star History
![Stargazers over time](https://starchart.cc/zstar1003/ragflow-plus.svg)

View File

@ -1,4 +1,4 @@
from peewee import *
from peewee import * # noqa: F403
from .base_service import BaseService
from .models import File
from .utils import FileType, get_uuid

View File

@ -13,7 +13,7 @@ load_dotenv("../../docker/.env")
temp_dir = tempfile.gettempdir()
UPLOAD_FOLDER = os.path.join(temp_dir, "uploads")
ALLOWED_EXTENSIONS = {"pdf", "doc", "docx", "ppt", "pptx", "xls", "xlsx", "jpg", "jpeg", "png", "txt", "md"}
ALLOWED_EXTENSIONS = {"pdf", "doc", "docx", "ppt", "pptx", "xls", "xlsx", "jpg", "jpeg", "png", "bmp", "txt", "md", "html"}
def allowed_file(filename):
@ -37,6 +37,8 @@ def filename_type(filename):
return FileType.PPT.value
elif ext in [".txt", ".md"]:
return FileType.TEXT.value
elif ext in [".html"]:
return FileType.HTML.value
return FileType.OTHER.value
@ -602,5 +604,7 @@ def upload_files_to_server(files, parent_id=None, user_id=None):
# 删除临时文件
if os.path.exists(filepath):
os.remove(filepath)
else:
raise RuntimeError({"name": filename, "error": "不支持的文件类型", "status": "failed"})
return {"code": 0, "data": results, "message": f"成功上传 {len([r for r in results if r['status'] == 'success'])}/{len(files)} 个文件"}

View File

@ -12,6 +12,7 @@ class FileType(StrEnum):
PPT = "ppt"
VISUAL = "visual"
TEXT = "txt"
HTML = "html"
OTHER = "other"
class FileSource(StrEnum):

View File

@ -361,7 +361,7 @@ def perform_parse(doc_id, doc_info, file_info, embedding_config):
middle_content = pipe_result.get_middle_json()
middle_json_content = json.loads(middle_content)
elif file_type.endswith("word") or file_type.endswith("ppt") or file_type.endswith("txt") or file_type.endswith("md"):
elif file_type.endswith("word") or file_type.endswith("ppt") or file_type.endswith("txt") or file_type.endswith("md") or file_type.endswith("html"):
update_progress(0.3, "使用MinerU解析器")
# 创建临时文件保存文件内容
temp_dir = tempfile.gettempdir()