feat(文件管理): 新增支持HTML文件类型解析并完善许可证说明 (#95)
This commit is contained in:
parent
b79293b9ae
commit
3ec47d8955
12
README.md
12
README.md
|
@ -142,6 +142,18 @@ pnpm dev
|
|||
|
||||
目前该项目仍在持续更新中,更新日志会在我的微信公众号[我有一计]上发布,欢迎关注。
|
||||
|
||||
## 📜 许可证与使用限制
|
||||
1. **本仓库基于AGPLv3许可证**
|
||||
由于包含第三方AGPLv3代码,本项目必须遵循AGPLv3的全部条款。这意味着:
|
||||
- 任何**衍生作品**(包括修改或组合代码)必须继续使用AGPLv3并公开源代码。
|
||||
- 若通过**网络服务**提供本软件,用户有权获取对应源码。
|
||||
|
||||
2. **商用限制说明**
|
||||
- 本项目**不允许闭源商用**,除非获得所有代码版权持有人的书面授权(包括上游AGPLv3代码作者)。
|
||||
|
||||
3. **免责声明**
|
||||
本项目不提供任何担保,使用者需自行承担合规风险。若需法律建议,请咨询专业律师。
|
||||
|
||||
## ✨ Star History
|
||||
|
||||

|
13
README_EN.md
13
README_EN.md
|
@ -109,6 +109,19 @@ This project is developed based on the following open-source projects:
|
|||
|
||||
This project is under continuous development. Update logs will be posted on my WeChat Official Account **[我有一计]** — feel free to follow!
|
||||
|
||||
## 📜 License and Usage Restrictions
|
||||
|
||||
1. **This Repository is Licensed Under AGPLv3**
|
||||
Because this project incorporates third-party AGPLv3 code, it must fully comply with the terms of AGPLv3. This means:
|
||||
- All **derivative works** (including modified or combined code) must remain under AGPLv3, with their source code made publicly available.
|
||||
- If this software is provided as a **network service**, its users are entitled to obtain the corresponding source code.
|
||||
|
||||
2. **Commercial Use Restrictions**
|
||||
- **Closed-source commercial use is prohibited** unless written authorization is obtained from all copyright holders (including the authors of the upstream AGPLv3 code).
|
||||
|
||||
3. **Disclaimer**
|
||||
This project comes with no warranties. Users bear all compliance risks themselves. If you need legal advice, please consult a professional lawyer.
|
||||
|
||||
## ✨ Star History
|
||||
|
||||

|
|
@ -1,4 +1,4 @@
|
|||
from peewee import *
|
||||
from peewee import * # noqa: F403
|
||||
from .base_service import BaseService
|
||||
from .models import File
|
||||
from .utils import FileType, get_uuid
|
||||
|
|
|
@ -13,7 +13,7 @@ load_dotenv("../../docker/.env")
|
|||
|
||||
temp_dir = tempfile.gettempdir()
|
||||
UPLOAD_FOLDER = os.path.join(temp_dir, "uploads")
|
||||
ALLOWED_EXTENSIONS = {"pdf", "doc", "docx", "ppt", "pptx", "xls", "xlsx", "jpg", "jpeg", "png", "txt", "md"}
|
||||
ALLOWED_EXTENSIONS = {"pdf", "doc", "docx", "ppt", "pptx", "xls", "xlsx", "jpg", "jpeg", "png", "bmp", "txt", "md", "html"}
|
||||
|
||||
|
||||
def allowed_file(filename):
|
||||
|
@ -37,6 +37,8 @@ def filename_type(filename):
|
|||
return FileType.PPT.value
|
||||
elif ext in [".txt", ".md"]:
|
||||
return FileType.TEXT.value
|
||||
elif ext in [".html"]:
|
||||
return FileType.HTML.value
|
||||
|
||||
return FileType.OTHER.value
|
||||
|
||||
|
@ -602,5 +604,7 @@ def upload_files_to_server(files, parent_id=None, user_id=None):
|
|||
# 删除临时文件
|
||||
if os.path.exists(filepath):
|
||||
os.remove(filepath)
|
||||
else:
|
||||
raise RuntimeError({"name": filename, "error": "不支持的文件类型", "status": "failed"})
|
||||
|
||||
return {"code": 0, "data": results, "message": f"成功上传 {len([r for r in results if r['status'] == 'success'])}/{len(files)} 个文件"}
|
||||
|
|
|
@ -12,6 +12,7 @@ class FileType(StrEnum):
|
|||
PPT = "ppt"
|
||||
VISUAL = "visual"
|
||||
TEXT = "txt"
|
||||
HTML = "html"
|
||||
OTHER = "other"
|
||||
|
||||
class FileSource(StrEnum):
|
||||
|
|
|
@ -361,7 +361,7 @@ def perform_parse(doc_id, doc_info, file_info, embedding_config):
|
|||
middle_content = pipe_result.get_middle_json()
|
||||
middle_json_content = json.loads(middle_content)
|
||||
|
||||
elif file_type.endswith("word") or file_type.endswith("ppt") or file_type.endswith("txt") or file_type.endswith("md"):
|
||||
elif file_type.endswith("word") or file_type.endswith("ppt") or file_type.endswith("txt") or file_type.endswith("md") or file_type.endswith("html"):
|
||||
update_progress(0.3, "使用MinerU解析器")
|
||||
# 创建临时文件保存文件内容
|
||||
temp_dir = tempfile.gettempdir()
|
||||
|
|
Loading…
Reference in New Issue