refactor(docker): 重构Docker配置以优化镜像构建和管理 (#37)

- 更新.dockerignore和.gitignore文件以排除不必要的文件
- 调整docker-compose.yml中的服务命名和配置
- 优化Dockerfile,简化构建流程并提高缓存利用率
This commit is contained in:
zstar 2025-04-19 20:43:46 +08:00 committed by GitHub
parent 2a7e4f2df1
commit 3faa18126e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
22 changed files with 841 additions and 8028 deletions

2
.dockerignore Normal file
View File

@ -0,0 +1,2 @@
web/node_modules
web/package-lock.json

5
.gitignore vendored
View File

@ -47,4 +47,7 @@ web/public/logo_secret.svg
web/public/logo_old.svg
web/public/logo.svg
web/src/locales/zh.ts
vllm/models
vllm/models
management/models--opendatalab--PDF-Extract-Kit-1.0
management/models--hantian--layoutreader
docker/models

View File

@ -1,211 +1,34 @@
# base stage
FROM ubuntu:22.04 AS base
USER root
SHELL ["/bin/bash", "-c"]
ARG NEED_MIRROR=0
ARG LIGHTEN=0
ENV LIGHTEN=${LIGHTEN}
# Use the RAGFlow v0.17.2 image as the base
FROM infiniflow/ragflow:v0.17.2
# Set the working directory (kept consistent with the base image)
WORKDIR /ragflow
# Copy models downloaded via download_deps.py
RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
cp /huggingface.co/InfiniFlow/huqie/huqie.txt.trie /ragflow/rag/res/ && \
tar --exclude='.*' -cf - \
/huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \
/huggingface.co/InfiniFlow/deepdoc \
| tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
if [ "$LIGHTEN" != "1" ]; then \
(tar -cf - \
/huggingface.co/BAAI/bge-large-zh-v1.5 \
/huggingface.co/BAAI/bge-reranker-v2-m3 \
/huggingface.co/maidalun1020/bce-embedding-base_v1 \
/huggingface.co/maidalun1020/bce-reranker-base_v1 \
| tar -xf - --strip-components=2 -C /root/.ragflow) \
fi
# Copy the Python source directories
COPY api ./api
COPY conf ./conf
COPY deepdoc ./deepdoc
COPY rag ./rag
COPY agent ./agent
COPY graphrag ./graphrag
COPY agentic_reasoning ./agentic_reasoning
# https://github.com/chrismattmann/tika-python
# This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull latest every time from Apache.
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
cp -r /deps/nltk_data /root/ && \
cp /deps/tika-server-standard-3.0.0.jar /deps/tika-server-standard-3.0.0.jar.md5 /ragflow/ && \
cp /deps/cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4
# Copy the Python dependency definition files
COPY pyproject.toml ./pyproject.toml
COPY uv.lock ./uv.lock
ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard-3.0.0.jar"
ENV DEBIAN_FRONTEND=noninteractive
# Setup apt
# Python package and implicit dependencies:
# opencv-python: libglib2.0-0 libglx-mesa0 libgl1
# aspose-slides: pkg-config libicu-dev libgdiplus libssl1.1_1.1.1f-1ubuntu2_amd64.deb
# python-pptx: default-jdk tika-server-standard-3.0.0.jar
# selenium: libatk-bridge2.0-0 chrome-linux64-121-0-6167-85
# Building C extensions: libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
if [ "$NEED_MIRROR" == "1" ]; then \
sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list; \
fi; \
rm -f /etc/apt/apt.conf.d/docker-clean && \
echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache && \
chmod 1777 /tmp && \
apt update && \
apt --no-install-recommends install -y ca-certificates && \
apt update && \
apt install -y libglib2.0-0 libglx-mesa0 libgl1 && \
apt install -y pkg-config libicu-dev libgdiplus && \
apt install -y default-jdk && \
apt install -y libatk-bridge2.0-0 && \
apt install -y libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev && \
apt install -y libjemalloc-dev && \
apt install -y python3-pip pipx nginx unzip curl wget git vim less
# When NEED_MIRROR=1, point pip and uv at the Aliyun PyPI mirror; uv itself is installed with pipx either way.
RUN if [ "$NEED_MIRROR" == "1" ]; then \
pip3 config set global.index-url https://mirrors.aliyun.com/pypi/simple && \
pip3 config set global.trusted-host mirrors.aliyun.com; \
mkdir -p /etc/uv && \
echo "[[index]]" > /etc/uv/uv.toml && \
echo 'url = "https://mirrors.aliyun.com/pypi/simple"' >> /etc/uv/uv.toml && \
echo "default = true" >> /etc/uv/uv.toml; \
fi; \
pipx install uv
ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
ENV PATH=/root/.local/bin:$PATH
# nodejs 12.22 on Ubuntu 22.04 is too old
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
apt purge -y nodejs npm cargo && \
apt autoremove -y && \
apt update && \
apt install -y nodejs
# A modern version of cargo is needed for the latest version of the Rust compiler.
RUN apt update && apt install -y curl build-essential \
&& if [ "$NEED_MIRROR" == "1" ]; then \
# Use TUNA mirrors for rustup/rust dist files
export RUSTUP_DIST_SERVER="https://mirrors.tuna.tsinghua.edu.cn/rustup"; \
export RUSTUP_UPDATE_ROOT="https://mirrors.tuna.tsinghua.edu.cn/rustup/rustup"; \
echo "Using TUNA mirrors for Rustup."; \
fi; \
# Force curl to use HTTP/1.1
curl --proto '=https' --tlsv1.2 --http1.1 -sSf https://sh.rustup.rs | bash -s -- -y --profile minimal \
&& echo 'export PATH="/root/.cargo/bin:${PATH}"' >> /root/.bashrc
ENV PATH="/root/.cargo/bin:${PATH}"
# Print the toolchain versions; this step fails the build if cargo or rustc is not on PATH.
RUN cargo --version && rustc --version
# Add the MSSQL ODBC driver
# macOS ARM64 environment, install msodbcsql18.
# general x86_64 environment, install msodbcsql17.
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \
curl https://packages.microsoft.com/config/ubuntu/22.04/prod.list > /etc/apt/sources.list.d/mssql-release.list && \
apt update && \
arch="$(uname -m)"; \
if [ "$arch" = "arm64" ] || [ "$arch" = "aarch64" ]; then \
# ARM64 (macOS/Apple Silicon or Linux aarch64)
ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql18; \
else \
# x86_64 or others
ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql17; \
fi || \
{ echo "Failed to install ODBC driver"; exit 1; }
# Add dependencies of selenium
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chrome-linux64-121-0-6167-85,target=/chrome-linux64.zip \
unzip /chrome-linux64.zip && \
mv chrome-linux64 /opt/chrome && \
ln -s /opt/chrome/chrome /usr/local/bin/
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chromedriver-linux64-121-0-6167-85,target=/chromedriver-linux64.zip \
unzip -j /chromedriver-linux64.zip chromedriver-linux64/chromedriver && \
mv chromedriver /usr/local/bin/ && \
rm -f /usr/bin/google-chrome
# https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13
# aspose-slides on linux/arm64 is unavailable
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
if [ "$(uname -m)" = "x86_64" ]; then \
dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_amd64.deb; \
elif [ "$(uname -m)" = "aarch64" ]; then \
dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_arm64.deb; \
fi
# builder stage
FROM base AS builder
USER root
WORKDIR /ragflow
# install dependencies from uv.lock file
COPY pyproject.toml uv.lock ./
# https://github.com/astral-sh/uv/issues/10462
# uv records index url into uv.lock but doesn't failover among multiple indexes
RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \
if [ "$NEED_MIRROR" == "1" ]; then \
sed -i 's|pypi.org|mirrors.aliyun.com/pypi|g' uv.lock; \
else \
sed -i 's|mirrors.aliyun.com/pypi|pypi.org|g' uv.lock; \
fi; \
if [ "$LIGHTEN" == "1" ]; then \
uv sync --python 3.10 --frozen; \
else \
uv sync --python 3.10 --frozen --all-extras; \
fi
# Bring in the web sources and docs, then build the front end with npm (the npm cache lives in a BuildKit cache mount).
COPY web web
COPY docs docs
RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked \
cd web && npm install && npm run build
# Write /ragflow/VERSION from `git describe`, suffixed with "slim" or "full" according to LIGHTEN.
COPY .git /ragflow/.git
RUN version_info=$(git describe --tags --match=v* --first-parent --always); \
if [ "$LIGHTEN" == "1" ]; then \
version_info="$version_info slim"; \
else \
version_info="$version_info full"; \
fi; \
echo "RAGFlow version: $version_info"; \
echo $version_info > /ragflow/VERSION
# production stage
FROM base AS production
USER root
WORKDIR /ragflow
# Copy Python environment and packages
ENV VIRTUAL_ENV=/ragflow/.venv
COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
ENV PYTHONPATH=/ragflow/
COPY web web
COPY api api
COPY conf conf
COPY deepdoc deepdoc
COPY rag rag
COPY agent agent
COPY graphrag graphrag
COPY agentic_reasoning agentic_reasoning
COPY pyproject.toml uv.lock ./
# Copy the front-end source directory
COPY web ./web
# Copy the Docker-related files
COPY docker/service_conf.yaml.template ./conf/service_conf.yaml.template
COPY docker/entrypoint.sh docker/entrypoint-parser.sh ./
RUN chmod +x ./entrypoint*.sh
COPY docker/entrypoint.sh ./entrypoint.sh
RUN chmod +x ./entrypoint.sh
# Rebuild the front-end application
# Because /ragflow/web was overwritten above, /ragflow/web/dist has to be regenerated
# The infiniflow/ragflow:v0.17.2 base image should already include Node.js and npm
RUN cd web && npm install && npm run build
# Copy compiled web pages
COPY --from=builder /ragflow/web/dist /ragflow/web/dist
COPY --from=builder /ragflow/VERSION /ragflow/VERSION
ENTRYPOINT ["./entrypoint.sh"]

View File

@ -1,10 +0,0 @@
# Resource-only image: it carries the files that the main Dockerfile needs
# and contains nothing else (it starts from the empty `scratch` image).
FROM scratch

# Single-file resources downloaded via download_deps.py, all placed at the image root.
COPY \
    chromedriver-linux64-121-0-6167-85 \
    chrome-linux64-121-0-6167-85 \
    cl100k_base.tiktoken \
    libssl1.1_1.1.1f-1ubuntu2_amd64.deb \
    libssl1.1_1.1.1f-1ubuntu2_arm64.deb \
    tika-server-standard-3.0.0.jar \
    tika-server-standard-3.0.0.jar.md5 \
    libssl*.deb \
    /

# Directory trees downloaded via download_deps.py.
COPY nltk_data /nltk_data
COPY huggingface.co /huggingface.co

View File

@ -1,60 +0,0 @@
# RAGFlow image built on OpenCloudOS 9 with a Miniconda-managed Python 3.11 environment named py11.
FROM opencloudos/opencloudos:9.0
USER root
WORKDIR /ragflow

# Download tools plus the C++ compiler and OpenMPI headers.
RUN dnf update -y && dnf install -y wget curl gcc-c++ openmpi-devel

# Install Miniconda under /root/miniconda3 and make login shells activate the base environment.
RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
    bash ~/miniconda.sh -b -p /root/miniconda3 && \
    rm ~/miniconda.sh && ln -s /root/miniconda3/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
    echo ". /root/miniconda3/etc/profile.d/conda.sh" >> ~/.bashrc && \
    echo "conda activate base" >> ~/.bashrc

ENV PATH=/root/miniconda3/bin:$PATH

RUN conda create -y --name py11 python=3.11

# CONDA_PREFIX is set in its own instruction so that the PATH line below can expand it.
ENV CONDA_DEFAULT_ENV=py11
ENV CONDA_PREFIX=/root/miniconda3/envs/py11
ENV PATH=$CONDA_PREFIX/bin:$PATH

# RUN curl -sL https://rpm.nodesource.com/setup_14.x | bash -
RUN dnf install -y nodejs

RUN dnf install -y nginx

# Application sources. COPY is used because these are plain local files and directories;
# none of them relies on ADD's archive extraction or URL fetching.
COPY ./web ./web
COPY ./api ./api
COPY ./docs ./docs
COPY ./conf ./conf
COPY ./deepdoc ./deepdoc
COPY ./rag ./rag
COPY ./requirements.txt ./requirements.txt
COPY ./agent ./agent
COPY ./graphrag ./graphrag

RUN dnf install -y openmpi openmpi-devel python3-openmpi
ENV C_INCLUDE_PATH=/usr/include/openmpi-x86_64:$C_INCLUDE_PATH
ENV LD_LIBRARY_PATH=/usr/lib64/openmpi/lib:$LD_LIBRARY_PATH

# NOTE(review): presumably removes conda's bundled `ld` so the system linker is used when building wheels — confirm.
RUN rm /root/miniconda3/envs/py11/compiler_compat/ld

# Build the web front end.
RUN cd ./web && npm i && npm run build

# Install every requirement except lines that mention mpi4py.
RUN conda run -n py11 pip install $(grep -ivE "mpi4py" ./requirements.txt) # without mpi4py==3.1.5

RUN conda run -n py11 pip install redis

RUN dnf update -y && \
    dnf install -y glib2 mesa-libGL && \
    dnf clean all

RUN conda run -n py11 pip install ollama
RUN conda run -n py11 python -m nltk.downloader punkt
RUN conda run -n py11 python -m nltk.downloader wordnet

ENV PYTHONPATH=/ragflow/
ENV HF_ENDPOINT=https://hf-mirror.com

COPY docker/service_conf.yaml.template ./conf/service_conf.yaml.template
COPY docker/entrypoint.sh ./entrypoint.sh
RUN chmod +x ./entrypoint.sh

ENTRYPOINT ["./entrypoint.sh"]

1
bulid_order.txt Normal file
View File

@ -0,0 +1 @@
docker build -t zstar1003/ragflowplus:v0.2.0 .

View File

@ -71,10 +71,10 @@ REDIS_PORT=6379
# The password for Redis.
REDIS_PASSWORD=infini_rag_flow
# The port used to expose RAGFlow's HTTP API service to the host machine,
SVR_HTTP_PORT=9380
RAGFLOW_IMAGE=infiniflow/ragflow:v0.17.2
# Image name changed to zstar1003/ragflowplus:v0.2.0
RAGFLOW_IMAGE=zstar1003/ragflowplus:v0.2.0
# The local time zone.
TIMEZONE='Asia/Shanghai'

View File

@ -7,7 +7,7 @@ services:
mysql:
condition: service_healthy
image: ${RAGFLOW_IMAGE}
container_name: ragflow-server
container_name: ragflowplus-server
ports:
- ${SVR_HTTP_PORT}:9380
- 80:80
@ -25,14 +25,13 @@ services:
networks:
- ragflow
restart: on-failure
# https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
# If you're using Docker Desktop, the --add-host flag is optional. This flag makes sure that the host's internal IP gets exposed to the Prometheus container.
extra_hosts:
- "host.docker.internal:host-gateway"
# 新增加的用户后台信息管理系统
frontend:
image: zstar1003/ragflowplus-management-web:v0.1.2
# 后台信息管理系统前端
management-frontend:
container_name: ragflowplus-management-frontend
image: zstar1003/ragflowplus-management-web:v0.2.0
build:
context: .
dockerfile: Dockerfile
@ -40,27 +39,91 @@ services:
ports:
- "8888:80"
depends_on:
- backend
- management-backend
environment:
- API_BASE_URL=/api
networks:
- ragflow
backend:
image: zstar1003/ragflowplus-management-server:v0.1.2
# Backend of the admin information-management system
management-backend:
container_name: ragflowplus-management-backend
image: zstar1003/ragflowplus-management-server:v0.2.0
build:
context: .
dockerfile: Dockerfile
target: backend
ports:
- "5000:5000"
volumes:
- ./magic-pdf.json:/root/magic-pdf.json
depends_on:
mysql:
condition: service_healthy
es01:
condition: service_healthy
environment:
- FLASK_ENV=development
- CORS_ALLOWED_ORIGINS=http://frontend
- CORS_ALLOWED_ORIGINS=http://management-frontend
# NOTE(review): the fallbacks below default the admin password and the JWT secret to the literal 12345678;
# set MANAGEMENT_ADMIN_PASSWORD and MANAGEMENT_JWT_SECRET in the environment before exposing this service.
- MANAGEMENT_ADMIN_USERNAME=${MANAGEMENT_ADMIN_USERNAME:-admin}
- MANAGEMENT_ADMIN_PASSWORD=${MANAGEMENT_ADMIN_PASSWORD:-12345678}
- MANAGEMENT_JWT_SECRET=${MANAGEMENT_JWT_SECRET:-12345678}
extra_hosts:
- "host.docker.internal:host-gateway"
networks:
- ragflow
deploy:
resources:
reservations:
devices:
- driver: nvidia
capabilities: [gpu]
# vLLM embedding model: serves the weights mounted from ./models/bge-m3 under the name "bge-m3" (host port 8000)
vllm-bge:
container_name: vllm-bge
image: vllm/vllm-openai:latest
ipc: host
volumes:
- ./models/bge-m3:/models
command: [
"--model", "/models",
"--served-model-name", "bge-m3",
"--dtype", "float16",
"--gpu-memory-utilization", "0.9",
]
ports:
- "8000:8000"
deploy:
resources:
reservations:
devices:
- driver: nvidia
capabilities: [gpu]
networks:
- ragflow
# vLLM language model: serves the weights mounted from ./models/DeepSeek-R1-1.5B under the name "deepseek-r1" (host port 8001)
vllm-deepseek:
container_name: vllm-deepseek
image: vllm/vllm-openai:latest
ipc: host
volumes:
- ./models/DeepSeek-R1-1.5B:/models
command: [
"--model", "/models",
"--served-model-name", "deepseek-r1",
"--dtype", "float16",
"--tensor-parallel-size", "1",
"--max-model-len", "4096"
]
ports:
- "8001:8000"
deploy:
resources:
reservations:
devices:
- driver: nvidia
capabilities: [gpu]
networks:
- ragflow

52
docker/magic-pdf.json Normal file
View File

@ -0,0 +1,52 @@
{
"bucket_info": {
"bucket-name-1": [
"ak",
"sk",
"endpoint"
],
"bucket-name-2": [
"ak",
"sk",
"endpoint"
]
},
"models-dir": "/root/.cache/huggingface/hub/models--opendatalab--PDF-Extract-Kit-1.0/snapshots/782e787d46ed9b52253af6c1f69cdfcc76583e8d/models",
"layoutreader-model-dir": "/root/.cache/huggingface/hub/models--hantian--layoutreader/snapshots/641226775a0878b1014a96ad01b9642915136853",
"device-mode": "cpu",
"layout-config": {
"model": "doclayout_yolo"
},
"formula-config": {
"mfd_model": "yolo_v8_mfd",
"mfr_model": "unimernet_small",
"enable": true
},
"table-config": {
"model": "rapid_table",
"sub_model": "slanet_plus",
"enable": true,
"max_time": 400
},
"llm-aided-config": {
"formula_aided": {
"api_key": "your_api_key",
"base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
"model": "qwen2.5-7b-instruct",
"enable": false
},
"text_aided": {
"api_key": "your_api_key",
"base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
"model": "qwen2.5-7b-instruct",
"enable": false
},
"title_aided": {
"api_key": "your_api_key",
"base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
"model": "qwen2.5-32b-instruct",
"enable": false
}
},
"config_version": "1.2.0"
}

View File

@ -1,57 +0,0 @@
#!/usr/bin/env python3
# PEP 723 metadata
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "huggingface-hub",
# "nltk",
# ]
# ///
from huggingface_hub import snapshot_download
import nltk
import os
import urllib.request
# Direct-download URLs. The __main__ block saves each one in the current
# directory under the last path segment of its URL.
urls = [
"http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb",
"http://ports.ubuntu.com/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_arm64.deb",
"https://repo1.maven.org/maven2/org/apache/tika/tika-server-standard/3.0.0/tika-server-standard-3.0.0.jar",
"https://repo1.maven.org/maven2/org/apache/tika/tika-server-standard/3.0.0/tika-server-standard-3.0.0.jar.md5",
"https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken",
"https://bit.ly/chrome-linux64-121-0-6167-85",
"https://bit.ly/chromedriver-linux64-121-0-6167-85",
]
# Hugging Face repository ids. download_model() stores each one under
# ./huggingface.co/<repo_id>.
repos = [
"InfiniFlow/text_concat_xgb_v1.0",
"InfiniFlow/deepdoc",
"InfiniFlow/huqie",
"BAAI/bge-large-zh-v1.5",
"BAAI/bge-reranker-v2-m3",
"maidalun1020/bce-embedding-base_v1",
"maidalun1020/bce-reranker-base_v1",
]
def download_model(repo_id):
    """Download a Hugging Face repository snapshot into ``huggingface.co/<repo_id>``.

    The destination is resolved to an absolute path from the current working
    directory and is created if it does not exist yet.

    Args:
        repo_id: Repository identifier, e.g. ``"InfiniFlow/deepdoc"``.
    """
    target_dir = os.path.abspath(os.path.join("huggingface.co", repo_id))
    os.makedirs(target_dir, exist_ok=True)
    snapshot_download(
        repo_id=repo_id,
        local_dir=target_dir,
        local_dir_use_symlinks=False,
    )
if __name__ == "__main__":
    # 1) Plain files: saved in the current directory, named after the last
    #    path segment of the URL. Files that already exist are not re-fetched.
    for url in urls:
        filename = url.split("/")[-1]
        print(f"Downloading {url}...")
        if not os.path.exists(filename):
            # Download under a temporary name and rename only on success.
            # The skip check above tests existence only, so writing straight
            # to `filename` would let an interrupted transfer leave a
            # truncated file that every later run treats as complete.
            partial = filename + ".part"
            urllib.request.urlretrieve(url, partial)
            os.replace(partial, filename)

    # 2) NLTK corpora, stored under ./nltk_data.
    local_dir = os.path.abspath('nltk_data')
    for data in ['wordnet', 'punkt', 'punkt_tab']:
        print(f"Downloading nltk {data}...")
        nltk.download(data, download_dir=local_dir)

    # 3) Hugging Face repositories, stored under ./huggingface.co/<repo_id>.
    for repo_id in repos:
        print(f"Downloading huggingface repo {repo_id}...")
        download_model(repo_id)

3
management/.dockerignore Normal file
View File

@ -0,0 +1,3 @@
node_modules
web/node_modules
pnpm-lock.yaml

View File

@ -1,13 +1,23 @@
# Frontend build stage
FROM node:18 AS frontend-builder
WORKDIR /app/frontend
COPY web /app/frontend
# Install pnpm
RUN npm install -g pnpm
# Copy the dependency manifests first to take advantage of Docker layer caching
COPY web/package.json web/pnpm-lock.yaml ./
# Install pnpm, pinned to a fixed major version to avoid compatibility issues
RUN npm install -g pnpm@8
# Set the environment variable that disables interactive prompts
ENV CI=true
# Install dependencies and build
RUN pnpm i && pnpm build
# Install dependencies (node_modules is generated inside the container rather than copied from the host)
RUN pnpm install
# Then copy the rest of the source (avoids bringing in the host's node_modules)
COPY web .
# Build the artifacts
RUN pnpm build && chmod -R 755 dist
# Frontend serving stage
FROM nginx:alpine AS frontend
COPY nginx.conf /etc/nginx/conf.d/default.conf
@ -29,6 +39,13 @@ COPY server/requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
# Copy the backend code
COPY server /app
# Create the Hugging Face cache directory and copy the models into it
RUN mkdir -p /root/.cache/huggingface/hub/
COPY models--opendatalab--PDF-Extract-Kit-1.0 /root/.cache/huggingface/hub/models--opendatalab--PDF-Extract-Kit-1.0
COPY models--hantian--layoutreader /root/.cache/huggingface/hub/models--hantian--layoutreader
# COPY magic-pdf.json /root/magic-pdf.json
# Expose the backend port
EXPOSE 5000
CMD ["python", "app.py"]

View File

@ -1,6 +1,7 @@
services:
frontend:
image: zstar1003/ragflowplus-management-web:v0.1.2
management-frontend:
container_name: ragflowplus-management-frontend
image: zstar1003/ragflowplus-management-web:v0.2.0
build:
context: .
dockerfile: Dockerfile
@ -8,14 +9,15 @@ services:
ports:
- "8888:80"
depends_on:
- backend
- management-backend
environment:
- API_BASE_URL=/api
networks:
- management_network
backend:
image: zstar1003/ragflowplus-management-server:v0.1.2
management-backend:
container_name: ragflowplus-management-backend
image: zstar1003/ragflowplus-management-server:v0.2.0
build:
context: .
dockerfile: Dockerfile

52
management/magic-pdf.json Normal file
View File

@ -0,0 +1,52 @@
{
"bucket_info": {
"bucket-name-1": [
"ak",
"sk",
"endpoint"
],
"bucket-name-2": [
"ak",
"sk",
"endpoint"
]
},
"models-dir": "/root/.cache/huggingface/hub/models--opendatalab--PDF-Extract-Kit-1.0/snapshots/782e787d46ed9b52253af6c1f69cdfcc76583e8d/models",
"layoutreader-model-dir": "/root/.cache/huggingface/hub/models--hantian--layoutreader/snapshots/641226775a0878b1014a96ad01b9642915136853",
"device-mode": "cpu",
"layout-config": {
"model": "doclayout_yolo"
},
"formula-config": {
"mfd_model": "yolo_v8_mfd",
"mfr_model": "unimernet_small",
"enable": true
},
"table-config": {
"model": "rapid_table",
"sub_model": "slanet_plus",
"enable": true,
"max_time": 400
},
"llm-aided-config": {
"formula_aided": {
"api_key": "your_api_key",
"base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
"model": "qwen2.5-7b-instruct",
"enable": false
},
"text_aided": {
"api_key": "your_api_key",
"base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
"model": "qwen2.5-7b-instruct",
"enable": false
},
"title_aided": {
"api_key": "your_api_key",
"base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
"model": "qwen2.5-32b-instruct",
"enable": false
}
},
"config_version": "1.2.0"
}

View File

@ -12,8 +12,8 @@ server {
}
location /api/ {
# 将所有以/api/开头的请求转发到后端服务(backend容器的5000端口)
proxy_pass http://backend:5000/api/;
# 将所有以/api/开头的请求转发到后端服务(management-backend容器的5000端口)
proxy_pass http://management-backend:5000/api/;
# 设置代理请求头
proxy_set_header Host $host; # 保留原始请求的Host头
# 传递客户端真实IP

View File

@ -23,7 +23,7 @@ def is_running_in_docker():
# Choose the appropriate host address depending on the runtime environment
DB_HOST = 'host.docker.internal' if is_running_in_docker() else 'localhost'
MINIO_HOST = 'host.docker.internal' if is_running_in_docker() else 'localhost'
ES_HOST = 'host.docker.internal' if is_running_in_docker() else 'localhost'
ES_HOST = 'es01' if is_running_in_docker() else 'localhost'
# 数据库连接配置
DB_CONFIG = {

View File

@ -8,4 +8,7 @@ PyJWT==2.10.1
dotenv==0.9.9
magic-pdf[full]==1.3.0
transformers==4.49.0
elasticsearch==8.12.0
elasticsearch==8.12.0
minio==7.2.4
strenum==0.4.15
peewee==3.17.1

File diff suppressed because it is too large Load Diff

View File

@ -87,6 +87,6 @@ declare global {
// for type re-export
declare global {
// @ts-ignore
export type { Component, ComponentPublicInstance, ComputedRef, DirectiveBinding, ExtractDefaultPropTypes, ExtractPropTypes, ExtractPublicPropTypes, InjectionKey, PropType, Ref, MaybeRef, MaybeRefOrGetter, VNode, WritableComputedRef } from 'vue'
export type { Component, Slot, Slots, ComponentPublicInstance, ComputedRef, DirectiveBinding, ExtractDefaultPropTypes, ExtractPropTypes, ExtractPublicPropTypes, InjectionKey, PropType, Ref, MaybeRef, MaybeRefOrGetter, VNode, WritableComputedRef } from 'vue'
import('vue')
}

View File

@ -8,16 +8,12 @@ export {}
/* prettier-ignore */
declare module 'vue' {
export interface GlobalComponents {
ElAlert: typeof import('element-plus/es')['ElAlert']
ElAside: typeof import('element-plus/es')['ElAside']
ElAvatar: typeof import('element-plus/es')['ElAvatar']
ElBacktop: typeof import('element-plus/es')['ElBacktop']
ElBadge: typeof import('element-plus/es')['ElBadge']
ElBreadcrumb: typeof import('element-plus/es')['ElBreadcrumb']
ElBreadcrumbItem: typeof import('element-plus/es')['ElBreadcrumbItem']
ElButton: typeof import('element-plus/es')['ElButton']
ElCard: typeof import('element-plus/es')['ElCard']
ElConfigProvider: typeof import('element-plus/es')['ElConfigProvider']
ElContainer: typeof import('element-plus/es')['ElContainer']
ElDialog: typeof import('element-plus/es')['ElDialog']
ElDivider: typeof import('element-plus/es')['ElDivider']
@ -25,36 +21,17 @@ declare module 'vue' {
ElDropdown: typeof import('element-plus/es')['ElDropdown']
ElDropdownItem: typeof import('element-plus/es')['ElDropdownItem']
ElDropdownMenu: typeof import('element-plus/es')['ElDropdownMenu']
ElEmpty: typeof import('element-plus/es')['ElEmpty']
ElForm: typeof import('element-plus/es')['ElForm']
ElFormItem: typeof import('element-plus/es')['ElFormItem']
ElHeader: typeof import('element-plus/es')['ElHeader']
ElIcon: typeof import('element-plus/es')['ElIcon']
ElInput: typeof import('element-plus/es')['ElInput']
ElMain: typeof import('element-plus/es')['ElMain']
ElMenu: typeof import('element-plus/es')['ElMenu']
ElMenuItem: typeof import('element-plus/es')['ElMenuItem']
ElOption: typeof import('element-plus/es')['ElOption']
ElPagination: typeof import('element-plus/es')['ElPagination']
ElPopover: typeof import('element-plus/es')['ElPopover']
ElProgress: typeof import('element-plus/es')['ElProgress']
ElRadio: typeof import('element-plus/es')['ElRadio']
ElRadioGroup: typeof import('element-plus/es')['ElRadioGroup']
ElScrollbar: typeof import('element-plus/es')['ElScrollbar']
ElSelect: typeof import('element-plus/es')['ElSelect']
ElSubMenu: typeof import('element-plus/es')['ElSubMenu']
ElSwitch: typeof import('element-plus/es')['ElSwitch']
ElTable: typeof import('element-plus/es')['ElTable']
ElTableColumn: typeof import('element-plus/es')['ElTableColumn']
ElTabPane: typeof import('element-plus/es')['ElTabPane']
ElTabs: typeof import('element-plus/es')['ElTabs']
ElTag: typeof import('element-plus/es')['ElTag']
ElTooltip: typeof import('element-plus/es')['ElTooltip']
ElUpload: typeof import('element-plus/es')['ElUpload']
RouterLink: typeof import('vue-router')['RouterLink']
RouterView: typeof import('vue-router')['RouterView']
}
export interface ComponentCustomProperties {
vLoading: typeof import('element-plus/es')['ElLoadingDirective']
}
}

View File

@ -1,140 +0,0 @@
[project]
name = "ragflow"
version = "0.17.2"
description = "[RAGFlow](https://ragflow.io/) is an open-source RAG (Retrieval-Augmented Generation) engine based on deep document understanding. It offers a streamlined RAG workflow for businesses of any scale, combining LLM (Large Language Models) to provide truthful question-answering capabilities, backed by well-founded citations from various complex formatted data."
authors = [
{ name = "Zhichang Yu", email = "yuzhichang@gmail.com" }
]
license = { file = "LICENSE" }
readme = "README.md"
requires-python = ">=3.10,<3.13"
dependencies = [
"datrie==0.8.2",
"akshare>=1.15.78,<2.0.0",
"azure-storage-blob==12.22.0",
"azure-identity==1.17.1",
"azure-storage-file-datalake==12.16.0",
"anthropic==0.34.1",
"arxiv==2.1.3",
"aspose-slides>=24.9.0,<25.0.0; platform_machine == 'x86_64' or (sys_platform == 'darwin' and platform_machine == 'arm64')",
"beartype>=0.18.5,<0.19.0",
"bio==1.7.1",
"blinker==1.7.0",
"boto3==1.34.140",
"botocore==1.34.140",
"cachetools==5.3.3",
"chardet==5.2.0",
"cn2an==0.5.22",
"cohere==5.6.2",
"Crawl4AI==0.3.8",
"dashscope==1.20.11",
"deepl==1.18.0",
"demjson3==3.0.6",
"discord-py==2.3.2",
"duckduckgo-search>=7.2.0,<8.0.0",
"editdistance==0.8.1",
"elastic-transport==8.12.0",
"elasticsearch==8.12.1",
"elasticsearch-dsl==8.12.0",
"filelock==3.15.4",
"flask==3.0.3",
"flask-cors==5.0.0",
"flask-login==0.6.3",
"flask-session==0.8.0",
"google-search-results==2.4.2",
"groq==0.9.0",
"hanziconv==0.3.2",
"html-text==0.6.2",
"httpx==0.27.0",
"huggingface-hub>=0.25.0,<0.26.0",
"infinity-sdk==0.6.0-dev3",
"infinity-emb>=0.0.66,<0.0.67",
"itsdangerous==2.1.2",
"json-repair==0.35.0",
"markdown==3.6",
"markdown-to-json==2.1.1",
"minio==7.2.4",
"mistralai==0.4.2",
"nltk==3.9.1",
"numpy>=1.26.0,<2.0.0",
"ollama==0.2.1",
"onnxruntime==1.19.2; sys_platform == 'darwin' or platform_machine != 'x86_64'",
"onnxruntime-gpu==1.19.2; sys_platform != 'darwin' and platform_machine == 'x86_64'",
"openai==1.45.0",
"opencv-python==4.10.0.84",
"opencv-python-headless==4.10.0.84",
"openpyxl>=3.1.0,<4.0.0",
"ormsgpack==1.5.0",
"pandas>=2.2.0,<3.0.0",
"pdfplumber==0.10.4",
"peewee==3.17.1",
"pillow==10.4.0",
"protobuf==5.27.2",
"psycopg2-binary==2.9.9",
"pyclipper==1.3.0.post5",
"pycryptodomex==3.20.0",
"pypdf>=5.0.0,<6.0.0",
"pytest>=8.3.0,<9.0.0",
"python-dotenv==1.0.1",
"python-dateutil==2.8.2",
"python-pptx>=1.0.2,<2.0.0",
"pywencai==0.12.2",
"qianfan==0.4.6",
"ranx==0.3.20",
"readability-lxml==0.8.1",
"valkey==6.0.2",
"requests==2.32.2",
"replicate==0.31.0",
"roman-numbers==1.0.2",
"ruamel-base==1.0.0",
"scholarly==1.7.11",
"scikit-learn==1.5.0",
"selenium==4.22.0",
"selenium-wire==5.1.0",
"setuptools>=75.2.0,<76.0.0",
"shapely==2.0.5",
"six==1.16.0",
"strenum==0.4.15",
"tabulate==0.9.0",
"tavily-python==0.5.1",
"tencentcloud-sdk-python==3.0.1215",
"tika==2.6.0",
"tiktoken==0.7.0",
"umap_learn==0.5.6",
"vertexai==1.64.0",
"volcengine==1.0.146",
"voyageai==0.2.3",
"webdriver-manager==4.0.1",
"werkzeug==3.0.6",
"wikipedia==1.4.0",
"word2number==1.1",
"xgboost==1.5.0",
"xpinyin==0.7.6",
"yfinance==0.1.96",
"zhipuai==2.0.1",
"ruamel-yaml>=0.18.6,<0.19.0",
"google-generativeai>=0.8.1,<0.9.0",
"python-docx>=1.1.2,<2.0.0",
"pypdf2>=3.0.1,<4.0.0",
"graspologic>=3.4.1,<4.0.0",
"pymysql>=1.1.1,<2.0.0",
"mini-racer>=0.12.4,<0.13.0",
"pyodbc>=5.2.0,<6.0.0",
"pyicu>=2.13.1,<3.0.0",
"flasgger>=0.9.7.1,<0.10.0",
"xxhash>=3.5.0,<4.0.0",
"trio>=0.29.0",
]
[project.optional-dependencies]
full = [
"bcembedding==0.1.5",
"fastembed>=0.3.6,<0.4.0; sys_platform == 'darwin' or platform_machine != 'x86_64'",
"fastembed-gpu>=0.3.6,<0.4.0; sys_platform != 'darwin' and platform_machine == 'x86_64'",
"flagembedding==1.2.10",
"torch>=2.5.0,<3.0.0",
"transformers>=4.35.0,<5.0.0"
]
[[tool.uv.index]]
url = "https://mirrors.aliyun.com/pypi/simple"

6716
uv.lock

File diff suppressed because it is too large Load Diff