refactor(docker): 重构Docker配置以优化镜像构建和管理 (#37)

- 更新.dockerignore和.gitignore文件以排除不必要的文件
- 调整docker-compose.yml中的服务命名和配置
- 优化Dockerfile，简化构建流程并提高缓存利用率
2025-04-19 20:43:46 +08:00 · 2025-04-19 20:43:46 +08:00 · 3faa18126e
parent 2a7e4f2df1
commit 3faa18126e
22 changed files with 841 additions and 8028 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -0,0 +1,2 @@
+web/node_modules
+web/package-lock.json
--- a/.gitignore
+++ b/.gitignore
@@ -47,4 +47,7 @@ web/public/logo_secret.svg
 web/public/logo_old.svg
 web/public/logo.svg
 web/src/locales/zh.ts
-vllm/models
+vllm/models
+management/models--opendatalab--PDF-Extract-Kit-1.0
+management/models--hantian--layoutreader
+docker/models
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,211 +1,34 @@
-# base stage
-FROM ubuntu:22.04 AS base
-USER root
-SHELL ["/bin/bash", "-c"]
-
-ARG NEED_MIRROR=0
-ARG LIGHTEN=0
-ENV LIGHTEN=${LIGHTEN}
+# 使用 RAGFlow v0.17.2 镜像作为基础
+FROM infiniflow/ragflow:v0.17.2

+# 设置工作目录（与基础镜像保持一致）
 WORKDIR /ragflow

-# Copy models downloaded via download_deps.py
-RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow
-RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
-    cp /huggingface.co/InfiniFlow/huqie/huqie.txt.trie /ragflow/rag/res/ && \
-    tar --exclude='.*' -cf - \
-        /huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \
-        /huggingface.co/InfiniFlow/deepdoc \
-        | tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc 
-RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
-    if [ "$LIGHTEN" != "1" ]; then \
-        (tar -cf - \
-            /huggingface.co/BAAI/bge-large-zh-v1.5 \
-            /huggingface.co/BAAI/bge-reranker-v2-m3 \
-            /huggingface.co/maidalun1020/bce-embedding-base_v1 \
-            /huggingface.co/maidalun1020/bce-reranker-base_v1 \
-            | tar -xf - --strip-components=2 -C /root/.ragflow) \
-    fi
+# 复制 Python 相关代码目录
+COPY api ./api
+COPY conf ./conf
+COPY deepdoc ./deepdoc
+COPY rag ./rag
+COPY agent ./agent
+COPY graphrag ./graphrag
+COPY agentic_reasoning ./agentic_reasoning

-# https://github.com/chrismattmann/tika-python
-# This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull latest every time from Apache.
-RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
-    cp -r /deps/nltk_data /root/ && \
-    cp /deps/tika-server-standard-3.0.0.jar /deps/tika-server-standard-3.0.0.jar.md5 /ragflow/ && \
-    cp /deps/cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4
+# 复制 Python 依赖定义文件
+COPY pyproject.toml ./pyproject.toml
+COPY uv.lock ./uv.lock

-ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard-3.0.0.jar"
-ENV DEBIAN_FRONTEND=noninteractive
-
-# Setup apt
-# Python package and implicit dependencies:
-# opencv-python: libglib2.0-0 libglx-mesa0 libgl1
-# aspose-slides: pkg-config libicu-dev libgdiplus         libssl1.1_1.1.1f-1ubuntu2_amd64.deb
-# python-pptx:   default-jdk                              tika-server-standard-3.0.0.jar
-# selenium:      libatk-bridge2.0-0                       chrome-linux64-121-0-6167-85
-# Building C extensions: libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev
-RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
-    if [ "$NEED_MIRROR" == "1" ]; then \
-        sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list; \
-    fi; \
-    rm -f /etc/apt/apt.conf.d/docker-clean && \
-    echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache && \
-    chmod 1777 /tmp && \
-    apt update && \
-    apt --no-install-recommends install -y ca-certificates && \
-    apt update && \
-    apt install -y libglib2.0-0 libglx-mesa0 libgl1 && \
-    apt install -y pkg-config libicu-dev libgdiplus && \
-    apt install -y default-jdk && \
-    apt install -y libatk-bridge2.0-0 && \
-    apt install -y libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev && \
-    apt install -y libjemalloc-dev && \
-    apt install -y python3-pip pipx nginx unzip curl wget git vim less
-
-RUN if [ "$NEED_MIRROR" == "1" ]; then \
-        pip3 config set global.index-url https://mirrors.aliyun.com/pypi/simple && \
-        pip3 config set global.trusted-host mirrors.aliyun.com; \
-        mkdir -p /etc/uv && \
-        echo "[[index]]" > /etc/uv/uv.toml && \
-        echo 'url = "https://mirrors.aliyun.com/pypi/simple"' >> /etc/uv/uv.toml && \
-        echo "default = true" >> /etc/uv/uv.toml; \
-    fi; \
-    pipx install uv
-
-ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
-ENV PATH=/root/.local/bin:$PATH
-
-# nodejs 12.22 on Ubuntu 22.04 is too old
-RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
-    curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
-    apt purge -y nodejs npm cargo && \
-    apt autoremove -y && \
-    apt update && \
-    apt install -y nodejs
-
-# A modern version of cargo is needed for the latest version of the Rust compiler.
-RUN apt update && apt install -y curl build-essential \
-    && if [ "$NEED_MIRROR" == "1" ]; then \
-         # Use TUNA mirrors for rustup/rust dist files
-         export RUSTUP_DIST_SERVER="https://mirrors.tuna.tsinghua.edu.cn/rustup"; \
-         export RUSTUP_UPDATE_ROOT="https://mirrors.tuna.tsinghua.edu.cn/rustup/rustup"; \
-         echo "Using TUNA mirrors for Rustup."; \
-       fi; \
-    # Force curl to use HTTP/1.1
-    curl --proto '=https' --tlsv1.2 --http1.1 -sSf https://sh.rustup.rs | bash -s -- -y --profile minimal \
-    && echo 'export PATH="/root/.cargo/bin:${PATH}"' >> /root/.bashrc
-
-ENV PATH="/root/.cargo/bin:${PATH}"
-
-RUN cargo --version && rustc --version
-
-# Add msssql ODBC driver
-# macOS ARM64 environment, install msodbcsql18.
-# general x86_64 environment, install msodbcsql17.
-RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
-    curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \
-    curl https://packages.microsoft.com/config/ubuntu/22.04/prod.list > /etc/apt/sources.list.d/mssql-release.list && \
-    apt update && \
-    arch="$(uname -m)"; \
-    if [ "$arch" = "arm64" ] || [ "$arch" = "aarch64" ]; then \
-        # ARM64 (macOS/Apple Silicon or Linux aarch64)
-        ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql18; \
-    else \
-        # x86_64 or others
-        ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql17; \
-    fi || \
-    { echo "Failed to install ODBC driver"; exit 1; }
-
-
-
-# Add dependencies of selenium
-RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chrome-linux64-121-0-6167-85,target=/chrome-linux64.zip \
-    unzip /chrome-linux64.zip && \
-    mv chrome-linux64 /opt/chrome && \
-    ln -s /opt/chrome/chrome /usr/local/bin/
-RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chromedriver-linux64-121-0-6167-85,target=/chromedriver-linux64.zip \
-    unzip -j /chromedriver-linux64.zip chromedriver-linux64/chromedriver && \
-    mv chromedriver /usr/local/bin/ && \
-    rm -f /usr/bin/google-chrome
-
-# https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13
-# aspose-slides on linux/arm64 is unavailable
-RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
-    if [ "$(uname -m)" = "x86_64" ]; then \
-        dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_amd64.deb; \
-    elif [ "$(uname -m)" = "aarch64" ]; then \
-        dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_arm64.deb; \
-    fi
-
-
-# builder stage
-FROM base AS builder
-USER root
-
-WORKDIR /ragflow
-
-# install dependencies from uv.lock file
-COPY pyproject.toml uv.lock ./
-
-# https://github.com/astral-sh/uv/issues/10462
-# uv records index url into uv.lock but doesn't failover among multiple indexes
-RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \
-    if [ "$NEED_MIRROR" == "1" ]; then \
-        sed -i 's|pypi.org|mirrors.aliyun.com/pypi|g' uv.lock; \
-    else \
-        sed -i 's|mirrors.aliyun.com/pypi|pypi.org|g' uv.lock; \
-    fi; \
-    if [ "$LIGHTEN" == "1" ]; then \
-        uv sync --python 3.10 --frozen; \
-    else \
-        uv sync --python 3.10 --frozen --all-extras; \
-    fi
-
-COPY web web
-COPY docs docs
-RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked \
-    cd web && npm install && npm run build
-
-COPY .git /ragflow/.git
-
-RUN version_info=$(git describe --tags --match=v* --first-parent --always); \
-    if [ "$LIGHTEN" == "1" ]; then \
-        version_info="$version_info slim"; \
-    else \
-        version_info="$version_info full"; \
-    fi; \
-    echo "RAGFlow version: $version_info"; \
-    echo $version_info > /ragflow/VERSION
-
-# production stage
-FROM base AS production
-USER root
-
-WORKDIR /ragflow
-
-# Copy Python environment and packages
-ENV VIRTUAL_ENV=/ragflow/.venv
-COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
-ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
-
-ENV PYTHONPATH=/ragflow/
-
-COPY web web
-COPY api api
-COPY conf conf
-COPY deepdoc deepdoc
-COPY rag rag
-COPY agent agent
-COPY graphrag graphrag
-COPY agentic_reasoning agentic_reasoning
-COPY pyproject.toml uv.lock ./
+# 复制前端源代码目录
+COPY web ./web

+# 复制 Docker 相关文件
 COPY docker/service_conf.yaml.template ./conf/service_conf.yaml.template
-COPY docker/entrypoint.sh docker/entrypoint-parser.sh ./
-RUN chmod +x ./entrypoint*.sh
+COPY docker/entrypoint.sh ./entrypoint.sh
+
+RUN chmod +x ./entrypoint.sh
+
+# 重新构建前端应用
+# 因为我们覆盖了 /ragflow/web 目录，需要重新生成 /ragflow/web/dist
+# 基础镜像 infiniflow/ragflow:v0.17.2 应该已经包含了 Node.js 和 npm
+RUN cd web && npm install && npm run build

-# Copy compiled web pages
-COPY --from=builder /ragflow/web/dist /ragflow/web/dist

-COPY --from=builder /ragflow/VERSION /ragflow/VERSION
-ENTRYPOINT ["./entrypoint.sh"]
--- a/Dockerfile.deps
+++ b/Dockerfile.deps
@@ -1,10 +0,0 @@
-# This builds an image that contains the resources needed by Dockerfile
-#
-FROM scratch
-
-# Copy resources downloaded via download_deps.py
-COPY chromedriver-linux64-121-0-6167-85 chrome-linux64-121-0-6167-85 cl100k_base.tiktoken libssl1.1_1.1.1f-1ubuntu2_amd64.deb libssl1.1_1.1.1f-1ubuntu2_arm64.deb tika-server-standard-3.0.0.jar tika-server-standard-3.0.0.jar.md5 libssl*.deb /
-
-COPY nltk_data /nltk_data
-
-COPY huggingface.co /huggingface.co
--- a/Dockerfile.scratch.oc9
+++ b/Dockerfile.scratch.oc9
@@ -1,60 +0,0 @@
-FROM opencloudos/opencloudos:9.0
-USER root
-
-WORKDIR /ragflow
-
-RUN dnf update -y && dnf install -y wget curl gcc-c++ openmpi-devel
-
-RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
-    bash ~/miniconda.sh -b -p /root/miniconda3 && \
-    rm ~/miniconda.sh && ln -s /root/miniconda3/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
-    echo ". /root/miniconda3/etc/profile.d/conda.sh" >> ~/.bashrc && \
-    echo "conda activate base" >> ~/.bashrc
-
-ENV PATH /root/miniconda3/bin:$PATH
-
-RUN conda create -y --name py11 python=3.11
-
-ENV CONDA_DEFAULT_ENV py11
-ENV CONDA_PREFIX /root/miniconda3/envs/py11
-ENV PATH $CONDA_PREFIX/bin:$PATH
-
-# RUN curl -sL https://rpm.nodesource.com/setup_14.x | bash -
-RUN dnf install -y nodejs
-
-RUN dnf install -y nginx
-
-ADD ./web ./web
-ADD ./api ./api
-ADD ./docs ./docs
-ADD ./conf ./conf
-ADD ./deepdoc ./deepdoc
-ADD ./rag ./rag
-ADD ./requirements.txt ./requirements.txt
-ADD ./agent ./agent
-ADD ./graphrag ./graphrag
-
-RUN dnf install -y openmpi openmpi-devel python3-openmpi
-ENV C_INCLUDE_PATH /usr/include/openmpi-x86_64:$C_INCLUDE_PATH
-ENV LD_LIBRARY_PATH /usr/lib64/openmpi/lib:$LD_LIBRARY_PATH
-RUN rm /root/miniconda3/envs/py11/compiler_compat/ld
-RUN cd ./web && npm i && npm run build
-RUN conda run -n py11 pip install $(grep -ivE "mpi4py" ./requirements.txt) # without mpi4py==3.1.5
-RUN conda run -n py11 pip install redis
-
-RUN dnf update -y && \
-    dnf install -y glib2 mesa-libGL && \
-    dnf clean all
-
-RUN conda run -n py11 pip install ollama
-RUN conda run -n py11 python -m nltk.downloader punkt
-RUN conda run -n py11 python -m nltk.downloader wordnet
-
-ENV PYTHONPATH=/ragflow/
-ENV HF_ENDPOINT=https://hf-mirror.com
-
-COPY docker/service_conf.yaml.template ./conf/service_conf.yaml.template
-ADD docker/entrypoint.sh ./entrypoint.sh
-RUN chmod +x ./entrypoint.sh
-
-ENTRYPOINT ["./entrypoint.sh"]
--- a/bulid_order.txt
+++ b/bulid_order.txt
@@ -0,0 +1 @@
+docker build -t zstar1003/ragflowplus:v0.2.0 .
--- a/docker/.env
+++ b/docker/.env
@@ -71,10 +71,10 @@ REDIS_PORT=6379
 # The password for Redis.
 REDIS_PASSWORD=infini_rag_flow

-# The port used to expose RAGFlow's HTTP API service to the host machine, 
 SVR_HTTP_PORT=9380

-RAGFLOW_IMAGE=infiniflow/ragflow:v0.17.2
+# 镜像名称变更为zstar1003/ragflowplus:v0.2.0
+RAGFLOW_IMAGE=zstar1003/ragflowplus:v0.2.0

 # The local time zone.
 TIMEZONE='Asia/Shanghai'
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -7,7 +7,7 @@ services:
      mysql:
        condition: service_healthy
    image: ${RAGFLOW_IMAGE}
-    container_name: ragflow-server
+    container_name: ragflowplus-server
    ports:
      - ${SVR_HTTP_PORT}:9380
      - 80:80
@@ -25,14 +25,13 @@ services:
    networks:
      - ragflow
    restart: on-failure
-    # https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
-    # If you're using Docker Desktop, the --add-host flag is optional. This flag makes sure that the host's internal IP gets exposed to the Prometheus container.
    extra_hosts:
      - "host.docker.internal:host-gateway"

-  # 新增加的用户后台信息管理系统
-  frontend:
-    image: zstar1003/ragflowplus-management-web:v0.1.2
+  # 后台信息管理系统前端
+  management-frontend:
+    container_name: ragflowplus-management-frontend
+    image: zstar1003/ragflowplus-management-web:v0.2.0
    build:
      context: .
      dockerfile: Dockerfile
@@ -40,27 +39,91 @@ services:
    ports:
      - "8888:80"
    depends_on:
-      - backend
+      - management-backend
    environment:
      - API_BASE_URL=/api 
    networks:
      - ragflow
-
-  backend:
-    image: zstar1003/ragflowplus-management-server:v0.1.2
+      
+  # 后台信息管理系统后端
+  management-backend:
+    container_name: ragflowplus-management-backend
+    image: zstar1003/ragflowplus-management-server:v0.2.0
    build:
      context: .
      dockerfile: Dockerfile
      target: backend
    ports:
      - "5000:5000"
+    volumes:
+      - ./magic-pdf.json:/root/magic-pdf.json
+    depends_on: 
+      mysql:
+        condition: service_healthy
+      es01:
+        condition: service_healthy 
    environment:
      - FLASK_ENV=development
-      - CORS_ALLOWED_ORIGINS=http://frontend
+      - CORS_ALLOWED_ORIGINS=http://management-frontend
      - MANAGEMENT_ADMIN_USERNAME=${MANAGEMENT_ADMIN_USERNAME:-admin}
      - MANAGEMENT_ADMIN_PASSWORD=${MANAGEMENT_ADMIN_PASSWORD:-12345678}
      - MANAGEMENT_JWT_SECRET=${MANAGEMENT_JWT_SECRET:-12345678}
    extra_hosts:
      - "host.docker.internal:host-gateway"
+    networks:
+      - ragflow
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              capabilities: [gpu]
+
+  # vllm 嵌入模型
+  vllm-bge:
+    container_name: vllm-bge
+    image: vllm/vllm-openai:latest
+    ipc: host
+    volumes:
+      - ./models/bge-m3:/models
+    command: [
+      "--model", "/models",
+      "--served-model-name", "bge-m3",
+      "--dtype", "float16",
+      "--gpu-memory-utilization", "0.9",
+    ]
+    ports:
+      - "8000:8000"
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              capabilities: [gpu]
+    networks:
+      - ragflow
+
+  # vllm 语言模型
+  vllm-deepseek:
+    container_name: vllm-deepseek
+    image: vllm/vllm-openai:latest
+    ipc: host
+    volumes:
+      - ./models/DeepSeek-R1-1.5B:/models
+    command: [
+      "--model", "/models",
+      "--served-model-name", "deepseek-r1",
+      "--dtype", "float16",
+      "--tensor-parallel-size", "1",
+      "--max-model-len", "4096"
+    ]
+    ports:
+      - "8001:8000"
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              capabilities: [gpu]
    networks:
      - ragflow
--- a/docker/magic-pdf.json
+++ b/docker/magic-pdf.json
@@ -0,0 +1,52 @@
+{
+    "bucket_info": {
+        "bucket-name-1": [
+            "ak",
+            "sk",
+            "endpoint"
+        ],
+        "bucket-name-2": [
+            "ak",
+            "sk",
+            "endpoint"
+        ]
+    },
+    "models-dir": "/root/.cache/huggingface/hub/models--opendatalab--PDF-Extract-Kit-1.0/snapshots/782e787d46ed9b52253af6c1f69cdfcc76583e8d/models",
+    "layoutreader-model-dir": "/root/.cache/huggingface/hub/models--hantian--layoutreader/snapshots/641226775a0878b1014a96ad01b9642915136853",
+    "device-mode": "cpu",
+    "layout-config": {
+        "model": "doclayout_yolo"
+    },
+    "formula-config": {
+        "mfd_model": "yolo_v8_mfd",
+        "mfr_model": "unimernet_small",
+        "enable": true
+    },
+    "table-config": {
+        "model": "rapid_table",
+        "sub_model": "slanet_plus",
+        "enable": true,
+        "max_time": 400
+    },
+    "llm-aided-config": {
+        "formula_aided": {
+            "api_key": "your_api_key",
+            "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
+            "model": "qwen2.5-7b-instruct",
+            "enable": false
+        },
+        "text_aided": {
+            "api_key": "your_api_key",
+            "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
+            "model": "qwen2.5-7b-instruct",
+            "enable": false
+        },
+        "title_aided": {
+            "api_key": "your_api_key",
+            "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
+            "model": "qwen2.5-32b-instruct",
+            "enable": false
+        }
+    },
+    "config_version": "1.2.0"
+}
--- a/download_deps.py
+++ b/download_deps.py
@@ -1,57 +0,0 @@
-#!/usr/bin/env python3
-
-# PEP 723 metadata
-# /// script
-# requires-python = ">=3.10"
-# dependencies = [
-#   "huggingface-hub",
-#   "nltk",
-# ]
-# ///
-
-from huggingface_hub import snapshot_download
-import nltk
-import os
-import urllib.request
-
-urls = [
-    "http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb",
-    "http://ports.ubuntu.com/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_arm64.deb",
-    "https://repo1.maven.org/maven2/org/apache/tika/tika-server-standard/3.0.0/tika-server-standard-3.0.0.jar",
-    "https://repo1.maven.org/maven2/org/apache/tika/tika-server-standard/3.0.0/tika-server-standard-3.0.0.jar.md5",
-    "https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken",
-    "https://bit.ly/chrome-linux64-121-0-6167-85",
-    "https://bit.ly/chromedriver-linux64-121-0-6167-85",
-]
-
-repos = [
-    "InfiniFlow/text_concat_xgb_v1.0",
-    "InfiniFlow/deepdoc",
-    "InfiniFlow/huqie",
-    "BAAI/bge-large-zh-v1.5",
-    "BAAI/bge-reranker-v2-m3",
-    "maidalun1020/bce-embedding-base_v1",
-    "maidalun1020/bce-reranker-base_v1",
-]
-
-def download_model(repo_id):
-    local_dir = os.path.abspath(os.path.join("huggingface.co", repo_id))
-    os.makedirs(local_dir, exist_ok=True)
-    snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=False)
-
-
-if __name__ == "__main__":
-    for url in urls:
-        filename = url.split("/")[-1]
-        print(f"Downloading {url}...")
-        if not os.path.exists(filename):
-            urllib.request.urlretrieve(url, filename)
-
-    local_dir = os.path.abspath('nltk_data')
-    for data in ['wordnet', 'punkt', 'punkt_tab']:
-        print(f"Downloading nltk {data}...")
-        nltk.download(data, download_dir=local_dir)
-
-    for repo_id in repos:
-        print(f"Downloading huggingface repo {repo_id}...")
-        download_model(repo_id)
--- a/management/.dockerignore
+++ b/management/.dockerignore
@@ -0,0 +1,3 @@
+node_modules
+web/node_modules
+pnpm-lock.yaml
--- a/management/Dockerfile
+++ b/management/Dockerfile
@@ -1,13 +1,23 @@
 # 前端构建阶段
 FROM node:18 AS frontend-builder
 WORKDIR /app/frontend
-COPY web /app/frontend
-# 安装 pnpm
-RUN npm install -g pnpm
+
+# 先复制依赖配置，利用 Docker 缓存机制
+COPY web/package.json web/pnpm-lock.yaml ./
+
+# 安装 pnpm（固定版本以避免兼容性问题）
+RUN npm install -g pnpm@8
 # 设置环境变量禁用交互式提示
 ENV CI=true
-# 安装依赖并构建
-RUN pnpm i && pnpm build
+# 安装依赖（这时 node_modules 会在容器里生成，而不是从宿主机复制）
+RUN pnpm install
+
+# 再复制其余源码（避免把宿主机的 node_modules 带进来）
+COPY web .
+
+# 构建产物
+RUN pnpm build && chmod -R 755 dist
+
 # 前端服务阶段
 FROM nginx:alpine AS frontend
 COPY nginx.conf /etc/nginx/conf.d/default.conf
@@ -29,6 +39,13 @@ COPY server/requirements.txt /app/
 RUN pip install --no-cache-dir -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
 # 复制后端代码
 COPY server /app
+
+# 创建 huggingface 缓存目录并复制模型
+RUN mkdir -p /root/.cache/huggingface/hub/
+COPY models--opendatalab--PDF-Extract-Kit-1.0 /root/.cache/huggingface/hub/models--opendatalab--PDF-Extract-Kit-1.0
+COPY models--hantian--layoutreader /root/.cache/huggingface/hub/models--hantian--layoutreader
+# COPY magic-pdf.json /root/magic-pdf.json
+
 # 暴露后端端口
 EXPOSE 5000
 CMD ["python", "app.py"]
--- a/management/docker-compose.yml
+++ b/management/docker-compose.yml
@@ -1,6 +1,7 @@
 services:
-  frontend:
-    image: zstar1003/ragflowplus-management-web:v0.1.2
+  management-frontend:
+    container_name: ragflowplus-management-frontend
+    image: zstar1003/ragflowplus-management-web:v0.2.0
    build:
      context: .
      dockerfile: Dockerfile
@@ -8,14 +9,15 @@ services:
    ports:
      - "8888:80"
    depends_on:
-      - backend
+      - management-backend
    environment:
      - API_BASE_URL=/api 
    networks:
      - management_network

-  backend:
-    image: zstar1003/ragflowplus-management-server:v0.1.2
+  management-backend:
+    container_name: ragflowplus-management-backend
+    image: zstar1003/ragflowplus-management-server:v0.2.0
    build:
      context: .
      dockerfile: Dockerfile
--- a/management/magic-pdf.json
+++ b/management/magic-pdf.json
@@ -0,0 +1,52 @@
+{
+    "bucket_info": {
+        "bucket-name-1": [
+            "ak",
+            "sk",
+            "endpoint"
+        ],
+        "bucket-name-2": [
+            "ak",
+            "sk",
+            "endpoint"
+        ]
+    },
+    "models-dir": "/root/.cache/huggingface/hub/models--opendatalab--PDF-Extract-Kit-1.0/snapshots/782e787d46ed9b52253af6c1f69cdfcc76583e8d/models",
+    "layoutreader-model-dir": "/root/.cache/huggingface/hub/models--hantian--layoutreader/snapshots/641226775a0878b1014a96ad01b9642915136853",
+    "device-mode": "cpu",
+    "layout-config": {
+        "model": "doclayout_yolo"
+    },
+    "formula-config": {
+        "mfd_model": "yolo_v8_mfd",
+        "mfr_model": "unimernet_small",
+        "enable": true
+    },
+    "table-config": {
+        "model": "rapid_table",
+        "sub_model": "slanet_plus",
+        "enable": true,
+        "max_time": 400
+    },
+    "llm-aided-config": {
+        "formula_aided": {
+            "api_key": "your_api_key",
+            "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
+            "model": "qwen2.5-7b-instruct",
+            "enable": false
+        },
+        "text_aided": {
+            "api_key": "your_api_key",
+            "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
+            "model": "qwen2.5-7b-instruct",
+            "enable": false
+        },
+        "title_aided": {
+            "api_key": "your_api_key",
+            "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
+            "model": "qwen2.5-32b-instruct",
+            "enable": false
+        }
+    },
+    "config_version": "1.2.0"
+}
--- a/management/nginx.conf
+++ b/management/nginx.conf
@@ -12,8 +12,8 @@ server {
    }

    location /api/ {
-        # 将所有以/api/开头的请求转发到后端服务(backend容器的5000端口)
-        proxy_pass http://backend:5000/api/;
+        # 将所有以/api/开头的请求转发到后端服务(management-backend容器的5000端口)
+        proxy_pass http://management-backend:5000/api/;
        # 设置代理请求头
        proxy_set_header Host $host;  # 保留原始请求的Host头
        # 传递客户端真实IP
--- a/management/server/database.py
+++ b/management/server/database.py
@@ -23,7 +23,7 @@ def is_running_in_docker():
 # 根据运行环境选择合适的主机地址
 DB_HOST = 'host.docker.internal' if is_running_in_docker() else 'localhost'
 MINIO_HOST = 'host.docker.internal' if is_running_in_docker() else 'localhost'
-ES_HOST = 'host.docker.internal' if is_running_in_docker() else 'localhost'
+ES_HOST = 'es01' if is_running_in_docker() else 'localhost'

 # 数据库连接配置
 DB_CONFIG = {
--- a/management/server/requirements.txt
+++ b/management/server/requirements.txt
@@ -8,4 +8,7 @@ PyJWT==2.10.1
 dotenv==0.9.9
 magic-pdf[full]==1.3.0
 transformers==4.49.0
-elasticsearch==8.12.0
+elasticsearch==8.12.0
+minio==7.2.4
+strenum==0.4.15
+peewee==3.17.1
--- a/management/web/pnpm-lock.yaml
+++ b/management/web/pnpm-lock.yaml
--- a/management/web/types/auto/auto-imports.d.ts
+++ b/management/web/types/auto/auto-imports.d.ts
@@ -87,6 +87,6 @@ declare global {
 // for type re-export
 declare global {
  // @ts-ignore
-  export type { Component, ComponentPublicInstance, ComputedRef, DirectiveBinding, ExtractDefaultPropTypes, ExtractPropTypes, ExtractPublicPropTypes, InjectionKey, PropType, Ref, MaybeRef, MaybeRefOrGetter, VNode, WritableComputedRef } from 'vue'
+  export type { Component, Slot, Slots, ComponentPublicInstance, ComputedRef, DirectiveBinding, ExtractDefaultPropTypes, ExtractPropTypes, ExtractPublicPropTypes, InjectionKey, PropType, Ref, MaybeRef, MaybeRefOrGetter, VNode, WritableComputedRef } from 'vue'
  import('vue')
 }
--- a/management/web/types/auto/components.d.ts
+++ b/management/web/types/auto/components.d.ts
@@ -8,16 +8,12 @@ export {}
 /* prettier-ignore */
 declare module 'vue' {
  export interface GlobalComponents {
-    ElAlert: typeof import('element-plus/es')['ElAlert']
    ElAside: typeof import('element-plus/es')['ElAside']
    ElAvatar: typeof import('element-plus/es')['ElAvatar']
    ElBacktop: typeof import('element-plus/es')['ElBacktop']
-    ElBadge: typeof import('element-plus/es')['ElBadge']
    ElBreadcrumb: typeof import('element-plus/es')['ElBreadcrumb']
    ElBreadcrumbItem: typeof import('element-plus/es')['ElBreadcrumbItem']
    ElButton: typeof import('element-plus/es')['ElButton']
-    ElCard: typeof import('element-plus/es')['ElCard']
-    ElConfigProvider: typeof import('element-plus/es')['ElConfigProvider']
    ElContainer: typeof import('element-plus/es')['ElContainer']
    ElDialog: typeof import('element-plus/es')['ElDialog']
    ElDivider: typeof import('element-plus/es')['ElDivider']
@@ -25,36 +21,17 @@ declare module 'vue' {
    ElDropdown: typeof import('element-plus/es')['ElDropdown']
    ElDropdownItem: typeof import('element-plus/es')['ElDropdownItem']
    ElDropdownMenu: typeof import('element-plus/es')['ElDropdownMenu']
-    ElEmpty: typeof import('element-plus/es')['ElEmpty']
-    ElForm: typeof import('element-plus/es')['ElForm']
-    ElFormItem: typeof import('element-plus/es')['ElFormItem']
    ElHeader: typeof import('element-plus/es')['ElHeader']
    ElIcon: typeof import('element-plus/es')['ElIcon']
-    ElInput: typeof import('element-plus/es')['ElInput']
    ElMain: typeof import('element-plus/es')['ElMain']
    ElMenu: typeof import('element-plus/es')['ElMenu']
    ElMenuItem: typeof import('element-plus/es')['ElMenuItem']
-    ElOption: typeof import('element-plus/es')['ElOption']
-    ElPagination: typeof import('element-plus/es')['ElPagination']
-    ElPopover: typeof import('element-plus/es')['ElPopover']
    ElProgress: typeof import('element-plus/es')['ElProgress']
-    ElRadio: typeof import('element-plus/es')['ElRadio']
-    ElRadioGroup: typeof import('element-plus/es')['ElRadioGroup']
    ElScrollbar: typeof import('element-plus/es')['ElScrollbar']
-    ElSelect: typeof import('element-plus/es')['ElSelect']
    ElSubMenu: typeof import('element-plus/es')['ElSubMenu']
    ElSwitch: typeof import('element-plus/es')['ElSwitch']
-    ElTable: typeof import('element-plus/es')['ElTable']
-    ElTableColumn: typeof import('element-plus/es')['ElTableColumn']
-    ElTabPane: typeof import('element-plus/es')['ElTabPane']
-    ElTabs: typeof import('element-plus/es')['ElTabs']
-    ElTag: typeof import('element-plus/es')['ElTag']
    ElTooltip: typeof import('element-plus/es')['ElTooltip']
-    ElUpload: typeof import('element-plus/es')['ElUpload']
    RouterLink: typeof import('vue-router')['RouterLink']
    RouterView: typeof import('vue-router')['RouterView']
  }
-  export interface ComponentCustomProperties {
-    vLoading: typeof import('element-plus/es')['ElLoadingDirective']
-  }
 }
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,140 +0,0 @@
-[project]
-name = "ragflow"
-version = "0.17.2"
-description = "[RAGFlow](https://ragflow.io/) is an open-source RAG (Retrieval-Augmented Generation) engine based on deep document understanding. It offers a streamlined RAG workflow for businesses of any scale, combining LLM (Large Language Models) to provide truthful question-answering capabilities, backed by well-founded citations from various complex formatted data."
-authors = [
-    { name = "Zhichang Yu", email = "yuzhichang@gmail.com" }
-]
-license = { file = "LICENSE" }
-readme = "README.md"
-requires-python = ">=3.10,<3.13"
-dependencies = [
-    "datrie==0.8.2",
-    "akshare>=1.15.78,<2.0.0",
-    "azure-storage-blob==12.22.0",
-    "azure-identity==1.17.1",
-    "azure-storage-file-datalake==12.16.0",
-    "anthropic==0.34.1",
-    "arxiv==2.1.3",
-    "aspose-slides>=24.9.0,<25.0.0; platform_machine == 'x86_64' or (sys_platform == 'darwin' and platform_machine == 'arm64')",
-    "beartype>=0.18.5,<0.19.0",
-    "bio==1.7.1",
-    "blinker==1.7.0",
-    "boto3==1.34.140",
-    "botocore==1.34.140",
-    "cachetools==5.3.3",
-    "chardet==5.2.0",
-    "cn2an==0.5.22",
-    "cohere==5.6.2",
-    "Crawl4AI==0.3.8",
-    "dashscope==1.20.11",
-    "deepl==1.18.0",
-    "demjson3==3.0.6",
-    "discord-py==2.3.2",
-    "duckduckgo-search>=7.2.0,<8.0.0",
-    "editdistance==0.8.1",
-    "elastic-transport==8.12.0",
-    "elasticsearch==8.12.1",
-    "elasticsearch-dsl==8.12.0",
-    "filelock==3.15.4",
-    "flask==3.0.3",
-    "flask-cors==5.0.0",
-    "flask-login==0.6.3",
-    "flask-session==0.8.0",
-    "google-search-results==2.4.2",
-    "groq==0.9.0",
-    "hanziconv==0.3.2",
-    "html-text==0.6.2",
-    "httpx==0.27.0",
-    "huggingface-hub>=0.25.0,<0.26.0",
-    "infinity-sdk==0.6.0-dev3",
-    "infinity-emb>=0.0.66,<0.0.67",
-    "itsdangerous==2.1.2",
-    "json-repair==0.35.0",
-    "markdown==3.6",
-    "markdown-to-json==2.1.1",
-    "minio==7.2.4",
-    "mistralai==0.4.2",
-    "nltk==3.9.1",
-    "numpy>=1.26.0,<2.0.0",
-    "ollama==0.2.1",
-    "onnxruntime==1.19.2; sys_platform == 'darwin' or platform_machine != 'x86_64'",
-    "onnxruntime-gpu==1.19.2; sys_platform != 'darwin' and platform_machine == 'x86_64'",
-    "openai==1.45.0",
-    "opencv-python==4.10.0.84",
-    "opencv-python-headless==4.10.0.84",
-    "openpyxl>=3.1.0,<4.0.0",
-    "ormsgpack==1.5.0",
-    "pandas>=2.2.0,<3.0.0",
-    "pdfplumber==0.10.4",
-    "peewee==3.17.1",
-    "pillow==10.4.0",
-    "protobuf==5.27.2",
-    "psycopg2-binary==2.9.9",
-    "pyclipper==1.3.0.post5",
-    "pycryptodomex==3.20.0",
-    "pypdf>=5.0.0,<6.0.0",
-    "pytest>=8.3.0,<9.0.0",
-    "python-dotenv==1.0.1",
-    "python-dateutil==2.8.2",
-    "python-pptx>=1.0.2,<2.0.0",
-    "pywencai==0.12.2",
-    "qianfan==0.4.6",
-    "ranx==0.3.20",
-    "readability-lxml==0.8.1",
-    "valkey==6.0.2",
-    "requests==2.32.2",
-    "replicate==0.31.0",
-    "roman-numbers==1.0.2",
-    "ruamel-base==1.0.0",
-    "scholarly==1.7.11",
-    "scikit-learn==1.5.0",
-    "selenium==4.22.0",
-    "selenium-wire==5.1.0",
-    "setuptools>=75.2.0,<76.0.0",
-    "shapely==2.0.5",
-    "six==1.16.0",
-    "strenum==0.4.15",
-    "tabulate==0.9.0",
-    "tavily-python==0.5.1",
-    "tencentcloud-sdk-python==3.0.1215",
-    "tika==2.6.0",
-    "tiktoken==0.7.0",
-    "umap_learn==0.5.6",
-    "vertexai==1.64.0",
-    "volcengine==1.0.146",
-    "voyageai==0.2.3",
-    "webdriver-manager==4.0.1",
-    "werkzeug==3.0.6",
-    "wikipedia==1.4.0",
-    "word2number==1.1",
-    "xgboost==1.5.0",
-    "xpinyin==0.7.6",
-    "yfinance==0.1.96",
-    "zhipuai==2.0.1",
-    "ruamel-yaml>=0.18.6,<0.19.0",
-    "google-generativeai>=0.8.1,<0.9.0",
-    "python-docx>=1.1.2,<2.0.0",
-    "pypdf2>=3.0.1,<4.0.0",
-    "graspologic>=3.4.1,<4.0.0",
-    "pymysql>=1.1.1,<2.0.0",
-    "mini-racer>=0.12.4,<0.13.0",
-    "pyodbc>=5.2.0,<6.0.0",
-    "pyicu>=2.13.1,<3.0.0",
-    "flasgger>=0.9.7.1,<0.10.0",
-    "xxhash>=3.5.0,<4.0.0",
-    "trio>=0.29.0",
-]
-
-[project.optional-dependencies]
-full = [
-    "bcembedding==0.1.5",
-    "fastembed>=0.3.6,<0.4.0; sys_platform == 'darwin' or platform_machine != 'x86_64'",
-    "fastembed-gpu>=0.3.6,<0.4.0; sys_platform != 'darwin' and platform_machine == 'x86_64'",
-    "flagembedding==1.2.10",
-    "torch>=2.5.0,<3.0.0",
-    "transformers>=4.35.0,<5.0.0"
-]
-
-[[tool.uv.index]]
-url = "https://mirrors.aliyun.com/pypi/simple"
--- a/uv.lock
+++ b/uv.lock
				`@ -0,0 +1 @@`
				`docker build -t zstar1003/ragflowplus:v0.2.0 .`