refactor(docker): restructure the Docker configuration to streamline image builds and management (#37)
- Update .dockerignore and .gitignore to exclude unnecessary files
- Adjust service naming and configuration in docker-compose.yml
- Optimize the Dockerfile, simplifying the build flow and improving cache utilization
parent 2a7e4f2df1
commit 3faa18126e
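For reference, the refactored setup is built and launched roughly like this (a sketch; it assumes the compose file and .env sit under docker/ as laid out in this commit):

    docker build -t zstar1003/ragflowplus:v0.2.0 .
    cd docker && docker compose up -d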
@@ -0,0 +1,2 @@
web/node_modules
web/package-lock.json

@@ -47,4 +47,7 @@ web/public/logo_secret.svg
web/public/logo_old.svg
web/public/logo.svg
web/src/locales/zh.ts
vllm/models
vllm/models
management/models--opendatalab--PDF-Extract-Kit-1.0
management/models--hantian--layoutreader
docker/models

Dockerfile
@@ -1,211 +1,34 @@
# base stage
FROM ubuntu:22.04 AS base
USER root
SHELL ["/bin/bash", "-c"]

ARG NEED_MIRROR=0
ARG LIGHTEN=0
ENV LIGHTEN=${LIGHTEN}
# Use the RAGFlow v0.17.2 image as the base
FROM infiniflow/ragflow:v0.17.2

# Set the working directory (same as the base image)
WORKDIR /ragflow

# Copy models downloaded via download_deps.py
RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
    cp /huggingface.co/InfiniFlow/huqie/huqie.txt.trie /ragflow/rag/res/ && \
    tar --exclude='.*' -cf - \
        /huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \
        /huggingface.co/InfiniFlow/deepdoc \
        | tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
    if [ "$LIGHTEN" != "1" ]; then \
        (tar -cf - \
            /huggingface.co/BAAI/bge-large-zh-v1.5 \
            /huggingface.co/BAAI/bge-reranker-v2-m3 \
            /huggingface.co/maidalun1020/bce-embedding-base_v1 \
            /huggingface.co/maidalun1020/bce-reranker-base_v1 \
            | tar -xf - --strip-components=2 -C /root/.ragflow) \
    fi
# Copy the Python source directories
COPY api ./api
COPY conf ./conf
COPY deepdoc ./deepdoc
COPY rag ./rag
COPY agent ./agent
COPY graphrag ./graphrag
COPY agentic_reasoning ./agentic_reasoning

# https://github.com/chrismattmann/tika-python
# This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull latest every time from Apache.
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
    cp -r /deps/nltk_data /root/ && \
    cp /deps/tika-server-standard-3.0.0.jar /deps/tika-server-standard-3.0.0.jar.md5 /ragflow/ && \
    cp /deps/cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4
# Copy the Python dependency definition files
COPY pyproject.toml ./pyproject.toml
COPY uv.lock ./uv.lock

ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard-3.0.0.jar"
ENV DEBIAN_FRONTEND=noninteractive

# Setup apt
# Python package and implicit dependencies:
# opencv-python: libglib2.0-0 libglx-mesa0 libgl1
# aspose-slides: pkg-config libicu-dev libgdiplus libssl1.1_1.1.1f-1ubuntu2_amd64.deb
# python-pptx: default-jdk tika-server-standard-3.0.0.jar
# selenium: libatk-bridge2.0-0 chrome-linux64-121-0-6167-85
# Building C extensions: libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
    if [ "$NEED_MIRROR" == "1" ]; then \
        sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list; \
    fi; \
    rm -f /etc/apt/apt.conf.d/docker-clean && \
    echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache && \
    chmod 1777 /tmp && \
    apt update && \
    apt --no-install-recommends install -y ca-certificates && \
    apt update && \
    apt install -y libglib2.0-0 libglx-mesa0 libgl1 && \
    apt install -y pkg-config libicu-dev libgdiplus && \
    apt install -y default-jdk && \
    apt install -y libatk-bridge2.0-0 && \
    apt install -y libpython3-dev libgtk-4-1 libnss3 xdg-utils libgbm-dev && \
    apt install -y libjemalloc-dev && \
    apt install -y python3-pip pipx nginx unzip curl wget git vim less

RUN if [ "$NEED_MIRROR" == "1" ]; then \
        pip3 config set global.index-url https://mirrors.aliyun.com/pypi/simple && \
        pip3 config set global.trusted-host mirrors.aliyun.com; \
        mkdir -p /etc/uv && \
        echo "[[index]]" > /etc/uv/uv.toml && \
        echo 'url = "https://mirrors.aliyun.com/pypi/simple"' >> /etc/uv/uv.toml && \
        echo "default = true" >> /etc/uv/uv.toml; \
    fi; \
    pipx install uv

ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
ENV PATH=/root/.local/bin:$PATH

# nodejs 12.22 on Ubuntu 22.04 is too old
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
    curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
    apt purge -y nodejs npm cargo && \
    apt autoremove -y && \
    apt update && \
    apt install -y nodejs

# A modern version of cargo is needed for the latest version of the Rust compiler.
RUN apt update && apt install -y curl build-essential \
    && if [ "$NEED_MIRROR" == "1" ]; then \
        # Use TUNA mirrors for rustup/rust dist files
        export RUSTUP_DIST_SERVER="https://mirrors.tuna.tsinghua.edu.cn/rustup"; \
        export RUSTUP_UPDATE_ROOT="https://mirrors.tuna.tsinghua.edu.cn/rustup/rustup"; \
        echo "Using TUNA mirrors for Rustup."; \
    fi; \
    # Force curl to use HTTP/1.1
    curl --proto '=https' --tlsv1.2 --http1.1 -sSf https://sh.rustup.rs | bash -s -- -y --profile minimal \
    && echo 'export PATH="/root/.cargo/bin:${PATH}"' >> /root/.bashrc

ENV PATH="/root/.cargo/bin:${PATH}"

RUN cargo --version && rustc --version

# Add mssql ODBC driver
# macOS ARM64 environment, install msodbcsql18.
# general x86_64 environment, install msodbcsql17.
RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
    curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \
    curl https://packages.microsoft.com/config/ubuntu/22.04/prod.list > /etc/apt/sources.list.d/mssql-release.list && \
    apt update && \
    arch="$(uname -m)"; \
    if [ "$arch" = "arm64" ] || [ "$arch" = "aarch64" ]; then \
        # ARM64 (macOS/Apple Silicon or Linux aarch64)
        ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql18; \
    else \
        # x86_64 or others
        ACCEPT_EULA=Y apt install -y unixodbc-dev msodbcsql17; \
    fi || \
    { echo "Failed to install ODBC driver"; exit 1; }

# Add dependencies of selenium
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chrome-linux64-121-0-6167-85,target=/chrome-linux64.zip \
    unzip /chrome-linux64.zip && \
    mv chrome-linux64 /opt/chrome && \
    ln -s /opt/chrome/chrome /usr/local/bin/
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chromedriver-linux64-121-0-6167-85,target=/chromedriver-linux64.zip \
    unzip -j /chromedriver-linux64.zip chromedriver-linux64/chromedriver && \
    mv chromedriver /usr/local/bin/ && \
    rm -f /usr/bin/google-chrome

# https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13
# aspose-slides on linux/arm64 is unavailable
RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
    if [ "$(uname -m)" = "x86_64" ]; then \
        dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_amd64.deb; \
    elif [ "$(uname -m)" = "aarch64" ]; then \
        dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_arm64.deb; \
    fi

# builder stage
FROM base AS builder
USER root

WORKDIR /ragflow

# install dependencies from uv.lock file
COPY pyproject.toml uv.lock ./

# https://github.com/astral-sh/uv/issues/10462
# uv records index url into uv.lock but doesn't failover among multiple indexes
RUN --mount=type=cache,id=ragflow_uv,target=/root/.cache/uv,sharing=locked \
    if [ "$NEED_MIRROR" == "1" ]; then \
        sed -i 's|pypi.org|mirrors.aliyun.com/pypi|g' uv.lock; \
    else \
        sed -i 's|mirrors.aliyun.com/pypi|pypi.org|g' uv.lock; \
    fi; \
    if [ "$LIGHTEN" == "1" ]; then \
        uv sync --python 3.10 --frozen; \
    else \
        uv sync --python 3.10 --frozen --all-extras; \
    fi

COPY web web
COPY docs docs
RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked \
    cd web && npm install && npm run build

COPY .git /ragflow/.git

RUN version_info=$(git describe --tags --match=v* --first-parent --always); \
    if [ "$LIGHTEN" == "1" ]; then \
        version_info="$version_info slim"; \
    else \
        version_info="$version_info full"; \
    fi; \
    echo "RAGFlow version: $version_info"; \
    echo $version_info > /ragflow/VERSION

# production stage
FROM base AS production
USER root

WORKDIR /ragflow

# Copy Python environment and packages
ENV VIRTUAL_ENV=/ragflow/.venv
COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

ENV PYTHONPATH=/ragflow/

COPY web web
COPY api api
COPY conf conf
COPY deepdoc deepdoc
COPY rag rag
COPY agent agent
COPY graphrag graphrag
COPY agentic_reasoning agentic_reasoning
COPY pyproject.toml uv.lock ./
# Copy the front-end source directory
COPY web ./web

# Copy the Docker-related files
COPY docker/service_conf.yaml.template ./conf/service_conf.yaml.template
COPY docker/entrypoint.sh docker/entrypoint-parser.sh ./
RUN chmod +x ./entrypoint*.sh
COPY docker/entrypoint.sh ./entrypoint.sh

RUN chmod +x ./entrypoint.sh

# Rebuild the front-end application
# Because /ragflow/web has been overwritten, /ragflow/web/dist needs to be regenerated
# The base image infiniflow/ragflow:v0.17.2 should already include Node.js and npm
RUN cd web && npm install && npm run build

# Copy compiled web pages
COPY --from=builder /ragflow/web/dist /ragflow/web/dist

COPY --from=builder /ragflow/VERSION /ragflow/VERSION
ENTRYPOINT ["./entrypoint.sh"]
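A quick way to exercise the simplified image (a sketch; the tag matches the build script and .env added in this commit, and --entrypoint is used only to bypass the default entrypoint.sh for the check):

    docker build -t zstar1003/ragflowplus:v0.2.0 .
    docker run --rm --entrypoint ls zstar1003/ragflowplus:v0.2.0 /ragflow/web/dist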
@@ -1,10 +0,0 @@
# This builds an image that contains the resources needed by Dockerfile
#
FROM scratch

# Copy resources downloaded via download_deps.py
COPY chromedriver-linux64-121-0-6167-85 chrome-linux64-121-0-6167-85 cl100k_base.tiktoken libssl1.1_1.1.1f-1ubuntu2_amd64.deb libssl1.1_1.1.1f-1ubuntu2_arm64.deb tika-server-standard-3.0.0.jar tika-server-standard-3.0.0.jar.md5 libssl*.deb /

COPY nltk_data /nltk_data

COPY huggingface.co /huggingface.co
@@ -1,60 +0,0 @@
FROM opencloudos/opencloudos:9.0
USER root

WORKDIR /ragflow

RUN dnf update -y && dnf install -y wget curl gcc-c++ openmpi-devel

RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
    bash ~/miniconda.sh -b -p /root/miniconda3 && \
    rm ~/miniconda.sh && ln -s /root/miniconda3/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
    echo ". /root/miniconda3/etc/profile.d/conda.sh" >> ~/.bashrc && \
    echo "conda activate base" >> ~/.bashrc

ENV PATH /root/miniconda3/bin:$PATH

RUN conda create -y --name py11 python=3.11

ENV CONDA_DEFAULT_ENV py11
ENV CONDA_PREFIX /root/miniconda3/envs/py11
ENV PATH $CONDA_PREFIX/bin:$PATH

# RUN curl -sL https://rpm.nodesource.com/setup_14.x | bash -
RUN dnf install -y nodejs

RUN dnf install -y nginx

ADD ./web ./web
ADD ./api ./api
ADD ./docs ./docs
ADD ./conf ./conf
ADD ./deepdoc ./deepdoc
ADD ./rag ./rag
ADD ./requirements.txt ./requirements.txt
ADD ./agent ./agent
ADD ./graphrag ./graphrag

RUN dnf install -y openmpi openmpi-devel python3-openmpi
ENV C_INCLUDE_PATH /usr/include/openmpi-x86_64:$C_INCLUDE_PATH
ENV LD_LIBRARY_PATH /usr/lib64/openmpi/lib:$LD_LIBRARY_PATH
RUN rm /root/miniconda3/envs/py11/compiler_compat/ld
RUN cd ./web && npm i && npm run build
RUN conda run -n py11 pip install $(grep -ivE "mpi4py" ./requirements.txt) # without mpi4py==3.1.5
RUN conda run -n py11 pip install redis

RUN dnf update -y && \
    dnf install -y glib2 mesa-libGL && \
    dnf clean all

RUN conda run -n py11 pip install ollama
RUN conda run -n py11 python -m nltk.downloader punkt
RUN conda run -n py11 python -m nltk.downloader wordnet

ENV PYTHONPATH=/ragflow/
ENV HF_ENDPOINT=https://hf-mirror.com

COPY docker/service_conf.yaml.template ./conf/service_conf.yaml.template
ADD docker/entrypoint.sh ./entrypoint.sh
RUN chmod +x ./entrypoint.sh

ENTRYPOINT ["./entrypoint.sh"]
@@ -0,0 +1 @@
docker build -t zstar1003/ragflowplus:v0.2.0 .
@@ -71,10 +71,10 @@ REDIS_PORT=6379
# The password for Redis.
REDIS_PASSWORD=infini_rag_flow

# The port used to expose RAGFlow's HTTP API service to the host machine,
SVR_HTTP_PORT=9380

RAGFLOW_IMAGE=infiniflow/ragflow:v0.17.2
# The image name is changed to zstar1003/ragflowplus:v0.2.0
RAGFLOW_IMAGE=zstar1003/ragflowplus:v0.2.0

# The local time zone.
TIMEZONE='Asia/Shanghai'
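To confirm which image the stack will use after this change, something like the following works (a sketch; run from the directory holding the compose file and this .env, the inline override being optional):

    docker compose config | grep "image:"
    RAGFLOW_IMAGE=zstar1003/ragflowplus:v0.2.0 docker compose up -d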
@@ -7,7 +7,7 @@ services:
      mysql:
        condition: service_healthy
    image: ${RAGFLOW_IMAGE}
    container_name: ragflow-server
    container_name: ragflowplus-server
    ports:
      - ${SVR_HTTP_PORT}:9380
      - 80:80
@@ -25,14 +25,13 @@ services:
    networks:
      - ragflow
    restart: on-failure
    # https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
    # If you're using Docker Desktop, the --add-host flag is optional. This flag makes sure that the host's internal IP gets exposed to the Prometheus container.
    extra_hosts:
      - "host.docker.internal:host-gateway"

  # Newly added back-office user management system
  frontend:
    image: zstar1003/ragflowplus-management-web:v0.1.2
  # Back-office management system front end
  management-frontend:
    container_name: ragflowplus-management-frontend
    image: zstar1003/ragflowplus-management-web:v0.2.0
    build:
      context: .
      dockerfile: Dockerfile
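After the rename, the running containers should show up under the new names; a quick check (sketch):

    docker compose ps
    # expect ragflowplus-server, ragflowplus-management-frontend, ragflowplus-management-backend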
@@ -40,27 +39,91 @@ services:
    ports:
      - "8888:80"
    depends_on:
      - backend
      - management-backend
    environment:
      - API_BASE_URL=/api
    networks:
      - ragflow

  backend:
    image: zstar1003/ragflowplus-management-server:v0.1.2

  # Back-office management system back end
  management-backend:
    container_name: ragflowplus-management-backend
    image: zstar1003/ragflowplus-management-server:v0.2.0
    build:
      context: .
      dockerfile: Dockerfile
      target: backend
    ports:
      - "5000:5000"
    volumes:
      - ./magic-pdf.json:/root/magic-pdf.json
    depends_on:
      mysql:
        condition: service_healthy
      es01:
        condition: service_healthy
    environment:
      - FLASK_ENV=development
      - CORS_ALLOWED_ORIGINS=http://frontend
      - CORS_ALLOWED_ORIGINS=http://management-frontend
      - MANAGEMENT_ADMIN_USERNAME=${MANAGEMENT_ADMIN_USERNAME:-admin}
      - MANAGEMENT_ADMIN_PASSWORD=${MANAGEMENT_ADMIN_PASSWORD:-12345678}
      - MANAGEMENT_JWT_SECRET=${MANAGEMENT_JWT_SECRET:-12345678}
    extra_hosts:
      - "host.docker.internal:host-gateway"
    networks:
      - ragflow
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              capabilities: [gpu]

  # vllm embedding model
  vllm-bge:
    container_name: vllm-bge
    image: vllm/vllm-openai:latest
    ipc: host
    volumes:
      - ./models/bge-m3:/models
    command: [
      "--model", "/models",
      "--served-model-name", "bge-m3",
      "--dtype", "float16",
      "--gpu-memory-utilization", "0.9",
    ]
    ports:
      - "8000:8000"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              capabilities: [gpu]
    networks:
      - ragflow

  # vllm language model
  vllm-deepseek:
    container_name: vllm-deepseek
    image: vllm/vllm-openai:latest
    ipc: host
    volumes:
      - ./models/DeepSeek-R1-1.5B:/models
    command: [
      "--model", "/models",
      "--served-model-name", "deepseek-r1",
      "--dtype", "float16",
      "--tensor-parallel-size", "1",
      "--max-model-len", "4096"
    ]
    ports:
      - "8001:8000"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              capabilities: [gpu]
    networks:
      - ragflow
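Both vllm services expose the OpenAI-compatible API on the host ports mapped above, so a minimal smoke test could look like this (model names and ports taken from this compose file):

    # embedding model served as "bge-m3" on host port 8000
    curl http://localhost:8000/v1/embeddings \
      -H "Content-Type: application/json" \
      -d '{"model": "bge-m3", "input": "hello"}'

    # language model served as "deepseek-r1" on host port 8001
    curl http://localhost:8001/v1/chat/completions \
      -H "Content-Type: application/json" \
      -d '{"model": "deepseek-r1", "messages": [{"role": "user", "content": "hi"}]}'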
@@ -0,0 +1,52 @@
{
  "bucket_info": {
    "bucket-name-1": [
      "ak",
      "sk",
      "endpoint"
    ],
    "bucket-name-2": [
      "ak",
      "sk",
      "endpoint"
    ]
  },
  "models-dir": "/root/.cache/huggingface/hub/models--opendatalab--PDF-Extract-Kit-1.0/snapshots/782e787d46ed9b52253af6c1f69cdfcc76583e8d/models",
  "layoutreader-model-dir": "/root/.cache/huggingface/hub/models--hantian--layoutreader/snapshots/641226775a0878b1014a96ad01b9642915136853",
  "device-mode": "cpu",
  "layout-config": {
    "model": "doclayout_yolo"
  },
  "formula-config": {
    "mfd_model": "yolo_v8_mfd",
    "mfr_model": "unimernet_small",
    "enable": true
  },
  "table-config": {
    "model": "rapid_table",
    "sub_model": "slanet_plus",
    "enable": true,
    "max_time": 400
  },
  "llm-aided-config": {
    "formula_aided": {
      "api_key": "your_api_key",
      "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
      "model": "qwen2.5-7b-instruct",
      "enable": false
    },
    "text_aided": {
      "api_key": "your_api_key",
      "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
      "model": "qwen2.5-7b-instruct",
      "enable": false
    },
    "title_aided": {
      "api_key": "your_api_key",
      "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
      "model": "qwen2.5-32b-instruct",
      "enable": false
    }
  },
  "config_version": "1.2.0"
}
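The file ships with "device-mode": "cpu"; if the GPU that the compose file reserves should be used for PDF parsing instead, a one-line tweak like the following would do it (a sketch; the path is assumed, and "cuda" is the GPU mode name used by magic-pdf/MinerU):

    sed -i 's/"device-mode": "cpu"/"device-mode": "cuda"/' docker/magic-pdf.json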
@@ -1,57 +0,0 @@
#!/usr/bin/env python3

# PEP 723 metadata
# /// script
# requires-python = ">=3.10"
# dependencies = [
#   "huggingface-hub",
#   "nltk",
# ]
# ///

from huggingface_hub import snapshot_download
import nltk
import os
import urllib.request

urls = [
    "http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb",
    "http://ports.ubuntu.com/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_arm64.deb",
    "https://repo1.maven.org/maven2/org/apache/tika/tika-server-standard/3.0.0/tika-server-standard-3.0.0.jar",
    "https://repo1.maven.org/maven2/org/apache/tika/tika-server-standard/3.0.0/tika-server-standard-3.0.0.jar.md5",
    "https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken",
    "https://bit.ly/chrome-linux64-121-0-6167-85",
    "https://bit.ly/chromedriver-linux64-121-0-6167-85",
]

repos = [
    "InfiniFlow/text_concat_xgb_v1.0",
    "InfiniFlow/deepdoc",
    "InfiniFlow/huqie",
    "BAAI/bge-large-zh-v1.5",
    "BAAI/bge-reranker-v2-m3",
    "maidalun1020/bce-embedding-base_v1",
    "maidalun1020/bce-reranker-base_v1",
]


def download_model(repo_id):
    local_dir = os.path.abspath(os.path.join("huggingface.co", repo_id))
    os.makedirs(local_dir, exist_ok=True)
    snapshot_download(repo_id=repo_id, local_dir=local_dir, local_dir_use_symlinks=False)


if __name__ == "__main__":
    for url in urls:
        filename = url.split("/")[-1]
        print(f"Downloading {url}...")
        if not os.path.exists(filename):
            urllib.request.urlretrieve(url, filename)

    local_dir = os.path.abspath('nltk_data')
    for data in ['wordnet', 'punkt', 'punkt_tab']:
        print(f"Downloading nltk {data}...")
        nltk.download(data, download_dir=local_dir)

    for repo_id in repos:
        print(f"Downloading huggingface repo {repo_id}...")
        download_model(repo_id)
@@ -0,0 +1,3 @@
node_modules
web/node_modules
pnpm-lock.yaml
@@ -1,13 +1,23 @@
# Front-end build stage
FROM node:18 AS frontend-builder
WORKDIR /app/frontend
COPY web /app/frontend
# Install pnpm
RUN npm install -g pnpm

# Copy the dependency manifests first to take advantage of Docker layer caching
COPY web/package.json web/pnpm-lock.yaml ./

# Install pnpm (pinned to a major version to avoid compatibility issues)
RUN npm install -g pnpm@8
# Disable interactive prompts
ENV CI=true
# Install dependencies and build
RUN pnpm i && pnpm build
# Install dependencies (node_modules is generated inside the container rather than copied from the host)
RUN pnpm install

# Then copy the rest of the sources (keeps the host's node_modules out of the image)
COPY web .

# Build artifacts
RUN pnpm build && chmod -R 755 dist

# Front-end serving stage
FROM nginx:alpine AS frontend
COPY nginx.conf /etc/nginx/conf.d/default.conf
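Since docker-compose.yml builds the back end with target: backend, the individual stages can also be built on their own; a rough sketch (the :dev tags here are placeholders):

    docker build --target frontend -t ragflowplus-management-web:dev .
    docker build --target backend -t ragflowplus-management-server:dev .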
@@ -29,6 +39,13 @@ COPY server/requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
# Copy the back-end code
COPY server /app

# Create the huggingface cache directory and copy the models into it
RUN mkdir -p /root/.cache/huggingface/hub/
COPY models--opendatalab--PDF-Extract-Kit-1.0 /root/.cache/huggingface/hub/models--opendatalab--PDF-Extract-Kit-1.0
COPY models--hantian--layoutreader /root/.cache/huggingface/hub/models--hantian--layoutreader
# COPY magic-pdf.json /root/magic-pdf.json

# Expose the back-end port
EXPOSE 5000
CMD ["python", "app.py"]
@@ -1,6 +1,7 @@
services:
  frontend:
    image: zstar1003/ragflowplus-management-web:v0.1.2
  management-frontend:
    container_name: ragflowplus-management-frontend
    image: zstar1003/ragflowplus-management-web:v0.2.0
    build:
      context: .
      dockerfile: Dockerfile

@@ -8,14 +9,15 @@ services:
    ports:
      - "8888:80"
    depends_on:
      - backend
      - management-backend
    environment:
      - API_BASE_URL=/api
    networks:
      - management_network

  backend:
    image: zstar1003/ragflowplus-management-server:v0.1.2
  management-backend:
    container_name: ragflowplus-management-backend
    image: zstar1003/ragflowplus-management-server:v0.2.0
    build:
      context: .
      dockerfile: Dockerfile
@@ -0,0 +1,52 @@
{
  "bucket_info": {
    "bucket-name-1": [
      "ak",
      "sk",
      "endpoint"
    ],
    "bucket-name-2": [
      "ak",
      "sk",
      "endpoint"
    ]
  },
  "models-dir": "/root/.cache/huggingface/hub/models--opendatalab--PDF-Extract-Kit-1.0/snapshots/782e787d46ed9b52253af6c1f69cdfcc76583e8d/models",
  "layoutreader-model-dir": "/root/.cache/huggingface/hub/models--hantian--layoutreader/snapshots/641226775a0878b1014a96ad01b9642915136853",
  "device-mode": "cpu",
  "layout-config": {
    "model": "doclayout_yolo"
  },
  "formula-config": {
    "mfd_model": "yolo_v8_mfd",
    "mfr_model": "unimernet_small",
    "enable": true
  },
  "table-config": {
    "model": "rapid_table",
    "sub_model": "slanet_plus",
    "enable": true,
    "max_time": 400
  },
  "llm-aided-config": {
    "formula_aided": {
      "api_key": "your_api_key",
      "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
      "model": "qwen2.5-7b-instruct",
      "enable": false
    },
    "text_aided": {
      "api_key": "your_api_key",
      "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
      "model": "qwen2.5-7b-instruct",
      "enable": false
    },
    "title_aided": {
      "api_key": "your_api_key",
      "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1",
      "model": "qwen2.5-32b-instruct",
      "enable": false
    }
  },
  "config_version": "1.2.0"
}
@@ -12,8 +12,8 @@ server {
    }

    location /api/ {
        # Forward every request that starts with /api/ to the back-end service (port 5000 of the backend container)
        proxy_pass http://backend:5000/api/;
        # Forward every request that starts with /api/ to the back-end service (port 5000 of the management-backend container)
        proxy_pass http://management-backend:5000/api/;
        # Set the proxy request headers
        proxy_set_header Host $host; # Preserve the Host header of the original request
        # Pass through the client's real IP
@@ -23,7 +23,7 @@ def is_running_in_docker():
# Choose the appropriate host addresses based on the runtime environment
DB_HOST = 'host.docker.internal' if is_running_in_docker() else 'localhost'
MINIO_HOST = 'host.docker.internal' if is_running_in_docker() else 'localhost'
ES_HOST = 'host.docker.internal' if is_running_in_docker() else 'localhost'
ES_HOST = 'es01' if is_running_in_docker() else 'localhost'

# Database connection configuration
DB_CONFIG = {
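With ES_HOST now pointing at the es01 service name on the shared compose network, reachability can be checked from inside the back-end container; a sketch (an authentication error in the response is fine here, it still proves that es01 resolves):

    docker exec ragflowplus-management-backend curl -sS http://es01:9200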
@@ -8,4 +8,7 @@ PyJWT==2.10.1
dotenv==0.9.9
magic-pdf[full]==1.3.0
transformers==4.49.0
elasticsearch==8.12.0
elasticsearch==8.12.0
minio==7.2.4
strenum==0.4.15
peewee==3.17.1

File diff suppressed because it is too large
@@ -87,6 +87,6 @@ declare global {
// for type re-export
declare global {
  // @ts-ignore
  export type { Component, ComponentPublicInstance, ComputedRef, DirectiveBinding, ExtractDefaultPropTypes, ExtractPropTypes, ExtractPublicPropTypes, InjectionKey, PropType, Ref, MaybeRef, MaybeRefOrGetter, VNode, WritableComputedRef } from 'vue'
  export type { Component, Slot, Slots, ComponentPublicInstance, ComputedRef, DirectiveBinding, ExtractDefaultPropTypes, ExtractPropTypes, ExtractPublicPropTypes, InjectionKey, PropType, Ref, MaybeRef, MaybeRefOrGetter, VNode, WritableComputedRef } from 'vue'
  import('vue')
}
@@ -8,16 +8,12 @@ export {}
/* prettier-ignore */
declare module 'vue' {
  export interface GlobalComponents {
    ElAlert: typeof import('element-plus/es')['ElAlert']
    ElAside: typeof import('element-plus/es')['ElAside']
    ElAvatar: typeof import('element-plus/es')['ElAvatar']
    ElBacktop: typeof import('element-plus/es')['ElBacktop']
    ElBadge: typeof import('element-plus/es')['ElBadge']
    ElBreadcrumb: typeof import('element-plus/es')['ElBreadcrumb']
    ElBreadcrumbItem: typeof import('element-plus/es')['ElBreadcrumbItem']
    ElButton: typeof import('element-plus/es')['ElButton']
    ElCard: typeof import('element-plus/es')['ElCard']
    ElConfigProvider: typeof import('element-plus/es')['ElConfigProvider']
    ElContainer: typeof import('element-plus/es')['ElContainer']
    ElDialog: typeof import('element-plus/es')['ElDialog']
    ElDivider: typeof import('element-plus/es')['ElDivider']

@@ -25,36 +21,17 @@ declare module 'vue' {
    ElDropdown: typeof import('element-plus/es')['ElDropdown']
    ElDropdownItem: typeof import('element-plus/es')['ElDropdownItem']
    ElDropdownMenu: typeof import('element-plus/es')['ElDropdownMenu']
    ElEmpty: typeof import('element-plus/es')['ElEmpty']
    ElForm: typeof import('element-plus/es')['ElForm']
    ElFormItem: typeof import('element-plus/es')['ElFormItem']
    ElHeader: typeof import('element-plus/es')['ElHeader']
    ElIcon: typeof import('element-plus/es')['ElIcon']
    ElInput: typeof import('element-plus/es')['ElInput']
    ElMain: typeof import('element-plus/es')['ElMain']
    ElMenu: typeof import('element-plus/es')['ElMenu']
    ElMenuItem: typeof import('element-plus/es')['ElMenuItem']
    ElOption: typeof import('element-plus/es')['ElOption']
    ElPagination: typeof import('element-plus/es')['ElPagination']
    ElPopover: typeof import('element-plus/es')['ElPopover']
    ElProgress: typeof import('element-plus/es')['ElProgress']
    ElRadio: typeof import('element-plus/es')['ElRadio']
    ElRadioGroup: typeof import('element-plus/es')['ElRadioGroup']
    ElScrollbar: typeof import('element-plus/es')['ElScrollbar']
    ElSelect: typeof import('element-plus/es')['ElSelect']
    ElSubMenu: typeof import('element-plus/es')['ElSubMenu']
    ElSwitch: typeof import('element-plus/es')['ElSwitch']
    ElTable: typeof import('element-plus/es')['ElTable']
    ElTableColumn: typeof import('element-plus/es')['ElTableColumn']
    ElTabPane: typeof import('element-plus/es')['ElTabPane']
    ElTabs: typeof import('element-plus/es')['ElTabs']
    ElTag: typeof import('element-plus/es')['ElTag']
    ElTooltip: typeof import('element-plus/es')['ElTooltip']
    ElUpload: typeof import('element-plus/es')['ElUpload']
    RouterLink: typeof import('vue-router')['RouterLink']
    RouterView: typeof import('vue-router')['RouterView']
  }
  export interface ComponentCustomProperties {
    vLoading: typeof import('element-plus/es')['ElLoadingDirective']
  }
}
pyproject.toml
@@ -1,140 +0,0 @@
[project]
name = "ragflow"
version = "0.17.2"
description = "[RAGFlow](https://ragflow.io/) is an open-source RAG (Retrieval-Augmented Generation) engine based on deep document understanding. It offers a streamlined RAG workflow for businesses of any scale, combining LLM (Large Language Models) to provide truthful question-answering capabilities, backed by well-founded citations from various complex formatted data."
authors = [
    { name = "Zhichang Yu", email = "yuzhichang@gmail.com" }
]
license = { file = "LICENSE" }
readme = "README.md"
requires-python = ">=3.10,<3.13"
dependencies = [
    "datrie==0.8.2",
    "akshare>=1.15.78,<2.0.0",
    "azure-storage-blob==12.22.0",
    "azure-identity==1.17.1",
    "azure-storage-file-datalake==12.16.0",
    "anthropic==0.34.1",
    "arxiv==2.1.3",
    "aspose-slides>=24.9.0,<25.0.0; platform_machine == 'x86_64' or (sys_platform == 'darwin' and platform_machine == 'arm64')",
    "beartype>=0.18.5,<0.19.0",
    "bio==1.7.1",
    "blinker==1.7.0",
    "boto3==1.34.140",
    "botocore==1.34.140",
    "cachetools==5.3.3",
    "chardet==5.2.0",
    "cn2an==0.5.22",
    "cohere==5.6.2",
    "Crawl4AI==0.3.8",
    "dashscope==1.20.11",
    "deepl==1.18.0",
    "demjson3==3.0.6",
    "discord-py==2.3.2",
    "duckduckgo-search>=7.2.0,<8.0.0",
    "editdistance==0.8.1",
    "elastic-transport==8.12.0",
    "elasticsearch==8.12.1",
    "elasticsearch-dsl==8.12.0",
    "filelock==3.15.4",
    "flask==3.0.3",
    "flask-cors==5.0.0",
    "flask-login==0.6.3",
    "flask-session==0.8.0",
    "google-search-results==2.4.2",
    "groq==0.9.0",
    "hanziconv==0.3.2",
    "html-text==0.6.2",
    "httpx==0.27.0",
    "huggingface-hub>=0.25.0,<0.26.0",
    "infinity-sdk==0.6.0-dev3",
    "infinity-emb>=0.0.66,<0.0.67",
    "itsdangerous==2.1.2",
    "json-repair==0.35.0",
    "markdown==3.6",
    "markdown-to-json==2.1.1",
    "minio==7.2.4",
    "mistralai==0.4.2",
    "nltk==3.9.1",
    "numpy>=1.26.0,<2.0.0",
    "ollama==0.2.1",
    "onnxruntime==1.19.2; sys_platform == 'darwin' or platform_machine != 'x86_64'",
    "onnxruntime-gpu==1.19.2; sys_platform != 'darwin' and platform_machine == 'x86_64'",
    "openai==1.45.0",
    "opencv-python==4.10.0.84",
    "opencv-python-headless==4.10.0.84",
    "openpyxl>=3.1.0,<4.0.0",
    "ormsgpack==1.5.0",
    "pandas>=2.2.0,<3.0.0",
    "pdfplumber==0.10.4",
    "peewee==3.17.1",
    "pillow==10.4.0",
    "protobuf==5.27.2",
    "psycopg2-binary==2.9.9",
    "pyclipper==1.3.0.post5",
    "pycryptodomex==3.20.0",
    "pypdf>=5.0.0,<6.0.0",
    "pytest>=8.3.0,<9.0.0",
    "python-dotenv==1.0.1",
    "python-dateutil==2.8.2",
    "python-pptx>=1.0.2,<2.0.0",
    "pywencai==0.12.2",
    "qianfan==0.4.6",
    "ranx==0.3.20",
    "readability-lxml==0.8.1",
    "valkey==6.0.2",
    "requests==2.32.2",
    "replicate==0.31.0",
    "roman-numbers==1.0.2",
    "ruamel-base==1.0.0",
    "scholarly==1.7.11",
    "scikit-learn==1.5.0",
    "selenium==4.22.0",
    "selenium-wire==5.1.0",
    "setuptools>=75.2.0,<76.0.0",
    "shapely==2.0.5",
    "six==1.16.0",
    "strenum==0.4.15",
    "tabulate==0.9.0",
    "tavily-python==0.5.1",
    "tencentcloud-sdk-python==3.0.1215",
    "tika==2.6.0",
    "tiktoken==0.7.0",
    "umap_learn==0.5.6",
    "vertexai==1.64.0",
    "volcengine==1.0.146",
    "voyageai==0.2.3",
    "webdriver-manager==4.0.1",
    "werkzeug==3.0.6",
    "wikipedia==1.4.0",
    "word2number==1.1",
    "xgboost==1.5.0",
    "xpinyin==0.7.6",
    "yfinance==0.1.96",
    "zhipuai==2.0.1",
    "ruamel-yaml>=0.18.6,<0.19.0",
    "google-generativeai>=0.8.1,<0.9.0",
    "python-docx>=1.1.2,<2.0.0",
    "pypdf2>=3.0.1,<4.0.0",
    "graspologic>=3.4.1,<4.0.0",
    "pymysql>=1.1.1,<2.0.0",
    "mini-racer>=0.12.4,<0.13.0",
    "pyodbc>=5.2.0,<6.0.0",
    "pyicu>=2.13.1,<3.0.0",
    "flasgger>=0.9.7.1,<0.10.0",
    "xxhash>=3.5.0,<4.0.0",
    "trio>=0.29.0",
]

[project.optional-dependencies]
full = [
    "bcembedding==0.1.5",
    "fastembed>=0.3.6,<0.4.0; sys_platform == 'darwin' or platform_machine != 'x86_64'",
    "fastembed-gpu>=0.3.6,<0.4.0; sys_platform != 'darwin' and platform_machine == 'x86_64'",
    "flagembedding==1.2.10",
    "torch>=2.5.0,<3.0.0",
    "transformers>=4.35.0,<5.0.0"
]

[[tool.uv.index]]
url = "https://mirrors.aliyun.com/pypi/simple"