Compare commits

...

10 Commits

Author SHA1 Message Date
zstar ae462bc9dc chore: 上传源码部署缺失的trie文件 2025-07-10 15:34:28 +08:00
zstar c4ad60528c
Merge pull request #191 from xinsenyan/correct_userid_to_string
修改userid为字符
2025-07-08 17:35:38 +08:00
taku ca0948d382 修改userid要求为字符 2025-07-08 17:26:30 +08:00
zstar 0a61254d65
Merge pull request #190 from xinsenyan/main
优化逻辑与去除冗余代码
2025-07-07 21:50:47 +08:00
Taku eaa4ad1aa2
Update service.py
优化删除文档的数据库查询语句
2025-07-07 21:16:08 +08:00
Taku 84a16792d7
Update document_parser.py
删除没用到的数据库连接
2025-07-07 21:12:49 +08:00
Taku 5825b76756
Update __init__.py
防止解析时不显示日志
2025-07-07 21:09:21 +08:00
zstar b5463870ff chores(knowledgebases): 清理相关冗余代码
- 删除了多余的 parse_document 函数
- 重命名 parse_document_async 为 parse_document,作为主要的文档解析接口
2025-06-29 17:35:48 +08:00
zstar 4262a2f829 docs(question): 添加关于后台知识库连接测试的问题和解答 2025-06-23 17:03:38 +08:00
zstar 7c97ad1d6f docs: 添加 ollama 连接问题的解决方案
- 解决了添加 ollama 时无法联通的问题
- 提供了修改配置文件的详细步骤
- 增加了重启服务的说明
2025-06-21 10:25:30 +08:00
13 changed files with 68 additions and 58 deletions

2
.gitignore vendored
View File

@ -19,7 +19,7 @@ Cargo.lock
# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb
*.trie
.trie
.idea/
.vscode/

View File

@ -70,6 +70,34 @@ deploy:
**回答:** 这是 Ragflow 原生解析器心跳触发的问题,不影响正常使用,可忽略。官方回答可参考:https://github.com/infiniflow/ragflow/issues/6700
## 问题 11:为什么添加 ollama 时无法联通?
**回答:** ollama 需要预先设置为对所有网络接口开放。
修改配置文件:
```bash
vim /etc/systemd/system/ollama.service
```
在 `[Service]` 段下添加:
```bash
Environment="OLLAMA_HOST=0.0.0.0"
```
重新载入配置文件,并重启 ollama:
```bash
systemctl daemon-reload
systemctl restart ollama
```
## 问题 12:在后台知识库连接测试中显示 text-embedding-v3 无法连通?
**回答:** 出现此情况原因是使用 ragflow 创建的初始用户(创建时间最早的用户),默认自带了通义千问的模型配置,可登录此用户,在模型管理中移除通义千问的模型配置,并添加新的嵌入模型配置,后台会自动读取最新添加的模型配置信息。
嵌入模型仅支持 bge-m3 模型,联网 API 仅支持硅基流动平台,可免费调用该嵌入模型:https://cloud.siliconflow.cn/i/bjDoFhPf
---

View File

@ -159,24 +159,6 @@ def delete_document(doc_id):
return error_response(str(e))
@knowledgebase_bp.route("/documents/<doc_id>/parse", methods=["POST"])
def parse_document(doc_id):
"""开始解析文档"""
# 处理 OPTIONS 预检请求
if request.method == "OPTIONS":
response = success_response({})
# 添加 CORS 相关头
response.headers.add("Access-Control-Allow-Methods", "POST")
response.headers.add("Access-Control-Allow-Headers", "Content-Type,Authorization")
return response
try:
result = KnowledgebaseService.async_parse_document(doc_id)
return success_response(data=result)
except Exception as e:
return error_response(str(e), code=500)
@knowledgebase_bp.route("/documents/<doc_id>/parse/progress", methods=["GET"])
def get_parse_progress(doc_id):
"""获取文档解析进度"""
@ -242,8 +224,8 @@ def set_system_embedding_config_route():
@knowledgebase_bp.route("/documents/<doc_id>/parse", methods=["POST"])
def parse_document_async(doc_id): # 函数名改为 async 以区分
"""开始异步解析单个文档"""
def parse_document(doc_id):
"""开始解析文档"""
if request.method == "OPTIONS":
response = success_response({})
response.headers.add("Access-Control-Allow-Methods", "POST")
@ -251,7 +233,7 @@ def parse_document_async(doc_id): # 函数名改为 async 以区分
return response
try:
result = KnowledgebaseService.parse_document(doc_id) # 调用同步版本
result = KnowledgebaseService.parse_document(doc_id)
if result.get("success"):
return success_response(data={"message": f"文档 {doc_id} 同步解析完成。", "details": result})
else:

View File

@ -1,25 +1,25 @@
from peewee import Model
from typing import Type, TypeVar, Dict, Any
from typing import Any, Dict, Type, TypeVar
from peewee import Model
T = TypeVar("T", bound=Model)
T = TypeVar('T', bound=Model)
class BaseService:
model: Type[T]
@classmethod
def get_by_id(cls, id: str) -> T:
return cls.model.get_by_id(id)
@classmethod
@classmethod
def insert(cls, data: Dict[str, Any]) -> T:
return cls.model.create(**data)
@classmethod
def delete_by_id(cls, id: str) -> int:
return cls.model.delete().where(cls.model.id == id).execute()
@classmethod
def query(cls, **kwargs) -> list[T]:
return list(cls.model.select().where(*[
getattr(cls.model, k) == v for k, v in kwargs.items()
]))
return list(cls.model.select().where(*[getattr(cls.model, k) == v for k, v in kwargs.items()]))

View File

@ -1,21 +1,20 @@
from peewee import *
from .base_service import BaseService
from .models import File2Document
class File2DocumentService(BaseService):
model = File2Document
@classmethod
def create_mapping(cls, file_id: str, document_id: str) -> File2Document:
return cls.insert({
'file_id': file_id,
'document_id': document_id
})
return cls.insert({"file_id": file_id, "document_id": document_id})
@classmethod
def get_by_document_id(cls, document_id: str) -> list[File2Document]:
return cls.query(document_id=document_id)
@classmethod
def get_by_file_id(cls, file_id: str) -> list[File2Document]:
return cls.query(file_id=file_id)
return cls.query(file_id=file_id)

View File

@ -1,3 +1,4 @@
import logging
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

View File

@ -484,11 +484,8 @@ def perform_parse(doc_id, doc_info, file_info, embedding_config, kb_info):
# 4. 更新文本块的图像信息
if image_info_list and chunk_ids_list:
conn = None
cursor = None
try:
conn = get_db_connection()
cursor = conn.cursor()
# 为每个文本块找到最近的图片
for i, chunk_id in enumerate(chunk_ids_list):
@ -516,11 +513,7 @@ def perform_parse(doc_id, doc_info, file_info, embedding_config, kb_info):
except Exception as e:
logger.error(f"[Parser-ERROR] 更新文本块图片关联失败: {e}")
raise Exception(f"[Parser-ERROR] 更新文本块图片关联失败: {e}")
finally:
if cursor:
cursor.close()
if conn:
conn.close()
# 5. 更新最终状态
process_duration = time.time() - start_time

View File

@ -711,12 +711,19 @@ class KnowledgebaseService:
cursor = conn.cursor(dictionary=True)
# 先检查文档是否存在
# check_query = """
# SELECT
# d.kb_id,
# kb.created_by AS tenant_id -- 获取 tenant_id (knowledgebase的创建者)
# FROM document d
# JOIN knowledgebase kb ON d.kb_id = kb.id -- JOIN knowledgebase 表
# WHERE d.id = %s
# """
check_query = """
SELECT
d.kb_id,
kb.created_by AS tenant_id -- 获取 tenant_id (knowledgebase的创建者)
d.created_by AS tenant_id
FROM document d
JOIN knowledgebase kb ON d.kb_id = kb.id -- JOIN knowledgebase
WHERE d.id = %s
"""
cursor.execute(check_query, (doc_id,))
@ -1041,7 +1048,7 @@ class KnowledgebaseService:
SELECT llm_name, api_key, api_base
FROM tenant_llm
WHERE tenant_id = %s AND model_type = 'embedding'
ORDER BY create_time DESC # 如果一个用户可能有多个embedding配置取最早的
ORDER BY create_time DESC
LIMIT 1
"""
cursor.execute(query_embedding_config, (earliest_user_id,))

View File

@ -11,7 +11,7 @@ export function createTableDataApi(data: Tables.CreateOrUpdateTableRequestData)
}
/** 删 */
export function deleteTableDataApi(id: number) {
export function deleteTableDataApi(id: string) {
return request({
url: `api/v1/users/${id}`,
method: "delete"
@ -42,7 +42,7 @@ export function getTableDataApi(params: Tables.TableRequestData) {
* @param password
* @returns BaseResponse
*/
export function resetPasswordApi(userId: number, password: string) {
export function resetPasswordApi(userId: string, password: string) {
return request({
url: `api/v1/users/${userId}/reset-password`,
method: "put",

View File

@ -1,5 +1,5 @@
export interface CreateOrUpdateTableRequestData {
id?: number
id?: string
username: string
email?: string
password?: string
@ -21,7 +21,7 @@ export interface TableRequestData {
}
export interface TableData {
id: number
id: string
username: string
email: string
createTime: string

View File

@ -39,7 +39,7 @@ const formRules: FormRules<CreateOrUpdateTableRequestData> = {
// #region
const resetPasswordDialogVisible = ref<boolean>(false)
const resetPasswordFormRef = ref<FormInstance | null>(null)
const currentUserId = ref<number | undefined>(undefined) // ID
const currentUserId = ref<string | undefined>(undefined) // ID
const resetPasswordFormData = reactive({
password: ""
})

BIN
rag/res/huqie.txt.trie Normal file

Binary file not shown.