From 70647d36d5798c26e150dd0a9155e75318aee0d7 Mon Sep 17 00:00:00 2001 From: zstar <65890619+zstar1003@users.noreply.github.com> Date: Sat, 7 Jun 2025 16:14:33 +0800 Subject: [PATCH] =?UTF-8?q?refactor:=20=E4=B8=8A=E8=B0=83TopNItem=20?= =?UTF-8?q?=E7=BB=84=E4=BB=B6=E6=9C=80=E5=A4=A7=E5=80=BC=E4=B8=BA100?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../knowledgebases/document_parser.py | 28 +------------------ web/src/components/top-n-item.tsx | 2 +- 2 files changed, 2 insertions(+), 28 deletions(-) diff --git a/management/server/services/knowledgebases/document_parser.py b/management/server/services/knowledgebases/document_parser.py index 190272c..4951e0f 100644 --- a/management/server/services/knowledgebases/document_parser.py +++ b/management/server/services/knowledgebases/document_parser.py @@ -23,36 +23,10 @@ tknzr = RagTokenizer() def tokenize_text(text): + """使用分词器对文本进行分词""" return tknzr.tokenize(text) -def merge_chunks(sections, chunk_token_num=128, delimiter="\n。;!?"): - """合并文本块,替代naive_merge功能(预留函数)""" - if not sections: - return [] - - chunks = [""] - token_counts = [0] - - for section in sections: - # 计算当前部分的token数量 - text = section[0] if isinstance(section, tuple) else section - - # 简单估算token数量 - token_count = len(text.split()) - - # 如果当前chunk已经超过限制,创建新chunk - if token_counts[-1] > chunk_token_num: - chunks.append(text) - token_counts.append(token_count) - else: - # 否则添加到当前chunk - chunks[-1] += text - token_counts[-1] += token_count - - return chunks - - def _update_document_progress(doc_id, progress=None, message=None, status=None, run=None, chunk_count=None, process_duration=None): """更新数据库中文档的进度和状态""" conn = None diff --git a/web/src/components/top-n-item.tsx b/web/src/components/top-n-item.tsx index 3fd7602..ee1d50c 100644 --- a/web/src/components/top-n-item.tsx +++ b/web/src/components/top-n-item.tsx @@ -19,7 +19,7 @@ interface IProps { max?: number; } -const TopNItem = ({ initialValue = 8, max = 30 }: IProps) => { +const TopNItem = ({ initialValue = 8, max = 100 }: IProps) => { const { t } = useTranslate('chat'); return (