From ab4d2da1cfc292a184d09b8c097d57d6001e36d5 Mon Sep 17 00:00:00 2001 From: Huang ShaoHui <163737696+xinsenyan@users.noreply.github.com> Date: Mon, 9 Jun 2025 17:14:39 +0800 Subject: [PATCH 1/2] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E8=A7=A3=E6=9E=90excel?= =?UTF-8?q?=E7=9A=84=E5=87=BD=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 原版解析excel函数只能读取sheet1,而且对于合并的单元格只有第一个单元格有数据,其他的为NaN。 --- .../services/knowledgebases/excel_parser.py | 38 ++++++++++++------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/management/server/services/knowledgebases/excel_parser.py b/management/server/services/knowledgebases/excel_parser.py index 608ff22..cbc8168 100644 --- a/management/server/services/knowledgebases/excel_parser.py +++ b/management/server/services/knowledgebases/excel_parser.py @@ -1,20 +1,32 @@ import pandas as pd -def parse_excel(file_path): - # 读取Excel文件 - df = pd.read_excel(file_path) - # 获取表头 - headers = df.columns.tolist() - blocks = [] - - for _, row in df.iterrows(): - # 构建HTML表格 - html_table = "{}{}
".format("".join(f"{col}" for col in headers), "".join(f"{row[col]}" for col in headers)) - block = {"type": "table", "img_path": "", "table_caption": [], "table_footnote": [], "table_body": f"{html_table}", "page_idx": 0} - - blocks.append(block) +def parse_excel(file_path): + # 读取所有工作表 + all_sheets = pd.read_excel(file_path, sheet_name=None) # 读取所有sheet + blocks = [] + + for sheet_name, df in all_sheets.items(): + df = df.fillna(method='ffill')#填充合并的单元格 + headers = df.columns.tolist() + + for _, row in df.iterrows(): + html_table = "{}{}
".format( + "".join(f"{col}" for col in headers), + "".join(f"{row[col]}" for col in headers) + ) + print(row['测试分类']) + block = { + "type": "table", + "img_path": "", + "table_caption": [f"Sheet: {sheet_name}"], + "table_footnote": [], + "table_body": f"{html_table}", + "page_idx": 0 + } + blocks.append(block) + return blocks From d5fbd8d6206e44e459de433acbe2b5386911c369 Mon Sep 17 00:00:00 2001 From: zstar <65890619+zstar1003@users.noreply.github.com> Date: Mon, 9 Jun 2025 19:06:09 +0800 Subject: [PATCH 2/2] =?UTF-8?q?=E7=A7=BB=E9=99=A4=E4=BA=86print=E7=89=B9?= =?UTF-8?q?=E5=AE=9A=E5=88=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../services/knowledgebases/excel_parser.py | 39 +++++++------------ 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/management/server/services/knowledgebases/excel_parser.py b/management/server/services/knowledgebases/excel_parser.py index cbc8168..500f718 100644 --- a/management/server/services/knowledgebases/excel_parser.py +++ b/management/server/services/knowledgebases/excel_parser.py @@ -1,32 +1,21 @@ import pandas as pd -def parse_excel(file_path): - # 读取所有工作表 - all_sheets = pd.read_excel(file_path, sheet_name=None) # 读取所有sheet +def parse_excel(file_path): + # 读取所有工作表 + all_sheets = pd.read_excel(file_path, sheet_name=None) # 读取所有sheet + + blocks = [] + + for sheet_name, df in all_sheets.items(): + df = df.fillna(method="ffill") # 填充合并的单元格 + headers = df.columns.tolist() + + for _, row in df.iterrows(): + html_table = "{}{}
".format("".join(f"{col}" for col in headers), "".join(f"{row[col]}" for col in headers)) + block = {"type": "table", "img_path": "", "table_caption": [f"Sheet: {sheet_name}"], "table_footnote": [], "table_body": f"{html_table}", "page_idx": 0} + blocks.append(block) - blocks = [] - - for sheet_name, df in all_sheets.items(): - df = df.fillna(method='ffill')#填充合并的单元格 - headers = df.columns.tolist() - - for _, row in df.iterrows(): - html_table = "{}{}
".format( - "".join(f"{col}" for col in headers), - "".join(f"{row[col]}" for col in headers) - ) - print(row['测试分类']) - block = { - "type": "table", - "img_path": "", - "table_caption": [f"Sheet: {sheet_name}"], - "table_footnote": [], - "table_body": f"{html_table}", - "page_idx": 0 - } - blocks.append(block) - return blocks