From ab4d2da1cfc292a184d09b8c097d57d6001e36d5 Mon Sep 17 00:00:00 2001
From: Huang ShaoHui <163737696+xinsenyan@users.noreply.github.com>
Date: Mon, 9 Jun 2025 17:14:39 +0800
Subject: [PATCH 1/2] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E8=A7=A3=E6=9E=90excel?=
=?UTF-8?q?=E7=9A=84=E5=87=BD=E6=95=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
原版解析 Excel 的函数只能读取 sheet1,而且合并单元格中只有第一个单元格有数据,其余单元格均为 NaN
---
.../services/knowledgebases/excel_parser.py | 38 ++++++++++++-------
1 file changed, 25 insertions(+), 13 deletions(-)
diff --git a/management/server/services/knowledgebases/excel_parser.py b/management/server/services/knowledgebases/excel_parser.py
index 608ff22..cbc8168 100644
--- a/management/server/services/knowledgebases/excel_parser.py
+++ b/management/server/services/knowledgebases/excel_parser.py
@@ -1,20 +1,32 @@
import pandas as pd
-def parse_excel(file_path):
- # 读取Excel文件
- df = pd.read_excel(file_path)
- # 获取表头
- headers = df.columns.tolist()
- blocks = []
-
- for _, row in df.iterrows():
- # 构建HTML表格
- html_table = "
".format("".join(f"{col} | " for col in headers), "".join(f"{row[col]} | " for col in headers))
- block = {"type": "table", "img_path": "", "table_caption": [], "table_footnote": [], "table_body": f"{html_table}", "page_idx": 0}
-
- blocks.append(block)
+def parse_excel(file_path):
+ # 读取所有工作表
+ all_sheets = pd.read_excel(file_path, sheet_name=None) # 读取所有sheet
+ blocks = []
+
+ for sheet_name, df in all_sheets.items():
+ df = df.fillna(method='ffill')#填充合并的单元格
+ headers = df.columns.tolist()
+
+ for _, row in df.iterrows():
+ html_table = "".format(
+ "".join(f"{col} | " for col in headers),
+ "".join(f"{row[col]} | " for col in headers)
+ )
+ print(row['测试分类'])
+ block = {
+ "type": "table",
+ "img_path": "",
+ "table_caption": [f"Sheet: {sheet_name}"],
+ "table_footnote": [],
+ "table_body": f"{html_table}",
+ "page_idx": 0
+ }
+ blocks.append(block)
+
return blocks
From d5fbd8d6206e44e459de433acbe2b5386911c369 Mon Sep 17 00:00:00 2001
From: zstar <65890619+zstar1003@users.noreply.github.com>
Date: Mon, 9 Jun 2025 19:06:09 +0800
Subject: [PATCH 2/2] =?UTF-8?q?=E7=A7=BB=E9=99=A4=E4=BA=86print=E7=89=B9?=
=?UTF-8?q?=E5=AE=9A=E5=88=97?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../services/knowledgebases/excel_parser.py | 39 +++++++------------
1 file changed, 14 insertions(+), 25 deletions(-)
diff --git a/management/server/services/knowledgebases/excel_parser.py b/management/server/services/knowledgebases/excel_parser.py
index cbc8168..500f718 100644
--- a/management/server/services/knowledgebases/excel_parser.py
+++ b/management/server/services/knowledgebases/excel_parser.py
@@ -1,32 +1,21 @@
import pandas as pd
-def parse_excel(file_path):
- # 读取所有工作表
- all_sheets = pd.read_excel(file_path, sheet_name=None) # 读取所有sheet
+def parse_excel(file_path):
+ # 读取所有工作表
+ all_sheets = pd.read_excel(file_path, sheet_name=None) # 读取所有sheet
+
+ blocks = []
+
+ for sheet_name, df in all_sheets.items():
+ df = df.fillna(method="ffill") # 填充合并的单元格
+ headers = df.columns.tolist()
+
+ for _, row in df.iterrows():
+ html_table = "".format("".join(f"{col} | " for col in headers), "".join(f"{row[col]} | " for col in headers))
+ block = {"type": "table", "img_path": "", "table_caption": [f"Sheet: {sheet_name}"], "table_footnote": [], "table_body": f"{html_table}", "page_idx": 0}
+ blocks.append(block)
- blocks = []
-
- for sheet_name, df in all_sheets.items():
- df = df.fillna(method='ffill')#填充合并的单元格
- headers = df.columns.tolist()
-
- for _, row in df.iterrows():
- html_table = "".format(
- "".join(f"{col} | " for col in headers),
- "".join(f"{row[col]} | " for col in headers)
- )
- print(row['测试分类'])
- block = {
- "type": "table",
- "img_path": "",
- "table_caption": [f"Sheet: {sheet_name}"],
- "table_footnote": [],
- "table_body": f"{html_table}",
- "page_idx": 0
- }
- blocks.append(block)
-
return blocks