From ab4d2da1cfc292a184d09b8c097d57d6001e36d5 Mon Sep 17 00:00:00 2001
From: Huang ShaoHui <163737696+xinsenyan@users.noreply.github.com>
Date: Mon, 9 Jun 2025 17:14:39 +0800
Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E8=A7=A3=E6=9E=90excel?=
 =?UTF-8?q?=E7=9A=84=E5=87=BD=E6=95=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

原版解析 Excel 的函数只能读取第一个工作表（sheet1）；而且对于合并的单元格，只有第一个单元格有数据，其余单元格均为 NaN。
---
 .../services/knowledgebases/excel_parser.py   | 58 ++++++++++++++-----
 1 file changed, 45 insertions(+), 13 deletions(-)
diff --git a/management/server/services/knowledgebases/excel_parser.py b/management/server/services/knowledgebases/excel_parser.py
index 608ff22..cbc8168 100644
--- a/management/server/services/knowledgebases/excel_parser.py
+++ b/management/server/services/knowledgebases/excel_parser.py
@@ -1,20 +1,52 @@
 import pandas as pd
 
 
-def parse_excel(file_path):
-    # 读取Excel文件
-    df = pd.read_excel(file_path)
-    # 获取表头
-    headers = df.columns.tolist()
-    blocks = []
-
-    for _, row in df.iterrows():
-        # 构建HTML表格
-        html_table = "<html><body><table><tr>{}</tr><tr>{}</tr></table></body></html>".format("".join(f"<td>{col}</td>" for col in headers), "".join(f"<td>{row[col]}</td>" for col in headers))
-        block = {"type": "table", "img_path": "", "table_caption": [], "table_footnote": [], "table_body": f"{html_table}", "page_idx": 0}
-
-        blocks.append(block)
+def parse_excel(file_path):
+    """Parse every worksheet of an Excel workbook into per-row table blocks.
+
+    Each data row becomes one block whose ``table_body`` is a two-row HTML
+    table: the sheet's header row followed by that data row.
+
+    Args:
+        file_path: Workbook location, passed straight to
+            ``pandas.read_excel``.
+
+    Returns:
+        A list of block dicts ordered by sheet, then by row. The sheet name
+        is recorded in ``table_caption``; ``page_idx`` is always 0.
+    """
+    # sheet_name=None returns a {sheet name: DataFrame} dict covering every
+    # worksheet instead of only the first one.
+    all_sheets = pd.read_excel(file_path, sheet_name=None)
+    blocks = []
+
+    for sheet_name, df in all_sheets.items():
+        # Only the first cell of a merged range holds a value; forward-fill so
+        # the other rows of a vertical merge inherit it (this also fills cells
+        # that were genuinely empty). DataFrame.ffill() replaces the deprecated
+        # fillna(method="ffill") spelling.
+        df = df.ffill()
+        headers = df.columns.tolist()
+        # The header row is the same for every row of a sheet: build it once.
+        header_cells = "".join(f"<td>{col}</td>" for col in headers)
+
+        for _, row in df.iterrows():
+            row_cells = "".join(f"<td>{row[col]}</td>" for col in headers)
+            html_table = (
+                "<html><body><table>"
+                f"<tr>{header_cells}</tr><tr>{row_cells}</tr>"
+                "</table></body></html>"
+            )
+            block = {
+                "type": "table",
+                "img_path": "",
+                "table_caption": [f"Sheet: {sheet_name}"],
+                "table_footnote": [],
+                "table_body": html_table,
+                "page_idx": 0,
+            }
+            blocks.append(block)
 
     return blocks