diff --git a/management/server/services/knowledgebases/excel_parser.py b/management/server/services/knowledgebases/excel_parser.py
index 608ff22..500f718 100644
--- a/management/server/services/knowledgebases/excel_parser.py
+++ b/management/server/services/knowledgebases/excel_parser.py
@@ -2,18 +2,19 @@ import pandas as pd
def parse_excel(file_path):
- # 读取Excel文件
- df = pd.read_excel(file_path)
- # 获取表头
- headers = df.columns.tolist()
+ # Read every worksheet: sheet_name=None returns a dict mapping sheet name -> DataFrame
+ all_sheets = pd.read_excel(file_path, sheet_name=None)
+
blocks = []
- for _, row in df.iterrows():
- # 构建HTML表格
- html_table = "<html><body><table><tr>{}</tr><tr>{}</tr></table></body></html>".format("".join(f"<td>{col}</td>" for col in headers), "".join(f"<td>{row[col]}</td>" for col in headers))
- block = {"type": "table", "img_path": "", "table_caption": [], "table_footnote": [], "table_body": f"{html_table}", "page_idx": 0}
+ for sheet_name, df in all_sheets.items():
+ df = df.ffill() # forward-fill blanks left by merged cells
+ headers = df.columns.tolist()
- blocks.append(block)
+ for _, row in df.iterrows():
+ html_table = "<html><body><table><tr>{}</tr><tr>{}</tr></table></body></html>".format("".join(f"<td>{col}</td>" for col in headers), "".join(f"<td>{row[col]}</td>" for col in headers))
+ block = {"type": "table", "img_path": "", "table_caption": [f"Sheet: {sheet_name}"], "table_footnote": [], "table_body": f"{html_table}", "page_idx": 0}
+ blocks.append(block)
return blocks