import pandas as pd


def parse_excel(file_path):
    """Convert every row of every worksheet in an Excel file into a table block.

    All sheets are read (``sheet_name=None``). Within each sheet, empty cells
    are forward-filled from the cell above so that vertically merged cells
    carry their value onto every row they span. Each data row then becomes one
    block whose ``table_body`` is a two-row HTML table: the sheet's header row
    followed by that data row.

    Args:
        file_path: Anything ``pandas.read_excel`` accepts as its first
            argument (the call is made with this value unchanged).

    Returns:
        A list of dicts, one per data row, in sheet order then row order.
        Each dict has the keys ``type`` ("table"), ``img_path`` (""),
        ``table_caption`` (``["Sheet: <sheet name>"]``), ``table_footnote``
        (``[]``), ``table_body`` (the HTML string) and ``page_idx`` (0).
        Sheets without data rows contribute no blocks.

    Raises:
        Whatever ``pandas.read_excel`` raises for a missing or unreadable file.
    """
    # sheet_name=None makes read_excel return {sheet name: DataFrame}.
    all_sheets = pd.read_excel(file_path, sheet_name=None)

    blocks = []
    for sheet_name, df in all_sheets.items():
        # Forward-fill so rows covered by a merged cell repeat its value.
        # DataFrame.ffill() replaces the deprecated fillna(method="ffill").
        df = df.ffill()

        # The header row is identical for every block of this sheet, so it is
        # rendered once instead of once per data row.
        header_cells = "".join(f"<td>{col}</td>" for col in df.columns.tolist())
        caption = f"Sheet: {sheet_name}"

        # itertuples keeps each column's own dtype; iterrows would upcast a
        # mixed int/float row to float and render integer cells as "1.0".
        for values in df.itertuples(index=False, name=None):
            row_cells = "".join(f"<td>{value}</td>" for value in values)
            # NOTE(review): cell text is interpolated without HTML escaping, and
            # the wrapper markup should be confirmed against what consumers of
            # "table_body" expect.
            html_table = (
                "<html><body><table>"
                f"<tr>{header_cells}</tr><tr>{row_cells}</tr>"
                "</table></body></html>"
            )
            blocks.append(
                {
                    "type": "table",
                    "img_path": "",
                    # A fresh list per block so callers can mutate one safely.
                    "table_caption": [caption],
                    "table_footnote": [],
                    "table_body": html_table,
                    "page_idx": 0,
                }
            )

    return blocks