Merge pull request #157 from xinsenyan/main
refactor: 更新解析 Excel 的函数，适配更复杂的表格结构
This commit is contained in:
commit
0640f973aa
|
@ -2,18 +2,19 @@ import pandas as pd
|
|||
|
||||
|
||||
def parse_excel(file_path):
|
||||
# 读取Excel文件
|
||||
df = pd.read_excel(file_path)
|
||||
# 获取表头
|
||||
headers = df.columns.tolist()
|
||||
# 读取所有工作表
|
||||
all_sheets = pd.read_excel(file_path, sheet_name=None) # 读取所有sheet
|
||||
|
||||
blocks = []
|
||||
|
||||
for _, row in df.iterrows():
|
||||
# 构建HTML表格
|
||||
html_table = "<html><body><table><tr>{}</tr><tr>{}</tr></table></body></html>".format("".join(f"<td>{col}</td>" for col in headers), "".join(f"<td>{row[col]}</td>" for col in headers))
|
||||
block = {"type": "table", "img_path": "", "table_caption": [], "table_footnote": [], "table_body": f"{html_table}", "page_idx": 0}
|
||||
for sheet_name, df in all_sheets.items():
|
||||
df = df.fillna(method="ffill") # 填充合并的单元格
|
||||
headers = df.columns.tolist()
|
||||
|
||||
blocks.append(block)
|
||||
for _, row in df.iterrows():
|
||||
html_table = "<html><body><table><tr>{}</tr><tr>{}</tr></table></body></html>".format("".join(f"<td>{col}</td>" for col in headers), "".join(f"<td>{row[col]}</td>" for col in headers))
|
||||
block = {"type": "table", "img_path": "", "table_caption": [f"Sheet: {sheet_name}"], "table_footnote": [], "table_body": f"{html_table}", "page_idx": 0}
|
||||
blocks.append(block)
|
||||
|
||||
return blocks
|
||||
|
||||
|
|
Loading…
Reference in New Issue