import pandas as pd
def parse_excel(file_path):
all_sheets = pd.read_excel(file_path, sheet_name=None)
blocks = []
for sheet_name, df in all_sheets.items():
df = df.ffill()
headers = df.columns.tolist()
for _, row in df.iterrows():
html_table = "
".format("".join(f"{col} | " for col in headers), "".join(f"{row[col]} | " for col in headers))
block = {"type": "table", "img_path": "", "table_caption": [f"Sheet: {sheet_name}"], "table_footnote": [], "table_body": f"{html_table}", "page_idx": 0}
blocks.append(block)
return blocks
if __name__ == "__main__":
file_path = "test_excel.xls"
parse_excel_result = parse_excel(file_path)
print(parse_excel_result)