import os

import pandas as pd


def parse_excel_file(file_path) -> list:
    """Parse an Excel (.xlsx/.xls) or CSV file into a list of data blocks.

    The reader is chosen from the file extension (compared case-insensitively).
    Excel workbooks are read sheet by sheet; a CSV file is handled as one
    sheet labelled "CSV". Each DataFrame is handed to ``_process_dataframe``
    and the blocks it returns are concatenated in reading order.

    Args:
        file_path: Path of the file to parse.

    Returns:
        The blocks produced by ``_process_dataframe`` for every sheet.

    Raises:
        ValueError: If the extension is not .xlsx, .xls or .csv, or if reading
            or processing fails for any reason. The message always starts with
            "Failed to parse file" and the underlying error is available as
            ``__cause__``.
    """
    blocks = []
    # Pick the reader from the lower-cased file extension.
    file_ext = os.path.splitext(file_path)[1].lower()
    try:
        if file_ext in (".xlsx", ".xls"):
            # sheet_name=None loads every sheet as {sheet name: DataFrame}.
            all_sheets = pd.read_excel(file_path, sheet_name=None)
            for sheet_name, df in all_sheets.items():
                blocks.extend(_process_dataframe(df, sheet_name))
        elif file_ext == ".csv":
            # A CSV has no sheets; use the fixed label "CSV" instead.
            df = pd.read_csv(file_path)
            blocks.extend(_process_dataframe(df, "CSV"))
        else:
            # Raised inside the try on purpose: it is wrapped by the handler
            # below, so callers see one uniform message format.
            raise ValueError(f"Unsupported file format: {file_ext}")
    except Exception as e:
        # Chain explicitly so the original error is kept as __cause__.
        raise ValueError(f"Failed to parse file {file_path}: {e}") from e
    return blocks


# NOTE(review): the helper `_process_dataframe` starts here in the source, but
# the visible text stops in the middle of a string literal. It is kept below as
# a comment (docstring translated) rather than reconstructed by guesswork;
# restore the full definition from the original file.
#
# def _process_dataframe(df, sheet_name):
#     """Process a single DataFrame and build data blocks in the unified format."""
#     df = df.ffill()
#     headers = df.columns.tolist()
#     blocks = []
#     for _, row in df.iterrows():
#         html_table = "