"""Rebuild a .docx document from a JSON description, and build table JSON.

The first half of the module turns a list of JSON elements (paragraphs and
tables) into a python-docx ``Document``.  The second half converts a plain
2-D list of strings into the table JSON structure consumed by the first half,
optionally merging vertically adjacent cells that hold the same value.
"""

import json
from typing import Any, Dict, List, Union

from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_BREAK
from docx.oxml.shared import OxmlElement, qn
from docx.shared import Pt, RGBColor

# JSON alignment keyword -> python-docx paragraph alignment.
_ALIGNMENT_MAP = {
    "left": WD_ALIGN_PARAGRAPH.LEFT,
    "center": WD_ALIGN_PARAGRAPH.CENTER,
    "right": WD_ALIGN_PARAGRAPH.RIGHT,
    "justify": WD_ALIGN_PARAGRAPH.JUSTIFY,
}

# Values accepted for the ``w:val`` attribute written to ``w:vAlign``.
_VALID_VERTICAL_ALIGNMENTS = ("top", "center", "bottom")


def _replace_child(parent, tag):
    """Remove existing ``tag`` children of *parent* and append a fresh one.

    Keeps repeated formatting calls from stacking duplicate elements.
    Returns the newly appended element.
    """
    for stale in parent.findall(qn(tag)):
        parent.remove(stale)
    child = OxmlElement(tag)
    parent.append(child)
    return child


def json_to_docx(json_data, output_dir=None):
    """Build a python-docx document from a list of JSON elements.

    Args:
        json_data: Sequence of dicts.  Each has a ``"type"`` of ``"text"`` or
            ``"table"`` and a matching ``"content"`` dict.  Elements of any
            other type are skipped.
        output_dir: Optional value passed straight to ``Document(...)``.
            Despite its name it is *opened* as the starting document; this
            function never writes to it.  When falsy a blank document is
            created.

    Returns:
        The populated ``Document``.  Saving it is the caller's job.
    """
    print("\n开始转换JSON到DOCX文档")
    doc = Document(output_dir) if output_dir else Document()

    total_elements = len(json_data)
    print(f"文档包含 {total_elements} 个元素(段落和表格)")

    for i, element in enumerate(json_data, 1):
        print(f"\n处理元素 {i}/{total_elements}: ", end="")
        if element["type"] == "text":
            print(f"段落 (长度: {len(element['content']['runs'])}个runs)")
            add_paragraph_from_json(doc, element["content"])
        elif element["type"] == "table":
            rows = element["content"]["rows"]
            cols = element["content"]["cols"]
            merges = len(element["content"].get("merged_cells", []))
            print(f"表格 ({rows}行×{cols}列, 包含 {merges} 个合并单元格)")
            add_table_from_json(
                doc, element["content"], element.get("bold", False)
            )

    return doc


def add_paragraph_from_json(doc, para_json):
    """Append one paragraph described by *para_json* to *doc*.

    Args:
        doc: Any object exposing ``add_paragraph()`` (this module passes both
            a document and a table cell).
        para_json: Dict with ``"alignment"`` and a ``"runs"`` list.  Each run
            has ``"text"``, a ``"font"`` dict (``name``, ``size``, ``bold``,
            ``italic``, ``underline``, ``color``) and an optional
            ``"has_page_break"`` flag.
    """
    paragraph = doc.add_paragraph()
    print(f" 添加段落 (对齐: {para_json['alignment']})")

    # Unknown alignment keywords fall back to left alignment.
    paragraph.alignment = _ALIGNMENT_MAP.get(
        para_json["alignment"], WD_ALIGN_PARAGRAPH.LEFT
    )

    for run_idx, run_json in enumerate(para_json["runs"], 1):
        run = paragraph.add_run(run_json["text"])

        # The flag is optional; a missing key simply means "no break".
        if run_json.get("has_page_break"):
            run.add_break(WD_BREAK.PAGE)

        font_json = run_json["font"]
        font = run.font
        print(f" 添加run {run_idx}: '{run_json['text']}' "
              f"(字体: {font_json['name']}, 大小: {font_json['size']}, "
              f"加粗: {font_json['bold']}, 斜体: {font_json['italic']})")

        if font_json["name"]:
            font.name = font_json["name"]
            # Also set the East Asian font attribute, which ``font.name``
            # does not touch.
            run.element.rPr.rFonts.set(qn('w:eastAsia'), font_json["name"])
        if font_json["size"]:
            font.size = Pt(font_json["size"])
        font.bold = font_json["bold"]
        font.italic = font_json["italic"]
        font.underline = font_json["underline"]

        color = font_json["color"]
        if color:
            font.color.rgb = RGBColor(color["r"], color["g"], color["b"])
            print(f" 设置颜色: RGB({color['r']}, {color['g']}, {color['b']})")


def add_table_from_json(doc, table_json, bold=False):
    """Append a table described by *table_json* to *doc*.

    Args:
        doc: Document (or other container) exposing ``add_table``.
        table_json: Dict with ``"rows"``, ``"cols"`` and ``"cells"`` (a 2-D
            list of cell dicts), plus optional ``"col_widths"``,
            ``"row_heights"`` and ``"merged_cells"``.
        bold: Currently unused; kept so existing callers keep working.
    """
    print(f" 创建表格: {table_json['rows']}行 × {table_json['cols']}列")
    table = doc.add_table(rows=table_json["rows"], cols=table_json["cols"])
    table.autofit = True

    # Start from the built-in grid style; per-cell borders are applied later.
    table.style = 'Table Grid'

    # Column widths.
    if "col_widths" in table_json and any(table_json["col_widths"]):
        print(" 设置列宽...")
        for col_idx, width in enumerate(table_json["col_widths"]):
            if width is None:
                continue
            # Width is treated as inches; 1 inch = 1440 twips.
            twips_width = int(width * 1440)
            for cell in table.columns[col_idx].cells:
                tcPr = cell._tc.get_or_add_tcPr()
                tcW = tcPr.first_child_found_in("w:tcW")
                if tcW is None:
                    tcW = OxmlElement('w:tcW')
                    tcPr.append(tcW)
                tcW.set(qn('w:w'), str(twips_width))
                tcW.set(qn('w:type'), 'dxa')  # absolute units

    # Row heights.
    if "row_heights" in table_json and any(table_json["row_heights"]):
        print(" 设置行高...")
        for row_idx, height in enumerate(table_json["row_heights"]):
            if height is None:
                continue
            # Height is treated as inches; 1 inch = 1440 twips.
            twips_height = int(height * 1440)
            trPr = table.rows[row_idx]._tr.get_or_add_trPr()
            trHeight = OxmlElement('w:trHeight')
            trHeight.set(qn('w:val'), str(twips_height))
            # 'atLeast' is a minimum height; 'exact' would fix it.
            trHeight.set(qn('w:hRule'), 'atLeast')
            trPr.append(trHeight)

    # Merge cells before filling so content goes into the merged cell.
    merged_cells = table_json.get("merged_cells", [])
    for merge_idx, merge_info in enumerate(merged_cells, 1):
        start_row = merge_info["start_row"]
        start_col = merge_info["start_col"]
        end_row = merge_info["end_row"]
        end_col = merge_info["end_col"]
        print(f" 合并单元格 #{merge_idx}: 从({start_row},{start_col})到({end_row},{end_col})")
        start_cell = table.cell(start_row, start_col)
        end_cell = table.cell(end_row, end_col)
        start_cell.merge(end_cell)

    # Fill and format the cells.
    for row_idx, row_data in enumerate(table_json["cells"]):
        for col_idx, cell_data in enumerate(row_data):
            # Only the primary cell of a merged range carries content.
            if cell_data["is_merged"] and not cell_data["merge_info"]["is_primary"]:
                print(f" 跳过被合并的单元格({row_idx},{col_idx})")
                continue
            cell = table.cell(cell_data["row"], cell_data["col"])
            print(f" 处理单元格({row_idx},{col_idx}) - 对齐: {cell_data['alignment']}")
            format_cell(cell, cell_data)


def format_cell(cell, cell_data):
    """Replace a cell's content and apply all formatting from *cell_data*.

    Args:
        cell: python-docx table cell.
        cell_data: Dict with ``"content"`` (list of paragraph dicts),
            ``"alignment"`` and ``"border"``, plus optional
            ``"vertical_align"``, ``"shading"`` and ``"margins"``.
    """
    # Drop the placeholder paragraph(s) the new cell came with.
    for p in cell.paragraphs:
        p._element.getparent().remove(p._element)

    for para in cell_data["content"]:
        add_paragraph_from_json(cell, para)

    # A table cell must contain at least one paragraph, so put an empty one
    # back when the JSON supplied no content.
    if not cell.paragraphs:
        cell.add_paragraph()

    set_cell_alignment(cell, cell_data)
    set_cell_border(cell, cell_data["border"])
    if cell_data.get("shading"):
        set_cell_shading(cell, cell_data["shading"])
    if cell_data.get("margins"):
        set_cell_margins(cell, cell_data["margins"])


def set_cell_alignment(cell, cell_data):
    """Set horizontal (first paragraph) and vertical alignment of a cell.

    Horizontal alignment comes from ``cell_data["alignment"]``; unknown
    values fall back to left.  Vertical alignment comes from the optional
    ``"vertical_align"`` key; missing or invalid values become ``"center"``.
    """
    # The horizontal setting only touches the first paragraph.
    if cell.paragraphs:
        cell.paragraphs[0].alignment = _ALIGNMENT_MAP.get(
            cell_data["alignment"], WD_ALIGN_PARAGRAPH.LEFT
        )

    tcPr = cell._tc.get_or_add_tcPr()
    align_value = cell_data.get('vertical_align', 'center')
    print(f" 设置垂直对齐: {align_value}")
    if align_value not in _VALID_VERTICAL_ALIGNMENTS:
        align_value = 'center'

    vAlign = _replace_child(tcPr, 'w:vAlign')
    vAlign.set(qn('w:val'), align_value)


def set_cell_shading(cell, shading):
    """Set the background shading of a cell.

    Args:
        cell: python-docx table cell.
        shading: Dict with a required ``"color"`` (written to ``w:fill``), an
            optional ``"theme"`` (written to ``w:themeColor``) and an
            optional ``"val"`` pattern that defaults to ``"clear"``.
    """
    tcPr = cell._tc.get_or_add_tcPr()
    shd = _replace_child(tcPr, 'w:shd')
    # Write the pattern explicitly; "clear" gives a plain fill.
    shd.set(qn('w:val'), str(shading.get("val", "clear")))
    shd.set(qn('w:fill'), str(shading["color"]))
    if shading.get("theme"):
        shd.set(qn('w:themeColor'), str(shading["theme"]))


def set_cell_margins(cell, margins):
    """Set per-side cell margins.

    Args:
        cell: python-docx table cell.
        margins: Mapping of side name (used verbatim as the element name,
            e.g. ``"top"``) to a dict with ``"w"`` and ``"type"``.
    """
    tcPr = cell._tc.get_or_add_tcPr()
    tcMar = _replace_child(tcPr, 'w:tcMar')
    for side, margin in margins.items():
        side_el = OxmlElement(f'w:{side}')
        # XML attribute values must be strings; JSON may carry integers.
        side_el.set(qn('w:w'), str(margin["w"]))
        side_el.set(qn('w:type'), str(margin["type"]))
        tcMar.append(side_el)


def set_cell_border(cell, border_data):
    """Set the borders of a cell.

    Args:
        cell: python-docx table cell.
        border_data: Mapping with any of ``"top"``, ``"left"``, ``"bottom"``,
            ``"right"``; each value may give ``"style"`` (default
            ``"single"``), ``"size"`` (default ``4``) and ``"color"``
            (default ``"000000"``).  Sides that are absent are left alone.
    """
    tcPr = cell._tc.get_or_add_tcPr()

    # Reuse an existing borders container if the cell already has one.
    tcBorders = tcPr.first_child_found_in("w:tcBorders")
    if tcBorders is None:
        tcBorders = OxmlElement('w:tcBorders')
        tcPr.append(tcBorders)

    for side in ['top', 'left', 'bottom', 'right']:
        if side not in border_data:
            continue
        border = border_data[side]
        border_el = _replace_child(tcBorders, f'w:{side}')
        border_el.set(qn('w:val'), str(border.get('style', 'single')))
        border_el.set(qn('w:sz'), str(border.get('size', 4)))
        border_el.set(qn('w:color'), str(border.get('color', '000000')))


def list_to_json_with_merges(
    table_data: List[List[str]],
    style_config: Dict[str, Any] = None,
    detect_merges: bool = True,
    merge_columns: Union[int, List[int]] = None,
) -> List[Dict[str, Any]]:
    """Convert a 2-D list into table JSON, optionally merging equal cells.

    Merging is vertical only: within one column, consecutive rows holding the
    same value become a single merged range.

    Args:
        table_data: Rows of cell strings.  The column count is taken from the
            first row.
        style_config: Optional style settings applied to every cell.
        detect_merges: Whether to detect and merge equal adjacent cells.
        merge_columns: Which columns may be merged:
            ``None`` for all columns, an ``int`` n for the first n columns,
            or a list/tuple/set of column indexes.

    Returns:
        A one-element list holding the table element, so the result can be
        passed directly to ``json_to_docx``.  Empty input yields a single
        0x0 table element in the same list form.
    """
    if not table_data or not table_data[0]:
        return [{
            "type": "table",
            "content": {"rows": 0, "cols": 0, "cells": [], "merged_cells": []},
        }]

    rows = len(table_data)
    cols = len(table_data[0])
    cells = [[None for _ in range(cols)] for _ in range(rows)]
    merged_cells = []

    # Resolve merge_columns into a set of column indexes.
    columns_to_merge = set()
    if merge_columns is not None:
        if isinstance(merge_columns, int):
            columns_to_merge = set(range(merge_columns))
        elif isinstance(merge_columns, (list, tuple, set, frozenset)):
            columns_to_merge = set(merge_columns)

    for col in range(cols):
        should_merge = detect_merges and (
            merge_columns is None or col in columns_to_merge
        )

        start_row = 0
        while start_row < rows:
            current_value = table_data[start_row][col]

            # Extend the run downwards while the value repeats.
            end_row = start_row
            if should_merge:
                while (end_row + 1 < rows
                       and table_data[end_row + 1][col] == current_value):
                    end_row += 1

            if should_merge and end_row > start_row:
                merge_info = {
                    "start_row": start_row,
                    "start_col": col,
                    "end_row": end_row,
                    "end_col": col,
                }
                merged_cells.append(merge_info)
                # Every cell in the range is recorded; only the top one is
                # flagged as primary.
                for row in range(start_row, end_row + 1):
                    cells[row][col] = create_cell_data(
                        row=row,
                        col=col,
                        value=current_value,
                        style_config=style_config,
                        is_merged=True,
                        is_primary=(row == start_row),
                        merge_range=merge_info,
                    )
            else:
                cells[start_row][col] = create_cell_data(
                    row=start_row,
                    col=col,
                    value=current_value,
                    style_config=style_config,
                    is_merged=False,
                )

            start_row = end_row + 1

    return [{
        "type": "table",
        "content": {
            "rows": rows,
            "cols": cols,
            "merged_cells": merged_cells,
            "cells": cells,
        },
    }]


def create_cell_data(
    row: int,
    col: int,
    value: str,
    style_config: Dict[str, Any],
    is_merged: bool = False,
    is_primary: bool = False,
    merge_range: Dict[str, int] = None,
) -> Dict[str, Any]:
    """Build the JSON dict for one table cell.

    Args:
        row: Row index of the cell.
        col: Column index of the cell.
        value: Cell text.
        style_config: Style settings, or ``None``/empty for defaults.
        is_merged: Whether the cell belongs to a merged range.
        is_primary: Whether this is the content-carrying cell of the range.
        merge_range: Range bounds; required when *is_merged* is true.

    Returns:
        The cell dict; merged cells also carry a ``"merge_info"`` entry.
    """
    style = style_config or {}
    cell = {
        "row": row,
        "col": col,
        "is_merged": is_merged,
        "content": create_cell_content(value, style_config),
        "alignment": style.get("alignment", "center"),
        "border": style.get("border", {}),
        "shading": style.get("shading", {}),
        "margins": style.get("margins", {}),
    }

    if is_merged:
        cell["merge_info"] = {
            "is_primary": is_primary,
            "start_row": merge_range["start_row"],
            "start_col": merge_range["start_col"],
            "end_row": merge_range["end_row"],
            "end_col": merge_range["end_col"],
        }

    return cell


def create_cell_content(
    text: str, style_config: Dict[str, Any] = None
) -> List[Dict]:
    """Build the one-paragraph, one-run content list for a cell.

    Font settings come from ``style_config["font"]``; anything missing falls
    back to Calibri 11, regular weight, black.
    """
    style = style_config or {}
    font_config = style.get("font", {})
    return [{
        "alignment": style.get("alignment", "left"),
        "runs": [{
            "text": text,
            "font": {
                "name": font_config.get("name", "Calibri"),
                "size": font_config.get("size", 11),
                "bold": font_config.get("bold", False),
                "italic": font_config.get("italic", False),
                "underline": font_config.get("underline", False),
                "color": font_config.get("color", {"r": 0, "g": 0, "b": 0}),
            },
            "has_page_break": False,
        }],
    }]


def _main():
    """Example: read ``output.json`` and write ``restored.docx``."""
    input_json = "output.json"
    output_path = "restored.docx"

    print(f"从 {input_json} 读取JSON数据...")
    with open(input_json, "r", encoding="utf-8") as f:
        json_data = json.load(f)

    # Build from a blank document and save it explicitly; json_to_docx
    # itself never writes to disk.
    doc = json_to_docx(json_data)
    doc.save(output_path)


if __name__ == "__main__":
    _main()