2025-07-29 18:01:15 +08:00
|
|
|
|
from docx import Document
|
|
|
|
|
from docx.shared import Pt, RGBColor
|
|
|
|
|
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
|
|
|
|
from docx.oxml.shared import qn, OxmlElement
|
|
|
|
|
import json
|
|
|
|
|
|
2025-07-30 17:31:18 +08:00
|
|
|
|
def json_to_docx(json_data, output_dir = None):
    """Build a python-docx document from a list of element descriptions.

    Args:
        json_data: Sequence of element dicts. Each has a ``"type"`` key
            (``"text"`` or ``"table"``) and a ``"content"`` key holding the
            paragraph or table description. Elements of any other type are
            announced on stdout but otherwise ignored.
        output_dir: When truthy, it is passed straight to ``Document(...)``,
            so the new elements are appended to whatever that call returns.
            When falsy, a blank ``Document()`` is used. Despite the name,
            nothing is written to this location by this function.

    Returns:
        The populated document object. This function never saves it.
    """
    print("\n开始转换JSON到DOCX文档")

    doc = Document(output_dir) if output_dir else Document()

    element_count = len(json_data)
    print(f"文档包含 {element_count} 个元素(段落和表格)")

    for position, item in enumerate(json_data, start=1):
        # The progress prefix stays on the same line as the element summary.
        print(f"\n处理元素 {position}/{element_count}: ", end="")

        kind = item["type"]
        if kind == "text":
            paragraph_json = item["content"]
            print(f"段落 (长度: {len(paragraph_json['runs'])}个runs)")
            add_paragraph_from_json(doc, paragraph_json)
        elif kind == "table":
            table_json = item["content"]
            row_total = table_json["rows"]
            col_total = table_json["cols"]
            merge_total = len(table_json.get("merged_cells", []))
            print(f"表格 ({row_total}行×{col_total}列, 包含 {merge_total} 个合并单元格)")
            add_table_from_json(doc, table_json, item.get("bold", False))

    return doc
|
|
|
|
|
|
|
|
|
|
def add_paragraph_from_json(doc, para_json):
    """Append one paragraph described by ``para_json`` to ``doc``.

    Args:
        doc: Any object exposing ``add_paragraph()``. Elsewhere in this file
            it is called with both a document and a table cell.
        para_json: Dict with an ``"alignment"`` key (``"left"``, ``"center"``,
            ``"right"`` or ``"justify"``; anything else falls back to left)
            and a ``"runs"`` list. Each run needs ``"text"`` and a ``"font"``
            dict with ``name``, ``size``, ``bold``, ``italic``, ``underline``
            and ``color`` keys. ``"has_page_break"`` is optional.

    Raises:
        KeyError: If a required key is missing from ``para_json`` or a run.
    """
    # Imported locally because WD_BREAK is not among the module-level imports.
    from docx.enum.text import WD_BREAK

    paragraph = doc.add_paragraph()
    print(f" 添加段落 (对齐: {para_json['alignment']})")

    # Paragraph alignment; unknown values fall back to left.
    alignment_map = {
        "left": WD_ALIGN_PARAGRAPH.LEFT,
        "center": WD_ALIGN_PARAGRAPH.CENTER,
        "right": WD_ALIGN_PARAGRAPH.RIGHT,
        "justify": WD_ALIGN_PARAGRAPH.JUSTIFY,
    }
    paragraph.alignment = alignment_map.get(para_json["alignment"], WD_ALIGN_PARAGRAPH.LEFT)

    # Add the text runs.
    for run_idx, run_json in enumerate(para_json["runs"], 1):
        run = paragraph.add_run(run_json["text"])

        # The page-break flag is optional. A missing key simply means
        # "no break", so no blanket exception handler is needed.
        if run_json.get("has_page_break"):
            run.add_break(WD_BREAK.PAGE)

        font = run.font
        font_json = run_json["font"]

        print(f" 添加run {run_idx}: '{run_json['text']}' "
              f"(字体: {font_json['name']}, 大小: {font_json['size']}, "
              f"加粗: {font_json['bold']}, 斜体: {font_json['italic']})")

        # Font face: also set the East Asian font attribute to the same name.
        if font_json["name"]:
            font.name = font_json["name"]
            run.element.rPr.rFonts.set(qn('w:eastAsia'), font_json["name"])

        if font_json["size"]:
            font.size = Pt(font_json["size"])

        font.bold = font_json["bold"]
        font.italic = font_json["italic"]
        font.underline = font_json["underline"]

        # Font colour, given as a dict with integer r/g/b components.
        if font_json["color"]:
            color = font_json["color"]
            font.color.rgb = RGBColor(color["r"], color["g"], color["b"])
            print(f" 设置颜色: RGB({color['r']}, {color['g']}, {color['b']})")
|
|
|
|
|
|
|
|
|
|
def add_table_from_json(doc, table_json, bold=False):
    """Append a table described by ``table_json`` to ``doc``.

    Steps, in order: create the table, apply per-column widths and per-row
    heights when given, merge the listed cell ranges, then fill and format
    every cell that is not a non-primary member of a merge.

    Args:
        doc: Object exposing ``add_table(rows=..., cols=...)``.
        table_json: Dict with ``"rows"``, ``"cols"`` and ``"cells"`` (a list
            of rows of cell dicts). Optional keys: ``"col_widths"``,
            ``"row_heights"`` (lists whose ``None`` entries are skipped) and
            ``"merged_cells"`` (dicts with start/end row/col).
        bold: Accepted but not used by this function.
    """
    row_count = table_json["rows"]
    col_count = table_json["cols"]
    print(f" 创建表格: {row_count}行 × {col_count}列")

    table = doc.add_table(rows=row_count, cols=col_count)
    table.autofit = True
    # Built-in grid style; individual cell borders are applied later per cell.
    table.style = 'Table Grid'

    # Column widths.
    if "col_widths" in table_json and any(table_json["col_widths"]):
        print(" 设置列宽...")
        for col_idx, width in enumerate(table_json["col_widths"]):
            if width is None:
                continue
            # Value is scaled by 1440 (original note: inches -> twips).
            width_attr = str(int(width * 1440))
            for column_cell in table.columns[col_idx].cells:
                cell_props = column_cell._tc.get_or_add_tcPr()
                width_el = cell_props.first_child_found_in("w:tcW")
                if width_el is None:
                    width_el = OxmlElement('w:tcW')
                    cell_props.append(width_el)
                width_el.set(qn('w:w'), width_attr)
                width_el.set(qn('w:type'), 'dxa')  # absolute units

    # Row heights.
    if "row_heights" in table_json and any(table_json["row_heights"]):
        print(" 设置行高...")
        for row_idx, height in enumerate(table_json["row_heights"]):
            if height is None:
                continue
            # Value is scaled by 1440 (original note: inches -> twips).
            height_attr = str(int(height * 1440))
            row_props = table.rows[row_idx]._tr.get_or_add_trPr()
            height_el = OxmlElement('w:trHeight')
            height_el.set(qn('w:val'), height_attr)
            height_el.set(qn('w:hRule'), 'atLeast')  # 'exact' would fix the height
            row_props.append(height_el)

    # Merged ranges.
    for merge_idx, merge_info in enumerate(table_json.get("merged_cells", []), 1):
        start_row = merge_info["start_row"]
        start_col = merge_info["start_col"]
        end_row = merge_info["end_row"]
        end_col = merge_info["end_col"]

        print(f" 合并单元格 #{merge_idx}: 从({start_row},{start_col})到({end_row},{end_col})")

        top_left = table.cell(start_row, start_col)
        bottom_right = table.cell(end_row, end_col)
        top_left.merge(bottom_right)

    # Cell contents.
    for row_idx, row_data in enumerate(table_json["cells"]):
        for col_idx, cell_data in enumerate(row_data):
            # Only the primary cell of a merged range carries content.
            is_secondary = cell_data["is_merged"] and not cell_data["merge_info"]["is_primary"]
            if is_secondary:
                print(f" 跳过被合并的单元格({row_idx},{col_idx})")
                continue

            target = table.cell(cell_data["row"], cell_data["col"])
            print(f" 处理单元格({row_idx},{col_idx}) - 对齐: {cell_data['alignment']}")
            format_cell(target, cell_data)
|
|
|
|
|
|
|
|
|
|
def format_cell(cell, cell_data):
    """Replace a cell's paragraphs and apply its full formatting.

    Args:
        cell: Table cell object (exposes ``paragraphs``, ``add_paragraph``
            and ``_tc``).
        cell_data: Dict with ``"content"`` (list of paragraph dicts),
            ``"alignment"`` and ``"border"``. ``"shading"`` and ``"margins"``
            are applied only when present and truthy.

    Raises:
        KeyError: If ``"content"`` or ``"border"`` is missing.
    """
    # Drop whatever paragraphs the cell currently holds.
    for existing in cell.paragraphs:
        existing._element.getparent().remove(existing._element)

    # Add the new content.
    paragraphs_json = cell_data["content"]
    for para in paragraphs_json:
        add_paragraph_from_json(cell, para)

    # A table cell must keep at least one paragraph. With empty content the
    # removal above would otherwise leave the cell without any.
    if not paragraphs_json:
        cell.add_paragraph()

    # Horizontal and vertical alignment.
    set_cell_alignment(cell, cell_data)

    # Borders.
    set_cell_border(cell, cell_data["border"])

    # Background colour.
    if cell_data.get("shading"):
        set_cell_shading(cell, cell_data["shading"])

    # Inner margins.
    if cell_data.get("margins"):
        set_cell_margins(cell, cell_data["margins"])
|
|
|
|
|
|
|
|
|
|
def set_cell_alignment(cell, cell_data):
    """Set a cell's horizontal and vertical alignment.

    Horizontal alignment is applied to the first paragraph only, taken from
    ``cell_data["alignment"]`` (unknown values fall back to left). Vertical
    alignment comes from the optional ``"vertical_align"`` key. Values other
    than ``top``, ``center`` or ``bottom`` are replaced by ``center``.

    Args:
        cell: Table cell object (exposes ``paragraphs`` and ``_tc``).
        cell_data: Cell description dict.
    """
    # Horizontal alignment.
    if cell.paragraphs:
        align_map = {
            "left": WD_ALIGN_PARAGRAPH.LEFT,
            "center": WD_ALIGN_PARAGRAPH.CENTER,
            "right": WD_ALIGN_PARAGRAPH.RIGHT,
            "justify": WD_ALIGN_PARAGRAPH.JUSTIFY,
        }
        cell.paragraphs[0].alignment = align_map.get(cell_data["alignment"], WD_ALIGN_PARAGRAPH.LEFT)

    # Vertical alignment.
    tcPr = cell._tc.get_or_add_tcPr()

    align_value = cell_data.get('vertical_align', 'center')
    print(f" 设置垂直对齐: {align_value}")

    # Only accept the values this converter knows about.
    valid_alignments = ['top', 'center', 'bottom']
    if align_value not in valid_alignments:
        align_value = 'center'  # default

    # Reuse an existing w:vAlign so repeated calls do not stack duplicates.
    vAlign = tcPr.first_child_found_in("w:vAlign")
    if vAlign is None:
        vAlign = OxmlElement('w:vAlign')
        tcPr.append(vAlign)
    vAlign.set(qn('w:val'), align_value)
|
|
|
|
|
|
|
|
|
|
def set_cell_shading(cell, shading):
    """Apply a background fill to a table cell.

    Args:
        cell: Table cell object (exposes ``_tc``).
        shading: Dict with a required ``"color"`` value written to the
            ``w:fill`` attribute, and an optional ``"theme"`` value written
            to ``w:themeColor`` when truthy.
    """
    cell_props = cell._tc.get_or_add_tcPr()

    shading_el = OxmlElement('w:shd')
    shading_el.set(qn('w:fill'), shading["color"])

    theme = shading.get("theme")
    if theme:
        shading_el.set(qn('w:themeColor'), theme)

    cell_props.append(shading_el)
|
|
|
|
|
|
|
|
|
|
def set_cell_margins(cell, margins):
    """Apply inner margins to a table cell.

    Args:
        cell: Table cell object (exposes ``_tc``).
        margins: Mapping of side name to a dict with ``"w"`` (the margin
            width) and ``"type"`` (the unit type). One ``w:<side>`` child is
            written per entry, in the mapping's iteration order.
    """
    tcPr = cell._tc.get_or_add_tcPr()
    tcMar = OxmlElement('w:tcMar')

    for side, margin in margins.items():
        side_el = OxmlElement(f'w:{side}')
        # XML attribute values must be strings. Coerce so numeric widths
        # are accepted, as is done for the other size attributes in this file.
        side_el.set(qn('w:w'), str(margin["w"]))
        side_el.set(qn('w:type'), margin["type"])
        tcMar.append(side_el)

    tcPr.append(tcMar)
|
|
|
|
|
|
|
|
|
|
def set_cell_border(cell, border_data):
    """Write border definitions onto a table cell.

    :param cell: table cell object (exposes ``_tc``)
    :param border_data: mapping that may contain ``top``, ``left``,
        ``bottom`` and ``right`` entries; each entry is a dict with optional
        ``style`` (default ``single``), ``size`` (default ``4``) and
        ``color`` (default ``000000``). Sides that are absent are skipped.
    """
    cell_props = cell._tc.get_or_add_tcPr()

    # Reuse the cell's border container when it already exists.
    borders_el = cell_props.first_child_found_in("w:tcBorders")
    if borders_el is None:
        borders_el = OxmlElement('w:tcBorders')
        cell_props.append(borders_el)

    # One child element per requested side.
    for side in ('top', 'left', 'bottom', 'right'):
        if side not in border_data:
            continue
        spec = border_data[side]
        edge_el = OxmlElement(f'w:{side}')
        attributes = (
            ('w:val', spec.get('style', 'single')),
            ('w:sz', str(spec.get('size', 4))),
            ('w:color', spec.get('color', '000000')),
        )
        for attr_name, attr_value in attributes:
            edge_el.set(qn(attr_name), attr_value)
        borders_el.append(edge_el)
|
|
|
|
|
|
|
|
|
|
# Example usage
if __name__ == "__main__":
    # Assumes the JSON data was produced earlier and saved to this file.
    input_json = "output.json"
    output_path = "restored.docx"

    print(f"从 {input_json} 读取JSON数据...")
    with open(input_json, "r", encoding="utf-8") as f:
        json_data = json.load(f)

    # Convert the JSON back to DOCX. json_to_docx hands its second argument
    # to Document(...) as a file to open and never saves, so build a fresh
    # document here and write it out explicitly.
    doc = json_to_docx(json_data)
    doc.save(output_path)
|
|
|
|
|
|
2025-07-30 17:31:18 +08:00
|
|
|
|
from typing import List, Dict, Any, Union
|
|
|
|
|
|
2025-07-29 18:01:15 +08:00
|
|
|
|
def list_to_json_with_merges(
    table_data: List[List[str]],
    style_config: Dict[str, Any] = None,
    detect_merges: bool = True,
    merge_columns: Union[int, List[int]] = None  # which columns may be merged
) -> List[Dict[str, Any]]:
    """
    Convert a 2-D list into table JSON, optionally merging vertically
    adjacent cells of a column that hold equal values.

    Args:
        table_data: Table contents as a list of rows. The column count is
            taken from the first row.
        style_config: Optional style dict forwarded to ``create_cell_data``.
        detect_merges: Whether to detect and merge runs of equal adjacent
            cells (default True).
        merge_columns: Restricts which columns are merged:
            - None: all columns (default)
            - int n: only the first n columns
            - List[int]: only the listed column indices
            Any other type results in no column being merged.

    Returns:
        A one-element list holding the table element dict
        (``{"type": "table", "content": {...}}``). Empty input yields the
        same shape with zero rows and columns, so the result can always be
        iterated as a list of elements.
    """
    if not table_data or not table_data[0]:
        # Same list wrapper as the normal path, so callers see one type.
        return [{"type": "table", "content": {"rows": 0, "cols": 0, "cells": [], "merged_cells": []}}]

    rows = len(table_data)
    cols = len(table_data[0])

    result = {
        "type": "table",
        "content": {
            "rows": rows,
            "cols": cols,
            "merged_cells": [],
            "cells": [[None for _ in range(cols)] for _ in range(rows)]
        }
    }
    content = result["content"]

    # Resolve merge_columns into a set of column indices.
    columns_to_merge = set()
    if merge_columns is not None:
        if isinstance(merge_columns, int):
            columns_to_merge = set(range(merge_columns))  # first n columns
        elif isinstance(merge_columns, list):
            columns_to_merge = set(merge_columns)  # explicit columns

    for col in range(cols):
        # Decide whether this column takes part in merging.
        should_merge = detect_merges
        if merge_columns is not None:
            should_merge = should_merge and (col in columns_to_merge)

        start_row = 0
        while start_row < rows:
            current_value = table_data[start_row][col]
            end_row = start_row

            # Extend the run downwards while the value repeats.
            if should_merge:
                while end_row + 1 < rows and table_data[end_row + 1][col] == current_value:
                    end_row += 1

            if end_row > start_row:
                # end_row only advances when should_merge is true, so this
                # branch is reached for real merges only.
                merge_info = {
                    "start_row": start_row,
                    "start_col": col,
                    "end_row": end_row,
                    "end_col": col
                }
                content["merged_cells"].append(merge_info)

                for row in range(start_row, end_row + 1):
                    content["cells"][row][col] = create_cell_data(
                        row=row,
                        col=col,
                        value=current_value,
                        style_config=style_config,
                        is_merged=True,
                        is_primary=(row == start_row),
                        merge_range=merge_info
                    )
            else:
                content["cells"][start_row][col] = create_cell_data(
                    row=start_row,
                    col=col,
                    value=current_value,
                    style_config=style_config,
                    is_merged=False
                )

            start_row = end_row + 1

    return [result]
|
|
|
|
|
|
|
|
|
|
def create_cell_data(
    row: int,
    col: int,
    value: str,
    style_config: Dict[str, Any],
    is_merged: bool = False,
    is_primary: bool = False,
    merge_range: Dict[str, int] = None
) -> Dict[str, Any]:
    """Build the normalized dict describing one table cell.

    Style values (``alignment``, ``border``, ``shading``, ``margins``) are
    read from ``style_config`` when it is truthy; otherwise the defaults are
    ``"center"`` and empty dicts. When ``is_merged`` is true a ``merge_info``
    entry is added, copying the range bounds from ``merge_range`` together
    with the ``is_primary`` flag.
    """
    # A falsy style_config behaves like an empty one for the lookups below.
    style = style_config if style_config else {}

    cell = {
        "row": row,
        "col": col,
        "is_merged": is_merged,
        "content": create_cell_content(value, style_config),
    }
    cell["alignment"] = style.get("alignment", "center")
    cell["border"] = style.get("border", {})
    cell["shading"] = style.get("shading", {})
    cell["margins"] = style.get("margins", {})

    if not is_merged:
        return cell

    merge_info = {"is_primary": is_primary}
    for bound in ("start_row", "start_col", "end_row", "end_col"):
        merge_info[bound] = merge_range[bound]
    cell["merge_info"] = merge_info
    return cell
|
|
|
|
|
|
|
|
|
|
def create_cell_content(text: str, style_config: Dict[str, Any] = None) -> List[Dict]:
    """Build the content list for a cell: one paragraph holding one run.

    The paragraph alignment and the run's font settings are taken from
    ``style_config`` (``"alignment"`` and ``"font"`` keys) when it is truthy.
    Missing values fall back to left alignment and an 11pt, black,
    non-bold, non-italic, non-underlined Calibri font.
    """
    if style_config:
        font_overrides = style_config.get("font", {})
        paragraph_alignment = style_config.get("alignment", "left")
    else:
        font_overrides = {}
        paragraph_alignment = "left"

    # (key, default) pairs, in the order the keys appear in the output.
    font_defaults = (
        ("name", "Calibri"),
        ("size", 11),
        ("bold", False),
        ("italic", False),
        ("underline", False),
        ("color", {"r": 0, "g": 0, "b": 0}),
    )
    font = {key: font_overrides.get(key, fallback) for key, fallback in font_defaults}

    run = {"text": text, "font": font, "has_page_break": False}
    paragraph = {"alignment": paragraph_alignment, "runs": [run]}
    return [paragraph]
|