Docx2Json_Json2Docx/json_to_docx.py

244 lines
9.2 KiB
Python
Raw Normal View History

2025-07-02 16:17:54 +08:00
from docx import Document
from docx.shared import Pt, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml.shared import qn, OxmlElement
import json
def json_to_docx(json_data, output_path):
print(f"\n开始转换JSON到DOCX文档输出路径: {output_path}")
doc = Document()
total_elements = len(json_data)
print(f"文档包含 {total_elements} 个元素(段落和表格)")
for i, element in enumerate(json_data, 1):
print(f"\n处理元素 {i}/{total_elements}: ", end="")
if element["type"] == "text":
print(f"段落 (长度: {len(element['content']['runs'])}个runs)")
add_paragraph_from_json(doc, element["content"])
elif element["type"] == "table":
rows = element["content"]["rows"]
cols = element["content"]["cols"]
merges = len(element["content"].get("merged_cells", []))
print(f"表格 ({rows}行×{cols}列, 包含 {merges} 个合并单元格)")
add_table_from_json(doc, element["content"], element.get("bold", False))
print("\n正在保存文档...")
doc.save(output_path)
print(f"文档已成功保存到 {output_path}")
def add_paragraph_from_json(doc, para_json):
paragraph = doc.add_paragraph()
print(f" 添加段落 (对齐: {para_json['alignment']})")
# 设置段落对齐方式
alignment_map = {
"left": WD_ALIGN_PARAGRAPH.LEFT,
"center": WD_ALIGN_PARAGRAPH.CENTER,
"right": WD_ALIGN_PARAGRAPH.RIGHT,
"justify": WD_ALIGN_PARAGRAPH.JUSTIFY
}
paragraph.alignment = alignment_map.get(para_json["alignment"], WD_ALIGN_PARAGRAPH.LEFT)
# 添加文本运行(runs)
for run_idx, run_json in enumerate(para_json["runs"], 1):
run = paragraph.add_run(run_json["text"])
if run_json["has_page_break"]:
import docx
run.add_break(docx.enum.text.WD_BREAK.PAGE)
font = run.font
print(f" 添加run {run_idx}: '{run_json['text']}' "
f"(字体: {run_json['font']['name']}, 大小: {run_json['font']['size']}, "
f"加粗: {run_json['font']['bold']}, 斜体: {run_json['font']['italic']})")
# 设置字体样式
if run_json["font"]["name"]:
font.name = run_json["font"]["name"]
run.element.rPr.rFonts.set(qn('w:eastAsia'), run_json["font"]["name"])
if run_json["font"]["size"]:
font.size = Pt(run_json["font"]["size"])
font.bold = run_json["font"]["bold"]
font.italic = run_json["font"]["italic"]
font.underline = run_json["font"]["underline"]
# 设置字体颜色
if run_json["font"]["color"]:
color = run_json["font"]["color"]
font.color.rgb = RGBColor(color["r"], color["g"], color["b"])
print(f" 设置颜色: RGB({color['r']}, {color['g']}, {color['b']})")
def add_table_from_json(doc, table_json, bold=False):
print(f" 创建表格: {table_json['rows']}× {table_json['cols']}")
table = doc.add_table(rows=table_json["rows"], cols=table_json["cols"])
# 设置表格样式为无网格线(我们将自定义边框)
table.style = 'Table Grid'
# 设置列宽
if "col_widths" in table_json and any(table_json["col_widths"]):
print(" 设置列宽...")
for col_idx, width in enumerate(table_json["col_widths"]):
if width is not None:
# 将英寸转换为Twips1英寸=1440 Twips
twips_width = int(width * 1440)
for cell in table.columns[col_idx].cells:
tc = cell._tc
tcPr = tc.get_or_add_tcPr()
tcW = tcPr.first_child_found_in("w:tcW")
if tcW is None:
tcW = OxmlElement('w:tcW')
tcPr.append(tcW)
tcW.set(qn('w:w'), str(twips_width))
tcW.set(qn('w:type'), 'dxa') # 使用绝对单位
# 设置行高
if "row_heights" in table_json and any(table_json["row_heights"]):
print(" 设置行高...")
for row_idx, height in enumerate(table_json["row_heights"]):
if height is not None:
# 将英寸转换为Twips1英寸=1440 Twips
twips_height = int(height * 1440)
tr = table.rows[row_idx]._tr
trPr = tr.get_or_add_trPr()
trHeight = OxmlElement('w:trHeight')
trHeight.set(qn('w:val'), str(twips_height))
trHeight.set(qn('w:hRule'), 'atLeast') # 或'exact'表示固定高度
trPr.append(trHeight)
# 处理合并单元格
for merge_idx, merge_info in enumerate(table_json.get("merged_cells", []), 1):
start_row = merge_info["start_row"]
start_col = merge_info["start_col"]
end_row = merge_info["end_row"]
end_col = merge_info["end_col"]
print(f" 合并单元格 #{merge_idx}: 从({start_row},{start_col})到({end_row},{end_col})")
start_cell = table.cell(start_row, start_col)
end_cell = table.cell(end_row, end_col)
start_cell.merge(end_cell)
# 填充表格内容
for row_idx, row_data in enumerate(table_json["cells"]):
for col_idx, cell_data in enumerate(row_data):
# 跳过被合并的非主单元格
if cell_data["is_merged"] and not cell_data["merge_info"]["is_primary"]:
print(f" 跳过被合并的单元格({row_idx},{col_idx})")
continue
cell = table.cell(cell_data["row"], cell_data["col"])
print(f" 处理单元格({row_idx},{col_idx}) - 对齐: {cell_data['alignment']}")
format_cell(cell, cell_data) # 统一设置单元格格式
def format_cell(cell, cell_data):
"""设置单元格完整格式"""
# 清空原有内容
for p in cell.paragraphs:
p._element.getparent().remove(p._element)
# 添加内容
for para in cell_data["content"]:
add_paragraph_from_json(cell, para)
# 设置对齐方式
set_cell_alignment(cell, cell_data)
# 设置边框
set_cell_border(cell, cell_data["border"])
# 设置背景色
if cell_data.get("shading"):
set_cell_shading(cell, cell_data["shading"])
# 设置边距
if cell_data.get("margins"):
set_cell_margins(cell, cell_data["margins"])
def set_cell_alignment(cell, cell_data):
"""设置单元格对齐(水平和垂直)"""
# 水平对齐
if cell.paragraphs:
align_map = {
"left": WD_ALIGN_PARAGRAPH.LEFT,
"center": WD_ALIGN_PARAGRAPH.CENTER,
"right": WD_ALIGN_PARAGRAPH.RIGHT,
"justify": WD_ALIGN_PARAGRAPH.JUSTIFY
}
cell.paragraphs[0].alignment = align_map.get(cell_data["alignment"], WD_ALIGN_PARAGRAPH.LEFT)
# 垂直对齐设置
tcPr = cell._tc.get_or_add_tcPr()
vAlign = OxmlElement('w:vAlign')
align_value = cell_data.get('vertical_align', 'top')
print(f" 设置垂直对齐: {align_value}")
# 确保使用有效的对齐值
valid_alignments = ['top', 'center', 'bottom']
if align_value not in valid_alignments:
align_value = 'top' # 默认值
vAlign.set(qn('w:val'), align_value)
tcPr.append(vAlign)
def set_cell_shading(cell, shading):
"""设置单元格背景色"""
tcPr = cell._tc.get_or_add_tcPr()
shd = OxmlElement('w:shd')
shd.set(qn('w:fill'), shading["color"])
if shading.get("theme"):
shd.set(qn('w:themeColor'), shading["theme"])
tcPr.append(shd)
def set_cell_margins(cell, margins):
"""设置单元格边距"""
tcPr = cell._tc.get_or_add_tcPr()
tcMar = OxmlElement('w:tcMar')
for side, margin in margins.items():
side_el = OxmlElement(f'w:{side}')
side_el.set(qn('w:w'), margin["w"])
side_el.set(qn('w:type'), margin["type"])
tcMar.append(side_el)
tcPr.append(tcMar)
def set_cell_border(cell, border_data):
"""
设置单元格边框
:param cell: 单元格对象
:param border_data: 边框数据
"""
tc = cell._tc
tcPr = tc.get_or_add_tcPr()
# 检查是否存在边框元素,不存在则创建
tcBorders = tcPr.first_child_found_in("w:tcBorders")
if tcBorders is None:
tcBorders = OxmlElement('w:tcBorders')
tcPr.append(tcBorders)
# 设置各边边框
for side in ['top', 'left', 'bottom', 'right']:
if side in border_data:
border = border_data[side]
border_el = OxmlElement(f'w:{side}')
border_el.set(qn('w:val'), border.get('style', 'single'))
border_el.set(qn('w:sz'), str(border.get('size', 4)))
border_el.set(qn('w:color'), border.get('color', '000000'))
tcBorders.append(border_el)
# 使用示例
if __name__ == "__main__":
# 假设我们已经有了之前生成的JSON数据
input_json = "output.json"
output_path = "restored.docx"
print(f"{input_json} 读取JSON数据...")
with open(input_json, "r", encoding="utf-8") as f:
json_data = json.load(f)
# 将JSON转换回DOCX
json_to_docx(json_data, output_path)