Report_Generate_Server/tools/json_to_docx.py

375 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from docx import Document
from docx.shared import Pt, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml.shared import qn, OxmlElement
import json
def json_to_docx(json_data, output_path=None):
    """Build a python-docx ``Document`` from a list of element dicts.

    Every element is a dict with a ``"type"`` key and a ``"content"``
    payload. ``"text"`` elements are rendered by ``add_paragraph_from_json``
    and ``"table"`` elements by ``add_table_from_json``. Any other type is
    reported on stdout and skipped.

    Args:
        json_data: Sequence of element dicts (paragraphs and tables).
        output_path: Optional path. When given, the finished document is
            also saved there before being returned.

    Returns:
        The populated ``Document`` object.
    """
    print(f"\n开始转换JSON到DOCX文档")
    doc = Document()
    total_elements = len(json_data)
    print(f"文档包含 {total_elements} 个元素(段落和表格)")
    for i, element in enumerate(json_data, 1):
        # The progress line is left open (end="") and completed by the
        # branch that handles the element.
        print(f"\n处理元素 {i}/{total_elements}: ", end="")
        element_type = element["type"]
        if element_type == "text":
            print(f"段落 (长度: {len(element['content']['runs'])}个runs)")
            add_paragraph_from_json(doc, element["content"])
        elif element_type == "table":
            rows = element["content"]["rows"]
            cols = element["content"]["cols"]
            merges = len(element["content"].get("merged_cells", []))
            print(f"表格 ({rows}行×{cols}列, 包含 {merges} 个合并单元格)")
            add_table_from_json(doc, element["content"], element.get("bold", False))
        else:
            # Close the open progress line instead of leaving it dangling.
            print(f"未知类型 {element_type!r}, 已跳过")
    if output_path is not None:
        doc.save(output_path)
    return doc
def add_paragraph_from_json(doc, para_json):
    """Append a paragraph described by ``para_json`` to ``doc``.

    ``doc`` only needs an ``add_paragraph()`` method; within this file the
    function is called with both a document and a table cell.

    Args:
        doc: Container to which the paragraph is added.
        para_json: Dict with an ``"alignment"`` name (``left``, ``center``,
            ``right`` or ``justify``; anything else falls back to left) and
            a ``"runs"`` list. Each run supplies ``"text"`` and a ``"font"``
            dict with ``name``, ``size`` (points), ``bold``, ``italic``,
            ``underline`` and ``color`` (``{"r", "g", "b"}`` or a falsy
            value), and may carry an optional ``"has_page_break"`` flag.
    """
    # Imported here because WD_BREAK is not among the module-level imports.
    from docx.enum.text import WD_BREAK

    paragraph = doc.add_paragraph()
    print(f" 添加段落 (对齐: {para_json['alignment']})")
    # Translate the alignment name into the python-docx enum value.
    alignment_map = {
        "left": WD_ALIGN_PARAGRAPH.LEFT,
        "center": WD_ALIGN_PARAGRAPH.CENTER,
        "right": WD_ALIGN_PARAGRAPH.RIGHT,
        "justify": WD_ALIGN_PARAGRAPH.JUSTIFY
    }
    paragraph.alignment = alignment_map.get(para_json["alignment"], WD_ALIGN_PARAGRAPH.LEFT)
    # Add the text runs.
    for run_idx, run_json in enumerate(para_json["runs"], 1):
        run = paragraph.add_run(run_json["text"])
        # The flag is optional, so a missing key simply means "no break";
        # real failures are no longer hidden behind a bare except.
        if run_json.get("has_page_break"):
            run.add_break(WD_BREAK.PAGE)
        font_json = run_json["font"]
        font = run.font
        print(f" 添加run {run_idx}: '{run_json['text']}' "
              f"(字体: {font_json['name']}, 大小: {font_json['size']}, "
              f"加粗: {font_json['bold']}, 斜体: {font_json['italic']})")
        # Font face: also written to the East Asian font slot.
        if font_json["name"]:
            font.name = font_json["name"]
            run.element.rPr.rFonts.set(qn('w:eastAsia'), font_json["name"])
        if font_json["size"]:
            font.size = Pt(font_json["size"])
        font.bold = font_json["bold"]
        font.italic = font_json["italic"]
        font.underline = font_json["underline"]
        # Font colour.
        if font_json["color"]:
            color = font_json["color"]
            font.color.rgb = RGBColor(color["r"], color["g"], color["b"])
            print(f" 设置颜色: RGB({color['r']}, {color['g']}, {color['b']})")
def add_table_from_json(doc, table_json, bold=False):
    """Create a table in ``doc`` from its JSON description.

    Steps, in order: create the grid with the ``'Table Grid'`` style, apply
    optional column widths and row heights (given in inches and written as
    twips), merge the listed cell ranges, then fill and format every cell
    that is not a secondary member of a merge.

    Args:
        doc: Object exposing ``add_table(rows=..., cols=...)``.
        table_json: Dict with ``rows``, ``cols`` and ``cells`` plus the
            optional ``col_widths``, ``row_heights`` and ``merged_cells``.
        bold: Accepted for callers; not read anywhere in this function.
    """
    n_rows = table_json["rows"]
    n_cols = table_json["cols"]
    print(f" 创建表格: {n_rows}× {n_cols}")
    table = doc.add_table(rows=n_rows, cols=n_cols)
    # Start from the built-in grid style; per-cell borders are applied later.
    table.style = 'Table Grid'

    # Column widths: 1 inch = 1440 twips, stored as an absolute (dxa) width.
    if "col_widths" in table_json and any(table_json["col_widths"]):
        print(" 设置列宽...")
        for col_idx, width in enumerate(table_json["col_widths"]):
            if width is None:
                continue
            twips_text = str(int(width * 1440))
            for column_cell in table.columns[col_idx].cells:
                cell_props = column_cell._tc.get_or_add_tcPr()
                width_el = cell_props.first_child_found_in("w:tcW")
                if width_el is None:
                    width_el = OxmlElement('w:tcW')
                    cell_props.append(width_el)
                width_el.set(qn('w:w'), twips_text)
                width_el.set(qn('w:type'), 'dxa')

    # Row heights: 'atLeast' is a minimum height ('exact' would fix it).
    if "row_heights" in table_json and any(table_json["row_heights"]):
        print(" 设置行高...")
        for row_idx, height in enumerate(table_json["row_heights"]):
            if height is None:
                continue
            height_el = OxmlElement('w:trHeight')
            row_props = table.rows[row_idx]._tr.get_or_add_trPr()
            height_el.set(qn('w:val'), str(int(height * 1440)))
            height_el.set(qn('w:hRule'), 'atLeast')
            row_props.append(height_el)

    # Merge every listed range, from its top-left to its bottom-right cell.
    for merge_idx, span in enumerate(table_json.get("merged_cells", []), 1):
        top = span["start_row"]
        left = span["start_col"]
        bottom = span["end_row"]
        right = span["end_col"]
        print(f" 合并单元格 #{merge_idx}: 从({top},{left})到({bottom},{right})")
        anchor = table.cell(top, left)
        anchor.merge(table.cell(bottom, right))

    # Fill the content; only the primary cell of a merge is written.
    for row_idx, row_data in enumerate(table_json["cells"]):
        for col_idx, cell_data in enumerate(row_data):
            if cell_data["is_merged"] and not cell_data["merge_info"]["is_primary"]:
                print(f" 跳过被合并的单元格({row_idx},{col_idx})")
            else:
                target = table.cell(cell_data["row"], cell_data["col"])
                print(f" 处理单元格({row_idx},{col_idx}) - 对齐: {cell_data['alignment']}")
                format_cell(target, cell_data)
def format_cell(cell, cell_data):
    """Replace a cell's content and apply its complete formatting.

    Args:
        cell: Table cell to rewrite.
        cell_data: Dict with ``content`` (list of paragraph dicts),
            ``alignment``, ``border`` and the optional ``vertical_align``,
            ``shading`` and ``margins``.
    """
    # Remove the paragraph(s) the cell currently holds.
    for p in cell.paragraphs:
        p._element.getparent().remove(p._element)
    # Add the new content.
    for para in cell_data["content"]:
        add_paragraph_from_json(cell, para)
    # A table cell without any paragraph is not valid for Word, so an empty
    # content list still has to leave one (empty) paragraph behind.
    if not cell.paragraphs:
        cell.add_paragraph()
    # The setters below each append one child to w:tcPr. They are called in
    # the order the schema lists those children (tcBorders, shd, tcMar,
    # vAlign) so the resulting XML is in sequence.
    set_cell_border(cell, cell_data["border"])
    # Background colour.
    if cell_data.get("shading"):
        set_cell_shading(cell, cell_data["shading"])
    # Cell margins.
    if cell_data.get("margins"):
        set_cell_margins(cell, cell_data["margins"])
    # Horizontal and vertical alignment (adds w:vAlign last).
    set_cell_alignment(cell, cell_data)
def set_cell_alignment(cell, cell_data):
    """Set the horizontal and vertical alignment of a cell.

    The horizontal alignment in ``cell_data["alignment"]`` is applied to the
    first paragraph only; unknown names fall back to left. The vertical
    alignment comes from the optional ``cell_data["vertical_align"]`` and
    falls back to ``'top'`` when missing or not one of top/center/bottom.
    """
    # Horizontal alignment.
    if cell.paragraphs:
        align_map = {
            "left": WD_ALIGN_PARAGRAPH.LEFT,
            "center": WD_ALIGN_PARAGRAPH.CENTER,
            "right": WD_ALIGN_PARAGRAPH.RIGHT,
            "justify": WD_ALIGN_PARAGRAPH.JUSTIFY
        }
        cell.paragraphs[0].alignment = align_map.get(cell_data["alignment"], WD_ALIGN_PARAGRAPH.LEFT)
    # Vertical alignment.
    tcPr = cell._tc.get_or_add_tcPr()
    align_value = cell_data.get('vertical_align', 'top')
    print(f" 设置垂直对齐: {align_value}")
    # Only accept values this function knows how to write.
    valid_alignments = ['top', 'center', 'bottom']
    if align_value not in valid_alignments:
        align_value = 'top'  # default
    # Reuse an existing w:vAlign so repeated calls do not stack duplicates.
    vAlign = tcPr.find(qn('w:vAlign'))
    if vAlign is None:
        vAlign = OxmlElement('w:vAlign')
        tcPr.append(vAlign)
    vAlign.set(qn('w:val'), align_value)
def set_cell_shading(cell, shading):
    """Set the background (shading) of a cell.

    Args:
        cell: Table cell to shade.
        shading: Dict with a required ``"color"`` (written to ``w:fill``)
            and an optional ``"theme"`` (written to ``w:themeColor``).
    """
    tcPr = cell._tc.get_or_add_tcPr()
    # Reuse an existing w:shd so repeated calls do not stack duplicates.
    shd = tcPr.find(qn('w:shd'))
    if shd is None:
        shd = OxmlElement('w:shd')
        tcPr.append(shd)
    # w:val is a required attribute of w:shd; 'clear' means no pattern, so
    # only the fill colour shows. An existing pattern is left untouched.
    if shd.get(qn('w:val')) is None:
        shd.set(qn('w:val'), 'clear')
    shd.set(qn('w:fill'), shading["color"])
    if shading.get("theme"):
        shd.set(qn('w:themeColor'), shading["theme"])
def set_cell_margins(cell, margins):
    """Set the inner margins of a cell.

    Args:
        cell: Table cell to modify.
        margins: Mapping of side name (used as the ``w:<side>`` tag) to a
            dict with ``"w"`` (width) and ``"type"`` (width unit).
    """
    tcPr = cell._tc.get_or_add_tcPr()
    # Drop a previous w:tcMar so repeated calls do not stack duplicates.
    previous = tcPr.find(qn('w:tcMar'))
    if previous is not None:
        tcPr.remove(previous)
    tcMar = OxmlElement('w:tcMar')
    for side, margin in margins.items():
        side_el = OxmlElement(f'w:{side}')
        # XML attribute values must be strings; the width may arrive as a
        # number, so convert it like the other setters in this file do.
        side_el.set(qn('w:w'), str(margin["w"]))
        side_el.set(qn('w:type'), margin["type"])
        tcMar.append(side_el)
    tcPr.append(tcMar)
def set_cell_border(cell, border_data):
    """Set the borders of a cell.

    Args:
        cell: Table cell to modify.
        border_data: Mapping of side (``top``, ``left``, ``bottom``,
            ``right``) to a dict with optional ``style`` (default
            ``'single'``), ``size`` (default ``4``) and ``color`` (default
            ``'000000'``). Sides that are absent are left unchanged.
            ``None`` is treated as an empty mapping.
    """
    border_data = border_data or {}
    tc = cell._tc
    tcPr = tc.get_or_add_tcPr()
    # Create the borders container only when it does not exist yet.
    tcBorders = tcPr.first_child_found_in("w:tcBorders")
    if tcBorders is None:
        tcBorders = OxmlElement('w:tcBorders')
        tcPr.append(tcBorders)
    # Apply each requested side.
    for side in ['top', 'left', 'bottom', 'right']:
        if side in border_data:
            border = border_data[side]
            # Update a side that is already present instead of appending a
            # second element with the same tag.
            border_el = tcBorders.find(qn(f'w:{side}'))
            if border_el is None:
                border_el = OxmlElement(f'w:{side}')
                tcBorders.append(border_el)
            border_el.set(qn('w:val'), border.get('style', 'single'))
            border_el.set(qn('w:sz'), str(border.get('size', 4)))
            border_el.set(qn('w:color'), border.get('color', '000000'))
# Usage example
if __name__ == "__main__":
    # Assumes the JSON produced earlier already exists on disk.
    input_json = "output.json"
    output_path = "restored.docx"
    print(f"{input_json} 读取JSON数据...")
    with open(input_json, "r", encoding="utf-8") as f:
        json_data = json.load(f)
    # Convert the JSON back to a document, then write it to disk.
    doc = json_to_docx(json_data)
    doc.save(output_path)
from typing import List, Dict, Any
def list_to_json_with_merges(
    table_data: List[List[str]],
    style_config: Dict[str, Any] = None,
    detect_merges: bool = True
) -> List[Dict[str, Any]]:
    """Convert a 2-D list of strings into the table JSON structure.

    When ``detect_merges`` is true, runs of equal values that are vertically
    adjacent within one column are recorded as a merged range. Columns are
    never merged with each other.

    Args:
        table_data: Rows of cell strings. The column count is taken from
            the first row.
        style_config: Optional style settings passed on to every cell.
        detect_merges: Whether to detect and merge equal adjacent cells.

    Returns:
        A one-element list holding the table element dict
        (``{"type": "table", "content": {...}}``). Empty input yields the
        same shape with zero rows and columns.
    """
    if not table_data or not table_data[0]:
        # Wrapped in a list like the regular result, so callers can treat
        # both cases the same way.
        return [{"type": "table", "content": {"rows": 0, "cols": 0, "cells": [], "merged_cells": []}}]
    rows = len(table_data)
    cols = len(table_data[0])
    result = {
        "type": "table",
        "content": {
            "rows": rows,
            "cols": cols,
            "merged_cells": [],
            "cells": [[None for _ in range(cols)] for _ in range(rows)]
        }
    }
    content = result["content"]
    for col in range(cols):
        start_row = 0
        while start_row < rows:
            current_value = table_data[start_row][col]
            end_row = start_row
            # Extend the run downwards only when merge detection is on.
            if detect_merges:
                while end_row + 1 < rows and table_data[end_row + 1][col] == current_value:
                    end_row += 1
            if detect_merges and end_row > start_row:
                # Merged run: the first row is the primary cell.
                merge_info = {
                    "start_row": start_row,
                    "start_col": col,
                    "end_row": end_row,
                    "end_col": col
                }
                content["merged_cells"].append(merge_info)
                for row in range(start_row, end_row + 1):
                    content["cells"][row][col] = create_cell_data(
                        row=row,
                        col=col,
                        value=current_value,
                        style_config=style_config,
                        is_merged=True,
                        is_primary=(row == start_row),
                        merge_range=merge_info
                    )
            else:
                # Stand-alone cell (merging disabled or nothing to merge).
                content["cells"][start_row][col] = create_cell_data(
                    row=start_row,
                    col=col,
                    value=current_value,
                    style_config=style_config,
                    is_merged=False
                )
            start_row = end_row + 1
    return [result]
def create_cell_data(
    row: int,
    col: int,
    value: str,
    style_config: Dict[str, Any],
    is_merged: bool = False,
    is_primary: bool = False,
    merge_range: Dict[str, int] = None
) -> Dict[str, Any]:
    """Build the normalized dict describing one table cell.

    Style values come from ``style_config`` when it is truthy; otherwise
    the alignment is ``"left"`` and border, shading and margins are empty
    dicts. For merged cells a ``merge_info`` entry is added, holding the
    primary flag and the four bounds read from ``merge_range``.
    """
    if style_config:
        alignment = style_config.get("alignment", "left")
        border = style_config.get("border", {})
        shading = style_config.get("shading", {})
        margins = style_config.get("margins", {})
    else:
        alignment, border, shading, margins = "left", {}, {}, {}

    cell = {
        "row": row,
        "col": col,
        "is_merged": is_merged,
        "content": create_cell_content(value, style_config),
        "alignment": alignment,
        "border": border,
        "shading": shading,
        "margins": margins,
    }
    if not is_merged:
        return cell

    # Copy the bounds rather than storing the caller's dict itself.
    merge_info = {"is_primary": is_primary}
    for bound in ("start_row", "start_col", "end_row", "end_col"):
        merge_info[bound] = merge_range[bound]
    cell["merge_info"] = merge_info
    return cell
def create_cell_content(text: str, style_config: Dict[str, Any] = None) -> List[Dict]:
    """Wrap ``text`` in a one-paragraph, one-run content structure.

    Alignment and font settings are read from ``style_config`` when it is
    truthy. Missing values default to left alignment and black, regular
    11-point Calibri.
    """
    if style_config:
        alignment = style_config.get("alignment", "left")
        font_config = style_config.get("font", {})
    else:
        alignment = "left"
        font_config = {}

    # (key, default) pairs in the order they appear in the output dict.
    font_defaults = (
        ("name", "Calibri"),
        ("size", 11),
        ("bold", False),
        ("italic", False),
        ("underline", False),
        ("color", {"r": 0, "g": 0, "b": 0}),
    )
    font = {key: font_config.get(key, default) for key, default in font_defaults}

    run = {"text": text, "font": font, "has_page_break": False}
    paragraph = {"alignment": alignment, "runs": [run]}
    return [paragraph]