删除docxjson项目

2025-07-02 16:20:27 +08:00 · 2025-07-02 16:20:27 +08:00 · ead688513c
parent 52634c1c7d
commit ead688513c
9 changed files with 0 additions and 15197 deletions
--- a/json2docx_docx2json/__pycache__/esay_docx_func.cpython-312.pyc
+++ b/json2docx_docx2json/__pycache__/esay_docx_func.cpython-312.pyc
--- a/json2docx_docx2json/__pycache__/esay_docx_func.cpython-313.pyc
+++ b/json2docx_docx2json/__pycache__/esay_docx_func.cpython-313.pyc
--- a/json2docx_docx2json/docx_to_json.py
+++ b/json2docx_docx2json/docx_to_json.py
@ -1,396 +0,0 @@
-import json
-from docx import Document
-from docx.oxml.shared import qn
-
-
-def docx_to_json(docx_path):
-    print(f"\n开始解析文档: {docx_path}")
-    doc = Document(docx_path)
-
-    result = []
-    para_index = 0
-    table_index = 0
-
-    print(f"文档包含 {len(doc.paragraphs)} 个段落和 {len(doc.tables)} 个表格")
-
-    for element in doc.element.body:
-        if element.tag.endswith('p'):
-            paragraph = doc.paragraphs[para_index]
-            print(f"\n处理段落 {para_index}: {paragraph.text[:50]}...")
-            para_data = process_paragraph(paragraph)
-            result.append({
-                "type": "text",
-                "content": para_data
-            })
-            para_index += 1
-        elif element.tag.endswith('tbl'):
-            table = doc.tables[table_index]
-            print(f"\n处理表格 {table_index} ({len(table.rows)}行×{len(table.columns)}列)")
-            table_data = process_table_with_merge_info(table)
-            result.append({
-                "type": "table",
-                "content": table_data
-                # 移除了bold属性，加粗信息现在由每个run单独记录
-            })
-            table_index += 1
-
-    print("\n文档解析完成!")
-    return result
-
-def process_table_with_merge_info(table):
-    """Convert a table into a dict with cell contents, merge info, sizes and formatting.
-
-    The returned dict has the keys ``rows``, ``cols``, ``cells`` (a row-major
-    matrix of per-cell dicts), ``merged_cells`` (one entry per detected merged
-    region), ``row_heights`` and ``col_widths`` (inches, or None when unset).
-    """
-    table_data = {
-        "rows": len(table.rows),
-        "cols": len(table.columns),
-        "cells": [],
-        "merged_cells": [],
-        "row_heights": [None] * len(table.rows),
-        "col_widths": [None] * len(table.columns)
-    }
-
-    # First pass: collect the content and formatting of every cell
-    cell_data_matrix = []
-    for i, row in enumerate(table.rows):
-        row_data = []
-        for j, cell in enumerate(row.cells):
-            cell_data = {
-                "row": i,
-                "col": j,
-                "content": process_cell_content(cell),
-                "alignment": get_cell_alignment(cell),  # horizontal alignment of the cell
-                "vertical_align": get_vertical_alignment(cell),  # vertical alignment
-                "border": get_cell_border(cell),
-                "shading": get_cell_shading(cell),  # background shading
-                "margins": get_cell_margins(cell),  # cell margins
-                "is_merged": False,
-                "merge_info": None
-            }
-            row_data.append(cell_data)
-            
-            # Record the row height while visiting the last cell of a row, and
-            # the column widths while visiting the last row
-            if j == len(row.cells) - 1 and row.height is not None:
-                table_data["row_heights"][i] = row.height.inches
-            if i == len(table.rows) - 1 and table.columns[j].width is not None:
-                table_data["col_widths"][j] = table.columns[j].width.inches
-                
-        cell_data_matrix.append(row_data)
-
-    # Second pass: detect merged cells
-    merge_ranges = []
-    for i, row in enumerate(table.rows):
-        for j, cell in enumerate(row.cells):
-            # Skip positions already covered by a recorded merged region
-            if any((i >= r1 and i <= r2 and j >= c1 and j <= c2) for (r1, r2, c1, c2) in merge_ranges):
-                continue
-
-            # Find the extent of grid positions that share this cell's
-            # underlying element (same `_tc` object)
-            r2 = i
-            c2 = j
-            
-            # Extend to the right along row i
-            while c2 + 1 < table_data["cols"] and table.rows[i].cells[c2 + 1]._tc is cell._tc:
-                c2 += 1
-            
-            # Extend downwards along column j
-            while r2 + 1 < table_data["rows"] and table.rows[r2 + 1].cells[j]._tc is cell._tc:
-                r2 += 1
-            
-            # A merged region was found
-            if r2 > i or c2 > j:
-                merge_ranges.append((i, r2, j, c2))
-                
-                # Mark the top-left cell as the primary cell of the region
-                cell_data_matrix[i][j]["is_merged"] = True
-                cell_data_matrix[i][j]["merge_info"] = {
-                    "is_primary": True,
-                    "merge_range": f"{i},{j}-{r2},{c2}"
-                }
-                
-                # Record the region in the merged-cell list
-                # (the cell content is processed a second time here)
-                table_data["merged_cells"].append({
-                    "start_row": i,
-                    "start_col": j,
-                    "end_row": r2,
-                    "end_col": c2,
-                    "content": process_cell_content(cell)
-                })
-                
-                # Mark the remaining cells of the region as merged, non-primary
-                for r in range(i, r2 + 1):
-                    for c in range(j, c2 + 1):
-                        if r != i or c != j:  # skip the primary cell
-                            cell_data_matrix[r][c]["is_merged"] = True
-                            cell_data_matrix[r][c]["merge_info"] = {
-                                "is_primary": False,
-                                "merge_range": f"{i},{j}-{r2},{c2}"
-                            }
-
-    # Attach the per-cell data to the result
-    table_data["cells"] = cell_data_matrix
-
-    return table_data
-
-def get_vertical_alignment(cell):
-    """获取单元格垂直对齐方式"""
-    try:
-        tcPr = cell._element.tcPr
-        if tcPr is not None:
-            vAlign = tcPr.find(qn('w:vAlign'))
-            if vAlign is not None:
-                align_map = {
-                    'top': 'top',
-                    'center': 'center',
-                    'bottom': 'bottom'
-                }
-                return align_map.get(vAlign.get(qn('w:val')), 'top')
-    except:
-        print("获取垂直对齐方式失败")
-        pass
-    return 'top'  # 默认顶部对齐
-
-def get_cell_shading(cell):
-    """获取单元格背景色"""
-    try:
-        tcPr = cell._element.tcPr
-        if tcPr is not None:
-            shading = tcPr.find(qn('w:shd'))
-            if shading is not None:
-                color = shading.get(qn('w:fill'))
-                if color:
-                    return {
-                        'color': color,
-                        'theme': shading.get(qn('w:themeColor'), '')
-                    }
-    except:
-        pass
-    return None
-
-def get_cell_margins(cell):
-    """获取单元格边距"""
-    margins = {}
-    try:
-        tcPr = cell._element.tcPr
-        if tcPr is not None:
-            for side in ['top', 'left', 'bottom', 'right']:
-                margin = tcPr.find(qn(f'w:tcMar/w:{side}'))
-                if margin is not None:
-                    margins[side] = {
-                        'w': margin.get(qn('w:w')),
-                        'type': margin.get(qn('w:type'))
-                    }
-    except:
-        pass
-    return margins if margins else None
-
-def process_cell_content(cell):
-    """处理单元格内容，直接调用段落处理函数"""
-    cell_content = []
-    for para in cell.paragraphs:
-        # 复用段落处理逻辑
-        para_data = process_paragraph(para)
-        cell_content.append(para_data)
-    return cell_content
-
-def has_page_break(run):
-    """检查run是否包含分页符"""
-    xml = run._element.xml
-    return ('w:br' in xml and 'type="page"' in xml) or '\x0c' in run.text
-
-def process_paragraph(paragraph):
-    para_data = {
-        "alignment": get_alignment_with_fallback(paragraph),
-        "runs": []
-    }
-
-    print(f"段落对齐方式: {para_data['alignment']}")
-
-    for run in paragraph.runs:
-        run_data = {
-            "text": run.text,
-            "font": get_font_info(run, paragraph),
-            "style": run.style.name if run.style else None,
-            "has_page_break": has_page_break(run)
-        }
-        para_data["runs"].append(run_data)
-
-    print(f"段落包含 {len(para_data['runs'])} 个文本运行(runs)")
-    return para_data
-
-def get_alignment_with_fallback(paragraph):
-    # 首先尝试直接从段落属性获取
-    try:
-        if paragraph.alignment is not None:
-            alignment_map = {
-                0: "left",
-                1: "center",
-                2: "right",
-                3: "justify"
-            }
-            result = alignment_map.get(paragraph.alignment, "left")
-            print(f"从paragraph.alignment获取对齐方式: {result}")
-            return result
-    except:
-        # 如果段落alignment为None，尝试从样式获取
-        try:
-            p_pr = paragraph.style.element.xpath('w:pPr')[0]
-            if p_pr.xpath('w:jc'):
-                jc_attr = p_pr.xpath('w:jc')[0].attrib
-                align_map = {
-                    "left": "left",
-                    "center": "center",
-                    "right": "right",
-                    "both": "justify",
-                    "start": "left",
-                    "end": "right"
-                }
-                result = align_map.get(jc_attr[qn('w:val')], "left")
-                print(f"从段落样式w:jc获取对齐方式: {result}")
-                return result
-        except Exception as e:
-            print(f"获取对齐方式失败: {str(e)}")
-
-    print("使用默认对齐方式(left)")
-    return "left"
-
-def get_font_info(run, paragraph):
-    font = run.font
-    font_info = {
-        "name": None,
-        "size": None,
-        "bold": font.bold if font.bold is not None else False,  # 默认为False
-        "italic": font.italic,
-        "underline": font.underline,
-        "color": get_color_info(run, paragraph)
-    }
-
-    # 处理字体大小
-    if font.size:
-        font_info["size"] = font.size.pt
-    else:
-        try:
-            p_rpr = paragraph.style.element.xpath('w:rPr')[0]
-            if p_rpr.xpath('w:sz'):
-                sz_attr = p_rpr.xpath('w:sz')[0].attrib
-                font_info["size"] = int(sz_attr[qn('w:val')]) / 2  # 转换为pt
-                print(f"从段落样式获取字体大小: {font_info['size']}pt")
-        except Exception as e:
-            print(f"获取字体大小失败: {str(e)}")
-            font_info["size"] = 11  # 默认值
-
-    # 处理字体名称
-    if font.name:
-        font_info["name"] = font.name
-        print(f"从run.font获取字体: {font.name}")
-    else:
-        try:
-            p_rpr = paragraph.style.element.xpath('w:rPr')[0]
-            if p_rpr.xpath('w:rFonts'):
-                try:
-                    font_info["name"] = p_rpr.xpath('w:rFonts')[0].attrib[qn("w:eastAsia")]
-                    print(f"从段落样式w:eastAsia获取字体: {font_info['name']}")
-                except:
-                    font_info["name"] = p_rpr.xpath('w:rFonts')[0].attrib[qn("w:ascii")]
-                    print(f"从段落样式w:ascii获取字体: {font_info['name']}")
-        except Exception as e:
-            print(f"获取字体失败: {str(e)}")
-            font_info["name"] = "Calibri"  # 默认值
-
-    return font_info
-
-def get_color_info(run, paragraph):
-    """增强版颜色获取，优先从run获取，失败则从段落样式获取"""
-    color_info = None
-    
-    # 1. 首先尝试从run.font获取颜色
-    try:
-        if run.font.color and hasattr(run.font.color, 'rgb'):
-            color = run.font.color
-            color_info = {
-                "r": (color.rgb >> 16) & 0xff,
-                "g": (color.rgb >> 8) & 0xff,
-                "b": color.rgb & 0xff
-            }
-            print(f"从run.font获取颜色: RGB({color_info['r']}, {color_info['g']}, {color_info['b']})")
-    except Exception as e:
-        print(f"从run.font获取颜色失败: {str(e)}")
-    
-    # 2. 如果run颜色为空，尝试从段落样式中获取
-    if color_info is None:
-        try:
-            p_rpr = paragraph.style.element.xpath('w:rPr')[0]
-            if p_rpr.xpath('w:color'):
-                color_attr = p_rpr.xpath('w:color')[0].attrib
-                if 'w:val' in color_attr:
-                    hex_color = color_attr[qn('w:val')]
-                    if hex_color.startswith('FF'):
-                        hex_color = hex_color[2:]
-                    rgb = tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
-                    color_info = {
-                        "r": rgb[0],
-                        "g": rgb[1],
-                        "b": rgb[2]
-                    }
-                    print(f"从段落样式获取颜色: RGB{rgb}")
-        except Exception as e:
-            print(f"从段落样式获取颜色失败: {str(e)}")
-    
-    return color_info
-
-def get_cell_alignment(cell):
-    if cell.paragraphs:
-        return get_alignment_with_fallback(cell.paragraphs[0])
-    return "left"
-
-def get_cell_border(cell):
-    # 默认返回实线边框
-    default_border = {
-        "top": {"style": "single", "size": 4, "color": "000000"},
-        "bottom": {"style": "single", "size": 4, "color": "000000"},
-        "left": {"style": "single", "size": 4, "color": "000000"},
-        "right": {"style": "single", "size": 4, "color": "000000"}
-    }
-    
-    try:
-        # 尝试获取实际边框设置
-        tcPr = cell._element.tcPr
-        if tcPr is None:
-            return default_border
-            
-        borders = {}
-        for side in ['top', 'bottom', 'left', 'right']:
-            border = tcPr.xpath(f'w:tcBorders/w:{side}')
-            if border:
-                border = border[0]
-                border_style = border.get(qn('w:val'), 'single')
-                border_size = border.get(qn('w:sz'), '4')
-                border_color = border.get(qn('w:color'), '000000')
-                borders[side] = {
-                    "style": border_style,
-                    "size": int(border_size),
-                    "color": border_color
-                }
-            else:
-                borders[side] = default_border[side]
-        
-        return borders
-    except Exception as e:
-        print(f"获取单元格边框失败: {str(e)}, 使用默认边框")
-        return default_border
-
-def process_cell(cell):
-    cell_content = []
-    print(f"处理单元格，包含 {len(cell.paragraphs)} 个段落")
-
-    for para in cell.paragraphs:
-        cell_content.append(process_paragraph(para))
-
-    return cell_content
-
-if __name__ == "__main__":
-    docx_path = r'D:\work\报告扫描\source.docx'
-    json_data = docx_to_json(docx_path)
-
-    with open("output.json", "w", encoding="utf-8") as f:
-        json.dump(json_data, f, ensure_ascii=False, indent=2)
-
-    print("转换完成，结果已保存到output.json")
--- a/json2docx_docx2json/json_to_docx.py
+++ b/json2docx_docx2json/json_to_docx.py
@ -1,243 +0,0 @@
-from docx import Document
-from docx.shared import Pt, RGBColor
-from docx.enum.text import WD_ALIGN_PARAGRAPH
-from docx.oxml.shared import qn, OxmlElement
-import json
-
-def json_to_docx(json_data, output_path):
-    print(f"\n开始转换JSON到DOCX文档，输出路径: {output_path}")
-    doc = Document()
-    total_elements = len(json_data)
-    print(f"文档包含 {total_elements} 个元素(段落和表格)")
-    
-    for i, element in enumerate(json_data, 1):
-        print(f"\n处理元素 {i}/{total_elements}: ", end="")
-        if element["type"] == "text":
-            print(f"段落 (长度: {len(element['content']['runs'])}个runs)")
-            add_paragraph_from_json(doc, element["content"])
-        elif element["type"] == "table":
-            rows = element["content"]["rows"]
-            cols = element["content"]["cols"]
-            merges = len(element["content"].get("merged_cells", []))
-            print(f"表格 ({rows}行×{cols}列, 包含 {merges} 个合并单元格)")
-            add_table_from_json(doc, element["content"], element.get("bold", False))
-    
-    print("\n正在保存文档...")
-    doc.save(output_path)
-    print(f"文档已成功保存到 {output_path}")
-
-def add_paragraph_from_json(doc, para_json):
-    paragraph = doc.add_paragraph()
-    print(f"  添加段落 (对齐: {para_json['alignment']})")
-    
-    # 设置段落对齐方式
-    alignment_map = {
-        "left": WD_ALIGN_PARAGRAPH.LEFT,
-        "center": WD_ALIGN_PARAGRAPH.CENTER,
-        "right": WD_ALIGN_PARAGRAPH.RIGHT,
-        "justify": WD_ALIGN_PARAGRAPH.JUSTIFY
-    }
-    paragraph.alignment = alignment_map.get(para_json["alignment"], WD_ALIGN_PARAGRAPH.LEFT)
-    
-    # 添加文本运行(runs)
-    for run_idx, run_json in enumerate(para_json["runs"], 1):
-        run = paragraph.add_run(run_json["text"])
-        if run_json["has_page_break"]:
-            import docx
-            run.add_break(docx.enum.text.WD_BREAK.PAGE)
-        font = run.font
-        
-        print(f"    添加run {run_idx}: '{run_json['text']}' "
-              f"(字体: {run_json['font']['name']}, 大小: {run_json['font']['size']}, "
-              f"加粗: {run_json['font']['bold']}, 斜体: {run_json['font']['italic']})")
-        
-        # 设置字体样式
-        if run_json["font"]["name"]:
-            font.name = run_json["font"]["name"]
-            run.element.rPr.rFonts.set(qn('w:eastAsia'), run_json["font"]["name"])
-        
-        if run_json["font"]["size"]:
-            font.size = Pt(run_json["font"]["size"])
-        
-        font.bold = run_json["font"]["bold"]
-        font.italic = run_json["font"]["italic"]
-        font.underline = run_json["font"]["underline"]
-        
-        # 设置字体颜色
-        if run_json["font"]["color"]:
-            color = run_json["font"]["color"]
-            font.color.rgb = RGBColor(color["r"], color["g"], color["b"])
-            print(f"      设置颜色: RGB({color['r']}, {color['g']}, {color['b']})")
-
-def add_table_from_json(doc, table_json, bold=False):
-    """Add a table to *doc* and rebuild its sizes, merges and cell contents.
-
-    Steps, in order: create the table, apply column widths and row heights,
-    merge the regions listed in ``merged_cells``, then fill and format every
-    cell that is not a non-primary part of a merged region.
-
-    Args:
-        doc: Object providing ``add_table(rows=..., cols=...)``.
-        table_json: Dict with ``rows``, ``cols``, ``cells`` and optionally
-            ``col_widths``, ``row_heights`` (inches) and ``merged_cells``.
-        bold: Accepted but not used anywhere in this function.
-    """
-    print(f"  创建表格: {table_json['rows']}行 × {table_json['cols']}列")
-    table = doc.add_table(rows=table_json["rows"], cols=table_json["cols"])
-    
-    # Use the 'Table Grid' style as the base; per-cell borders are applied
-    # later through format_cell
-    table.style = 'Table Grid'
-    
-    # Apply column widths
-    if "col_widths" in table_json and any(table_json["col_widths"]):
-        print("  设置列宽...")
-        for col_idx, width in enumerate(table_json["col_widths"]):
-            if width is not None:
-                # Convert inches to twips (1 inch = 1440 twips)
-                twips_width = int(width * 1440)
-                for cell in table.columns[col_idx].cells:
-                    tc = cell._tc
-                    tcPr = tc.get_or_add_tcPr()
-                    tcW = tcPr.first_child_found_in("w:tcW")
-                    if tcW is None:
-                        tcW = OxmlElement('w:tcW')
-                        tcPr.append(tcW)
-                    tcW.set(qn('w:w'), str(twips_width))
-                    tcW.set(qn('w:type'), 'dxa')  # use an absolute unit
-    
-    # Apply row heights
-    if "row_heights" in table_json and any(table_json["row_heights"]):
-        print("  设置行高...")
-        for row_idx, height in enumerate(table_json["row_heights"]):
-            if height is not None:
-                # Convert inches to twips (1 inch = 1440 twips)
-                twips_height = int(height * 1440)
-                tr = table.rows[row_idx]._tr
-                trPr = tr.get_or_add_trPr()
-                trHeight = OxmlElement('w:trHeight')
-                trHeight.set(qn('w:val'), str(twips_height))
-                trHeight.set(qn('w:hRule'), 'atLeast')  # or 'exact' for a fixed height
-                trPr.append(trHeight)
-    
-    # Merge cells
-    for merge_idx, merge_info in enumerate(table_json.get("merged_cells", []), 1):
-        start_row = merge_info["start_row"]
-        start_col = merge_info["start_col"]
-        end_row = merge_info["end_row"]
-        end_col = merge_info["end_col"]
-        
-        print(f"    合并单元格 #{merge_idx}: 从({start_row},{start_col})到({end_row},{end_col})")
-        
-        start_cell = table.cell(start_row, start_col)
-        end_cell = table.cell(end_row, end_col)
-        start_cell.merge(end_cell)
-    
-    # Fill in the table contents
-    for row_idx, row_data in enumerate(table_json["cells"]):
-        for col_idx, cell_data in enumerate(row_data):
-            # Skip merged cells that are not the primary cell of their region
-            if cell_data["is_merged"] and not cell_data["merge_info"]["is_primary"]:
-                print(f"    跳过被合并的单元格({row_idx},{col_idx})")
-                continue
-                
-            cell = table.cell(cell_data["row"], cell_data["col"])
-            print(f"    处理单元格({row_idx},{col_idx}) - 对齐: {cell_data['alignment']}")
-            format_cell(cell, cell_data)  # apply content and all cell formatting
-
-def format_cell(cell, cell_data):
-    """设置单元格完整格式"""
-    # 清空原有内容
-    for p in cell.paragraphs:
-        p._element.getparent().remove(p._element)
-    
-    # 添加内容
-    for para in cell_data["content"]:
-        add_paragraph_from_json(cell, para)
-    
-    # 设置对齐方式
-    set_cell_alignment(cell, cell_data)
-    
-    # 设置边框
-    set_cell_border(cell, cell_data["border"])
-    
-    # 设置背景色
-    if cell_data.get("shading"):
-        set_cell_shading(cell, cell_data["shading"])
-    
-    # 设置边距
-    if cell_data.get("margins"):
-        set_cell_margins(cell, cell_data["margins"])
-
-def set_cell_alignment(cell, cell_data):
-    """设置单元格对齐（水平和垂直）"""
-    # 水平对齐
-    if cell.paragraphs:
-        align_map = {
-            "left": WD_ALIGN_PARAGRAPH.LEFT,
-            "center": WD_ALIGN_PARAGRAPH.CENTER,
-            "right": WD_ALIGN_PARAGRAPH.RIGHT,
-            "justify": WD_ALIGN_PARAGRAPH.JUSTIFY
-        }
-        cell.paragraphs[0].alignment = align_map.get(cell_data["alignment"], WD_ALIGN_PARAGRAPH.LEFT)
-    
-    # 垂直对齐设置
-    tcPr = cell._tc.get_or_add_tcPr()
-    vAlign = OxmlElement('w:vAlign')
-    align_value = cell_data.get('vertical_align', 'top')
-    print(f"    设置垂直对齐: {align_value}")
-
-    # 确保使用有效的对齐值
-    valid_alignments = ['top', 'center', 'bottom']
-    if align_value not in valid_alignments:
-        align_value = 'top'  # 默认值
-
-    vAlign.set(qn('w:val'), align_value)
-    tcPr.append(vAlign)
-
-def set_cell_shading(cell, shading):
-    """设置单元格背景色"""
-    tcPr = cell._tc.get_or_add_tcPr()
-    shd = OxmlElement('w:shd')
-    shd.set(qn('w:fill'), shading["color"])
-    if shading.get("theme"):
-        shd.set(qn('w:themeColor'), shading["theme"])
-    tcPr.append(shd)
-
-def set_cell_margins(cell, margins):
-    """设置单元格边距"""
-    tcPr = cell._tc.get_or_add_tcPr()
-    tcMar = OxmlElement('w:tcMar')
-    
-    for side, margin in margins.items():
-        side_el = OxmlElement(f'w:{side}')
-        side_el.set(qn('w:w'), margin["w"])
-        side_el.set(qn('w:type'), margin["type"])
-        tcMar.append(side_el)
-    
-    tcPr.append(tcMar)
-
-def set_cell_border(cell, border_data):
-    """
-    设置单元格边框
-    :param cell: 单元格对象
-    :param border_data: 边框数据
-    """
-    tc = cell._tc
-    tcPr = tc.get_or_add_tcPr()
-    
-    # 检查是否存在边框元素，不存在则创建
-    tcBorders = tcPr.first_child_found_in("w:tcBorders")
-    if tcBorders is None:
-        tcBorders = OxmlElement('w:tcBorders')
-        tcPr.append(tcBorders)
-    
-    # 设置各边边框
-    for side in ['top', 'left', 'bottom', 'right']:
-        if side in border_data:
-            border = border_data[side]
-            border_el = OxmlElement(f'w:{side}')
-            border_el.set(qn('w:val'), border.get('style', 'single'))
-            border_el.set(qn('w:sz'), str(border.get('size', 4)))
-            border_el.set(qn('w:color'), border.get('color', '000000'))
-            tcBorders.append(border_el)
-
-# 使用示例
-if __name__ == "__main__":
-    # 假设我们已经有了之前生成的JSON数据
-    input_json = "output.json"
-    output_path = "restored.docx"
-    
-    print(f"从 {input_json} 读取JSON数据...")
-    with open(input_json, "r", encoding="utf-8") as f:
-        json_data = json.load(f)
-    
-    # 将JSON转换回DOCX
-    json_to_docx(json_data, output_path)
--- a/json2docx_docx2json/output.json
+++ b/json2docx_docx2json/output.json
--- a/json2docx_docx2json/restored.docx
+++ b/json2docx_docx2json/restored.docx
--- a/json2docx_docx2json/source.docx
+++ b/json2docx_docx2json/source.docx
--- a/json2docx_docx2json/查找分页符.py
+++ b/json2docx_docx2json/查找分页符.py
@ -1,41 +0,0 @@
-from docx import Document
-from docx.enum.text import WD_BREAK
-import docx.oxml.shared as oxml
-
-def find_and_mark_page_breaks(input_path, output_path):
-    """
-    Detect page breaks in a document and add a marker at each position.
-    
-    Args:
-        input_path: path of the input document
-        output_path: path the modified document is saved to
-    """
-    doc = Document(input_path)
-    
-    # Walk every paragraph
-    for paragraph in doc.paragraphs:
-        # Walk every run of the paragraph
-        for run in paragraph.runs:
-            # Check whether the run contains a page break
-            if has_page_break(run):
-                print(f"发现分页符 - 段落内容: '{paragraph.text}'")
-                
-                # Add a visible marker in place (optional)
-                # NOTE(review): assigning run.text may discard non-text run
-                # content such as the original break element; verify.
-                run.text = run.text.replace("\x0c", "[PAGE BREAK]")
-                
-                # Add a page break to the run (intended to keep the original
-                # break and add a new one)
-                run.add_break(WD_BREAK.PAGE)
-    
-    # Save the modified document
-    doc.save(output_path)
-    print(f"处理完成，结果已保存到: {output_path}")
-
-def has_page_break(run):
-    """检查run是否包含分页符"""
-    xml = run._element.xml
-    return ('w:br' in xml and 'type="page"' in xml) or '\x0c' in run.text
-
-# Example usage
-# NOTE(review): there is no `if __name__ == "__main__":` guard, so these
-# statements also run whenever this module is imported.
-input_file = "source.docx"
-output_file = "output_with_marks.docx"
-find_and_mark_page_breaks(input_file, output_file)
--- a/json2docx_docx2json/（1122016001）[2025]D-3A0010三峡新能源海上风电运维江苏有限公司牟平海上风电场雷电防护装置检测91320982MA1T6N9LXJ
+++ b/json2docx_docx2json/（1122016001）[2025]D-3A0010三峡新能源海上风电运维江苏有限公司牟平海上风电场雷电防护装置检测91320982MA1T6N9LXJ