from docx import Document
from docx.enum.text import WD_BREAK  # noqa: F401  (file-level import retained)
import docx.oxml.shared as oxml


def find_and_mark_page_breaks(input_path, output_path, marker="[PAGE BREAK]"):
    """Insert a visible text marker in front of every explicit page break.

    The document at *input_path* is opened, each run of each paragraph in
    ``doc.paragraphs`` is scanned for ``w:br`` elements whose ``w:type`` is
    ``"page"``, and a text element containing *marker* is inserted
    immediately before each one. The page breaks themselves are left
    untouched. The result is written to *output_path*.

    Args:
        input_path: Path of the document to read.
        output_path: Path the modified document is saved to.
        marker: Text inserted before each page break.

    Returns:
        The number of page breaks that were marked.
    """
    doc = Document(input_path)
    marked = 0

    for paragraph in doc.paragraphs:
        # Snapshot the text first so the log line shows the paragraph as it
        # was, without markers inserted while processing earlier runs.
        paragraph_text = paragraph.text

        for run in paragraph.runs:
            page_breaks = _page_break_elements(run)
            if not page_breaks:
                continue

            print(f"发现分页符 - 段落内容: '{paragraph_text}'")

            for page_break in page_breaks:
                # Edit the XML directly: assigning to ``run.text`` would
                # rebuild the run's content and drop the break element.
                tag = oxml.OxmlElement("w:t")
                tag.text = marker
                page_break.addprevious(tag)

            marked += len(page_breaks)

    doc.save(output_path)
    print(f"处理完成,结果已保存到: {output_path}")
    return marked


def _page_break_elements(run):
    """Return the ``w:br`` elements with ``w:type="page"`` found in *run*."""
    type_attr = oxml.qn("w:type")
    return [
        br
        for br in run._element.iter(oxml.qn("w:br"))
        if br.get(type_attr) == "page"
    ]


def has_page_break(run):
    """Return True if *run* contains at least one explicit page break.

    The check looks at the ``w:type`` attribute of actual ``w:br`` elements
    instead of searching the serialized XML for substrings, so a run is not
    reported merely because ``w:br`` and ``type="page"`` both occur somewhere
    in its markup.
    """
    return bool(_page_break_elements(run))


# Usage example
input_file = "source.docx"
output_file = "output_with_marks.docx"

if __name__ == "__main__":
    find_and_mark_page_breaks(input_file, output_file)