第一次提交

This commit is contained in:
Voge1imkafig 2025-07-02 16:02:36 +08:00
commit 379354958a
20 changed files with 969 additions and 0 deletions

Binary file not shown.

16
docx_merge_win32com.log Normal file
View File

@ -0,0 +1,16 @@
2025-07-02 16:00:16,465 - INFO - 开始合并文档: C:\Users\VogelimKafig\Desktop\金风模板\empty.docx 和 C:\Users\VogelimKafig\Desktop\金风模板\jingfeng_1.docx
2025-07-02 16:00:16,496 - ERROR - 文档合并失败: (-2147352567, '发生意外。', (0, 'Kingsoft WPS', '文档打开失败。', '', 3010, -786427), None)
Traceback (most recent call last):
File "c:\Users\VogelimKafig\Desktop\金风模板\tools\win32_total_copy.py", line 43, in merge_documents
doc1 = self.word_app.Documents.Open(str(doc1_path))
File "<COMObject <unknown>>", line 5, in Open
pywintypes.com_error: (-2147352567, '发生意外。', (0, 'Kingsoft WPS', '文档打开失败。', '', 3010, -786427), None)
2025-07-02 16:00:16,528 - INFO - 已清理资源
2025-07-02 16:01:03,544 - INFO - 开始合并文档: C:\Users\VogelimKafig\Desktop\金风模板\empty.docx 和 C:\Users\VogelimKafig\Desktop\金风模板\jingfeng_1.docx
2025-07-02 16:01:03,581 - ERROR - 文档合并失败: (-2147352567, '发生意外。', (0, 'Kingsoft WPS', '文档打开失败。', '', 3010, -786427), None)
Traceback (most recent call last):
File "c:\Users\VogelimKafig\Desktop\金风模板\tools\win32_total_copy.py", line 43, in merge_documents
doc1 = self.word_app.Documents.Open(str(doc1_path))
File "<COMObject <unknown>>", line 5, in Open
pywintypes.com_error: (-2147352567, '发生意外。', (0, 'Kingsoft WPS', '文档打开失败。', '', 3010, -786427), None)
2025-07-02 16:01:03,620 - INFO - 已清理资源

169
generate_jingfeng.py Normal file
View File

@ -0,0 +1,169 @@
from tools.win32_total_copy import WordDocumentMerger
import os, logging
from docx import Document
from tools.json_process import merge_info
from tools.docx_tools import search_and_replace
def main(base_info, baogao_info):
    """Generate a Jingfeng blade-inspection report .docx from templates.

    Merges caller-supplied project info (``base_info``) and report info
    (``baogao_info``) with built-in defaults, merges the Word templates into
    one output document, then substitutes every placeholder key in it.

    Args:
        base_info: dict of project-level fields; may be None or partial —
            missing/empty values fall back to DEFAULT_BASE_INFO.
        baogao_info: dict of report-level fields; may be None or partial —
            missing/empty values fall back to DEFAULT_BAOGAO_INFO.

    Returns:
        None. All failures are logged and cause an early return.
    """
    DEFAULT_BASE_INFO = {  # project base info
        # directories
        'picture_dir': "",  # where pictures are stored
        'shengcheng_dir': os.getcwd(),  # working dir (report output and template location)
        'muban_dir': os.getcwd(),  # document template directory
        # project overview
        'jituan_jianxie': '甲方集团',
        'jia_company_name': '甲方公司名',
        'kehu_company_name': '客户名称',
        'kehu_fuzeren': '客户负责人',
        'kehu_phone': '18xxxxxxxxxx',
        'shigong_company_name': '施工单位',
        'jizu_num': '项目规格(台)',
        'zhengji_changjia': '整机厂家',
        'yepian_changjia': '叶片厂家',  # blade manufacturer info
        'fengchang_name': '风场名称',
        'fengchang_location': '风场位置',
        'shigong_date': '施工日期(开始~结束)',
        'jizu_type': '机组型号',
        'yepian_type': '叶片型号',
        'jiancha_fangshi': '作业无人机近距离外观检查',
        # Party B (contractor) info
        'yi_Company': '乙方公司名',
        'fuzeren': '甲方负责人',
        'phone_fuzeren': '188xxxxxxxx',
    }
    # Bug fix: the logger must exist before the try block, otherwise the
    # except handlers below raise NameError when an early failure occurs.
    logger = logging.getLogger(__name__)
    try:
        base_info = merge_info(base_info, DEFAULT_BASE_INFO)
        # Re-bind to a per-output-directory logger once the directory is known.
        logger = logging.getLogger(base_info['shengcheng_dir'] + __name__)
        picture_dir = base_info['picture_dir']
        shengcheng_dir = base_info['shengcheng_dir']
        muban_dir = base_info['muban_dir']
        jituan_jianxie = base_info['jituan_jianxie']
        jia_company_name = base_info['jia_company_name']
        kehu_company_name = base_info['kehu_company_name']
        kehu_fuzeren = base_info['kehu_fuzeren']
        kehu_phone = base_info['kehu_phone']
        shigong_company_name = base_info['shigong_company_name']
        jizu_num = base_info['jizu_num']
        zhengji_changjia = base_info['zhengji_changjia']
        yepian_changjia = base_info['yepian_changjia']
        fengchang_name = base_info['fengchang_name']
        fengchang_location = base_info['fengchang_location']
        shigong_date = base_info['shigong_date']
        jizu_type = base_info['jizu_type']
        yepian_type = base_info['yepian_type']
        jiancha_fangshi = base_info['jiancha_fangshi']
        yi_Company = base_info['yi_Company']
        fuzeren = base_info['fuzeren']
        phone_fuzeren = base_info['phone_fuzeren']
    except Exception as e:
        logger.error(f"获取项目基本信息失败:{e}")
        return
    DEFAULT_BAOGAO_INFO = {
        # basic content of the current inspection report
        'jizu_bianhao': 'H3-08#',  # turbine unit number being inspected
        'baogao_date': '2021年12月10日',
        # inspection plan
        'jiancha_renyuan': '张三,李四,王五',  # inspectors
        # inspection info
        'waibu_jiancha': 'True',  # include external inspection?
        'neibu_jiancha': 'True',  # include internal inspection?
        'fanglei_jiancha': 'True',  # include lightning inspection? (no defect photos for this part)
        'jiancha_location': '叶片外部外观',  # inspection-scope text
        'jiancha_fangshi': '作业无人机近距离外观检查',  # inspection-method text
        # report processing info
        'yezhu_renyuan': '李四',  # owner representative
        'changjia_renyuan': '王五',  # manufacturer representative
        'date_process': '生成报告人员',  # data processor — frontend should pass the generating user
        'baogao_bianzhi': '生成报告人员',  # report author — frontend should pass the generating user
        'baogao_shenghe': '待填写(人员)',  # report reviewer
        'shenghe_date': '待填写(日期)',  # review date
        # summary-table text; frontend picks per included section, defaults include all
        'Y1_jiancha_neirong': '''1.叶片前缘、后缘、PS面、SS面
2.叶片内部:导雷卡,腹板,透光,人孔盖版,叶根盖板...
3.轮毂至塔基导通,内部导线线阻,外部导线线阻...''',
        'Y2_jiancha_neirong': '''1.叶片前缘、后缘、PS面、SS面
2.叶片内部:导雷卡,腹板,透光,人孔盖版,叶根盖板...
3.轮毂至塔基导通,内部导线线阻,外部导线线阻...''',
        'Y3_jiancha_neirong': '''1.叶片前缘、后缘、PS面、SS面
2.叶片内部:导雷卡,腹板,透光,人孔盖版,叶根盖板...
3.轮毂至塔基导通,内部导线线阻,外部导线线阻...''',
        # report summary
        'baogao_zongjie': '''1、因海上风电叶片运行环境恶劣、空气盐碱度高,叶片前缘合模缝区域及PS面迎风面涂层易受腐蚀,建议定期观察维护。
2,经无人机近距离外观检查,发现H3-08#机位Y200220AF叶片PS面距叶根20m处发现一处裂纹损伤,长度轴向3m,该缺陷经我方判定为严重缺陷,建议尽快结安排对该机组停机,并结合其他检查手段(如人工打磨,进一步勘查并决定维修处置方案,防止风险进一步升级。
3,经无人机近距离外观检查,未发现H3-08#机位Y200249AF、Y200250AF叶片有明显影响机组正常运行的外部缺陷。
''',  # report summary text
        'pizhun_renyuan': '王五'  # approver
    }
    try:
        baogao_info = merge_info(baogao_info, DEFAULT_BAOGAO_INFO)
        jizu_bianhao = baogao_info['jizu_bianhao']
        baogao_date = baogao_info['baogao_date']
        jiancha_fangshi = baogao_info['jiancha_fangshi']
        bianzhi_renyuan = baogao_info['baogao_bianzhi']
        baogao_shenghe = baogao_info['baogao_shenghe']
        pizhun_renyuan = baogao_info['pizhun_renyuan']
        jiancha_renyuan = baogao_info['jiancha_renyuan']
    except Exception as e:
        logger.error(f"获取报告基本信息失败:{e}")
        return
    # Placeholder -> replacement text for the merged document.
    list_to_replace = {
        'jia_company_name': jia_company_name,
        'fengchang_name': fengchang_name,
        'jizu_hao': jizu_bianhao,
        'bianzhi_renyuan': bianzhi_renyuan,
        'shenghe_renyuan': baogao_shenghe,
        'pizhun_renyuan': pizhun_renyuan,
        'bianzhi_riqi': baogao_date,
        'jia_company_name1': jia_company_name,
        'fengchang_name1': fengchang_name,
        'jizu_hao1': jizu_bianhao,
        'fengchang_name2': fengchang_name,
        'fengchang_location': fengchang_location,
        'kehu_company': kehu_company_name,
        'xiangmuguige': jizu_num,
        'kehu_fuzeren': kehu_fuzeren,
        'yezhu_phone': kehu_phone,
        'shigong_company': shigong_company_name,
        'shigong_date': shigong_date,
        'shigong_fuzeren': fuzeren,
        'shigong_phone': phone_fuzeren,
        'zhengji_changjia': zhengji_changjia,
        'yepian_changjia': yepian_changjia,
        'jizu_type': jizu_type,
        'yepian_type': yepian_type,
        'jiancha_fangshi': jiancha_fangshi,
        'jiancha_renyuan': jiancha_renyuan
    }
    output_filename = os.path.join(shengcheng_dir, f"{jizu_bianhao}_{baogao_date}.docx")
    merger = WordDocumentMerger()
    try:
        # Merge the empty base template with the report body template,
        # then substitute every placeholder in the merged file.
        merger.merge_documents(os.path.join(muban_dir, "empty.docx"), os.path.join(muban_dir, 'jingfeng_1.docx'), output_filename)
        for find_text, replace_text in list_to_replace.items():
            print(search_and_replace(output_filename, find_text, replace_text))
    except Exception as e:
        logger.error(f"合并文档失败:{e}")
        return
if __name__ == '__main__':
    # Configure logging: mirror messages to a log file and the console.
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler('docx_merge_win32com.log'),
            logging.StreamHandler()
        ]
    )
    base_info = {
        "muban_dir": r"C:\Users\VogelimKafig\Desktop\金风模板\source\muban",
        "shengcheng_dir": r"C:\Users\VogelimKafig\Desktop\金风模板\output"
    }
    # Bug fix: base_info was previously discarded (main(None, None)), which
    # silently ignored the directory overrides constructed above.
    main(base_info, None)

BIN
json格式.docx Normal file

Binary file not shown.

View File

@ -0,0 +1,61 @@
2025-07-01 16:57:19,386 - ERROR - 获取项目基本信息失败:'NoneType' object is not subscriptable
2025-07-01 17:00:03,548 - ERROR - 获取项目基本信息失败:'NoneType' object is not subscriptable
2025-07-01 17:02:01,727 - ERROR - 获取项目基本信息失败:'NoneType' object is not subscriptable
2025-07-01 17:02:21,161 - ERROR - 获取项目基本信息失败:'NoneType' object is not subscriptable
2025-07-01 17:03:55,620 - ERROR - 获取报告基本信息失败:'NoneType' object has no attribute 'items'
2025-07-01 17:05:08,651 - INFO - 开始合并文档: C:\Users\VogelimKafig\Desktop\金风模板\H3-08#_2021年12月10日.docx 和 C:\Users\VogelimKafig\Desktop\金风模板\jingfeng_1.docx
2025-07-01 17:05:08,938 - INFO - 已打开第一个文档: C:\Users\VogelimKafig\Desktop\金风模板\H3-08#_2021年12月10日.docx
2025-07-01 17:05:09,039 - INFO - 已打开第二个文档: C:\Users\VogelimKafig\Desktop\金风模板\jingfeng_1.docx
2025-07-01 17:05:09,128 - INFO - 已插入第二个文档内容
2025-07-01 17:05:09,233 - INFO - 已合并页眉页脚
2025-07-01 17:05:09,308 - INFO - 已修复所有页眉边框线
2025-07-01 17:05:09,404 - INFO - 已保存合并文档: C:\Users\VogelimKafig\Desktop\金风模板\H3-08#_2021年12月10日_1.docx
2025-07-01 17:05:09,514 - INFO - 已清理资源
2025-07-01 17:07:07,730 - INFO - 开始合并文档: C:\Users\VogelimKafig\Desktop\金风模板\H3-08#_2021年12月10日.docx 和 C:\Users\VogelimKafig\Desktop\金风模板\jingfeng_1.docx
2025-07-01 17:07:08,054 - INFO - 已打开第一个文档: C:\Users\VogelimKafig\Desktop\金风模板\H3-08#_2021年12月10日.docx
2025-07-01 17:07:08,131 - INFO - 已打开第二个文档: C:\Users\VogelimKafig\Desktop\金风模板\jingfeng_1.docx
2025-07-01 17:07:08,183 - INFO - 已插入第二个文档内容
2025-07-01 17:07:08,320 - INFO - 已合并页眉页脚
2025-07-01 17:07:08,350 - INFO - 已修复所有页眉边框线
2025-07-01 17:07:08,515 - INFO - 已保存合并文档: C:\Users\VogelimKafig\Desktop\金风模板\H3-08#_2021年12月10日_2.docx
2025-07-01 17:07:08,600 - INFO - 已清理资源
2025-07-01 17:13:05,392 - INFO - 开始合并文档: C:\Users\VogelimKafig\Desktop\金风模板\H3-08#_2021年12月10日.docx 和 C:\Users\VogelimKafig\Desktop\金风模板\empty.docx
2025-07-01 17:13:05,702 - INFO - 已打开第一个文档: C:\Users\VogelimKafig\Desktop\金风模板\H3-08#_2021年12月10日.docx
2025-07-01 17:13:05,745 - INFO - 已打开第二个文档: C:\Users\VogelimKafig\Desktop\金风模板\empty.docx
2025-07-01 17:13:05,765 - INFO - 已插入第二个文档内容
2025-07-01 17:13:05,884 - INFO - 已合并页眉页脚
2025-07-01 17:13:05,931 - INFO - 已修复所有页眉边框线
2025-07-01 17:13:05,999 - INFO - 已保存合并文档: C:\Users\VogelimKafig\Desktop\金风模板\H3-08#_2021年12月10日_1.docx
2025-07-01 17:13:06,089 - INFO - 已清理资源
2025-07-01 17:15:02,491 - INFO - 开始合并文档: C:\Users\VogelimKafig\Desktop\金风模板\empty.docx 和 C:\Users\VogelimKafig\Desktop\金风模板\jingfeng_1.docx
2025-07-01 17:15:02,637 - INFO - 已打开第一个文档: C:\Users\VogelimKafig\Desktop\金风模板\empty.docx
2025-07-01 17:15:02,701 - INFO - 已打开第二个文档: C:\Users\VogelimKafig\Desktop\金风模板\jingfeng_1.docx
2025-07-01 17:15:02,758 - INFO - 已插入第二个文档内容
2025-07-01 17:15:02,844 - INFO - 已合并页眉页脚
2025-07-01 17:15:02,872 - INFO - 已修复所有页眉边框线
2025-07-01 17:15:02,925 - INFO - 已保存合并文档: C:\Users\VogelimKafig\Desktop\金风模板\H3-08#_2021年12月10日_2.docx
2025-07-01 17:15:03,024 - INFO - 已清理资源
2025-07-01 17:15:42,731 - INFO - 开始合并文档: C:\Users\VogelimKafig\Desktop\金风模板\empty.docx 和 C:\Users\VogelimKafig\Desktop\金风模板\jingfeng_1.docx
2025-07-01 17:15:42,882 - INFO - 已打开第一个文档: C:\Users\VogelimKafig\Desktop\金风模板\empty.docx
2025-07-01 17:15:42,951 - INFO - 已打开第二个文档: C:\Users\VogelimKafig\Desktop\金风模板\jingfeng_1.docx
2025-07-01 17:15:43,007 - INFO - 已插入第二个文档内容
2025-07-01 17:15:43,096 - INFO - 已合并页眉页脚
2025-07-01 17:15:43,126 - INFO - 已修复所有页眉边框线
2025-07-01 17:15:43,184 - INFO - 已保存合并文档: C:\Users\VogelimKafig\Desktop\金风模板\H3-08#_2021年12月10日_3.docx
2025-07-01 17:15:43,294 - INFO - 已清理资源
2025-07-01 17:20:31,277 - INFO - 开始合并文档: C:\Users\VogelimKafig\Desktop\金风模板\empty.docx 和 C:\Users\VogelimKafig\Desktop\金风模板\jingfeng_1.docx
2025-07-01 17:20:31,419 - INFO - 已打开第一个文档: C:\Users\VogelimKafig\Desktop\金风模板\empty.docx
2025-07-01 17:20:31,494 - INFO - 已打开第二个文档: C:\Users\VogelimKafig\Desktop\金风模板\jingfeng_1.docx
2025-07-01 17:20:31,575 - INFO - 已插入第二个文档内容
2025-07-01 17:20:31,670 - INFO - 已合并页眉页脚
2025-07-01 17:20:31,705 - INFO - 已修复所有页眉边框线
2025-07-01 17:20:31,761 - INFO - 已保存合并文档: C:\Users\VogelimKafig\Desktop\金风模板\H3-08#_2021年12月10日_4.docx
2025-07-01 17:20:31,892 - INFO - 已清理资源
2025-07-01 17:21:28,795 - INFO - 开始合并文档: C:\Users\VogelimKafig\Desktop\金风模板\empty.docx 和 C:\Users\VogelimKafig\Desktop\金风模板\jingfeng_1.docx
2025-07-01 17:21:28,952 - INFO - 已打开第一个文档: C:\Users\VogelimKafig\Desktop\金风模板\empty.docx
2025-07-01 17:21:29,020 - INFO - 已打开第二个文档: C:\Users\VogelimKafig\Desktop\金风模板\jingfeng_1.docx
2025-07-01 17:21:29,074 - INFO - 已插入第二个文档内容
2025-07-01 17:21:29,162 - INFO - 已合并页眉页脚
2025-07-01 17:21:29,193 - INFO - 已修复所有页眉边框线
2025-07-01 17:21:29,249 - INFO - 已保存合并文档: C:\Users\VogelimKafig\Desktop\金风模板\H3-08#_2021年12月10日_5.docx
2025-07-01 17:21:29,340 - INFO - 已清理资源

BIN
output/output.docx Normal file

Binary file not shown.

BIN
source/muban/empty.docx Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
source/muban/quexian.docx Normal file

Binary file not shown.

Binary file not shown.

BIN
source/muban/use_tools.docx Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

58
tools/docx_tools.py Normal file
View File

@ -0,0 +1,58 @@
from docx import Document
def search_and_replace(filename: str, find_text: str, replace_text: str) -> str:
    """Replace every occurrence of *find_text* with *replace_text* in a document.

    The document is only written back when at least one replacement happened.

    Args:
        filename: Path to the Word document.
        find_text: Text to search for.
        replace_text: Replacement text.

    Returns:
        A human-readable status message describing the outcome.
    """
    try:
        document = Document(filename)
        replaced = find_and_replace_text(document, find_text, replace_text)
        # Guard clause: nothing matched, so skip the save entirely.
        if not replaced:
            return f"No occurrences of '{find_text}' found."
        document.save(filename)
        return f"Replaced {replaced} occurrence(s) of '{find_text}' with '{replace_text}'."
    except Exception as exc:
        return f"Failed to search and replace: {str(exc)}"
def find_and_replace_text(doc, old_text, new_text):
    """
    Find and replace text throughout the document body and all table cells.

    Note: replacement is performed per run, so an occurrence of *old_text*
    split across multiple runs (e.g. by a formatting change) is not replaced.

    Args:
        doc: Document object
        old_text: Text to find
        new_text: Text to replace with
    Returns:
        Number of replacements made
    """
    def _replace_in_paragraph(para):
        # Count real occurrences per run before replacing, so the returned
        # total reflects replacements made rather than runs touched
        # (the previous version under-counted multiple hits in one run).
        replaced = 0
        if old_text in para.text:
            for run in para.runs:
                hits = run.text.count(old_text)
                if hits:
                    run.text = run.text.replace(old_text, new_text)
                    replaced += hits
        return replaced

    count = 0
    # Body paragraphs.
    for para in doc.paragraphs:
        count += _replace_in_paragraph(para)
    # Paragraphs nested inside table cells (shares the helper above instead
    # of duplicating the scan logic).
    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                for para in cell.paragraphs:
                    count += _replace_in_paragraph(para)
    return count

27
tools/json_process.py Normal file
View File

@ -0,0 +1,27 @@
def merge_info(frontend_info, default_info):
    """
    Merge frontend-supplied info with the default info.

    Rule: if the frontend value is missing or empty (None or ""), the default
    value is used instead.

    Args:
        frontend_info: dict supplied by the frontend (may be None or partial)
        default_info: complete dict of default values
    Returns:
        A new, complete dict; its key set is exactly default_info's.
    """
    # Non-dict input (including None) falls back entirely to the defaults.
    # The redundant "or frontend_info is None" check was dropped: None is
    # never an instance of dict. A copy is returned so callers can never
    # mutate the shared default dict through the return value.
    if not isinstance(frontend_info, dict):
        return dict(default_info)
    return {
        key: default_value
        if frontend_info.get(key) in (None, "")
        else frontend_info[key]
        for key, default_value in default_info.items()
    }

465
tools/total_copy_docx.py Normal file
View File

@ -0,0 +1,465 @@
import os
import zipfile
import shutil
import logging
from tempfile import mkdtemp
from lxml import etree
from copy import deepcopy
# 配置日志
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler('docx_merge_advanced.log'),
logging.StreamHandler()
]
)
logger = logging.getLogger(__name__)
class AdvancedDocxMerger:
    """Merge two .docx files at the OPC/XML level.

    Strategy: unzip both documents into a temporary directory, copy doc1 as
    the base, then walk doc2's parts — new parts are copied in, shared XML
    parts are merged element-by-element, and the result is re-zipped.
    """

    def __init__(self, doc1_path, doc2_path, output_path):
        # Input documents and the target output path.
        self.doc1_path = doc1_path
        self.doc2_path = doc2_path
        self.output_path = output_path
        # Scratch layout: doc1/ and doc2/ hold the unzipped inputs,
        # merged/ is the result tree that gets re-zipped.
        self.temp_dir = mkdtemp(prefix='docx_merge_')
        self.doc1_dir = os.path.join(self.temp_dir, "doc1")
        self.doc2_dir = os.path.join(self.temp_dir, "doc2")
        self.merged_dir = os.path.join(self.temp_dir, "merged")
        # XML namespaces used by the xpath queries below.
        self.ns = {
            'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
            'r': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships',
            'wp': 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing'
        }
        # Parser config: drop ignorable whitespace so parsed trees stay compact.
        self.parser = etree.XMLParser(remove_blank_text=True)
        logger.info(f"初始化合并器,临时目录: {self.temp_dir}")

    def _extract_docx(self):
        """Unzip both Word documents into the temporary directory.

        Returns True on success, False on any extraction failure.
        """
        logger.info("开始解压文档...")
        try:
            os.makedirs(self.doc1_dir, exist_ok=True)
            os.makedirs(self.doc2_dir, exist_ok=True)
            with zipfile.ZipFile(self.doc1_path, 'r') as zip_ref:
                zip_ref.extractall(self.doc1_dir)
            logger.info(f"解压 {self.doc1_path} 完成")
            with zipfile.ZipFile(self.doc2_path, 'r') as zip_ref:
                zip_ref.extractall(self.doc2_dir)
            logger.info(f"解压 {self.doc2_path} 完成")
            return True
        except Exception as e:
            logger.error(f"解压失败: {str(e)}")
            return False

    def _prepare_merged_dir(self):
        """Prepare the merge directory: start from a full copy of doc1.

        Also records every file path present so _deep_merge_docx can tell
        new parts from shared ones. Returns True on success.
        """
        logger.info("准备合并目录...")
        try:
            # Copy doc1 wholesale as the base of the merge.
            shutil.copytree(self.doc1_dir, self.merged_dir)
            # Record all existing files (forward-slash relative paths).
            self.existing_files = set()
            for root, _, files in os.walk(self.merged_dir):
                for file in files:
                    rel_path = os.path.relpath(os.path.join(root, file), self.merged_dir)
                    self.existing_files.add(rel_path.replace("\\", "/"))
            logger.info(f"初始合并目录准备完成,已有 {len(self.existing_files)} 个文件")
            return True
        except Exception as e:
            logger.error(f"准备合并目录失败: {str(e)}")
            return False

    def _is_xml_file(self, filename):
        """Return True for parts that should be XML-merged (.xml / .rels)."""
        return filename.endswith('.xml') or filename.endswith('.rels')

    def _merge_styles(self, root1, root2):
        """Merge styles.xml: append styles from root2 whose styleId is new."""
        try:
            # Collect the styleIds already present in doc1.
            existing_style_ids = {elem.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}styleId')
                                  for elem in root1 if elem.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}styleId')}
            # Append only styles not already defined.
            added = 0
            for style in root2:
                style_id = style.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}styleId')
                if style_id and style_id not in existing_style_ids:
                    root1.append(deepcopy(style))
                    existing_style_ids.add(style_id)
                    added += 1
            logger.debug(f"添加了 {added} 个新样式")
            return True
        except Exception as e:
            logger.error(f"合并样式失败: {str(e)}")
            return False

    def _merge_numbering(self, root1, root2):
        """Merge numbering.xml: append w:num entries with unseen numIds."""
        try:
            # Collect existing numbering IDs.
            # NOTE(review): OOXML numbering IDs are the namespaced w:numId
            # attribute; a plain 'numId' get() may always return None here,
            # which would make every num look "new" — confirm against real files.
            existing_num_ids = {num.get('numId') for num in root1.xpath('//w:num', namespaces=self.ns)}
            # Append numbering definitions that are not already present.
            added = 0
            for num in root2.xpath('//w:num', namespaces=self.ns):
                num_id = num.get('numId')
                if num_id and num_id not in existing_num_ids:
                    root1.append(deepcopy(num))
                    existing_num_ids.add(num_id)
                    added += 1
            logger.debug(f"添加了 {added} 个新编号")
            return True
        except Exception as e:
            logger.error(f"合并编号失败: {str(e)}")
            return False

    def _merge_notes(self, root1, root2, note_type="footnote"):
        """Merge footnotes/endnotes, re-numbering ids past doc1's maximum."""
        try:
            # Find the current maximum note id.
            # NOTE(review): when doc1 has no notes this evaluates max(0),
            # which raises TypeError ('int' object is not iterable) — the
            # intent was probably max(..., default=0). Also note ids are
            # normally the namespaced w:id attribute, so get('id') may never
            # match. Confirm before relying on this path.
            max_id = max((int(note.get('id', 0)) for note in root1.xpath(f'//w:{note_type}', namespaces=self.ns)) if root1.xpath(f'//w:{note_type}', namespaces=self.ns) else 0)
            # Append doc2's notes with fresh sequential ids.
            added = 0
            for note in root2.xpath(f'//w:{note_type}', namespaces=self.ns):
                max_id += 1
                new_note = deepcopy(note)
                new_note.set('id', str(max_id))
                root1.append(new_note)
                added += 1
            logger.debug(f"添加了 {added} 个新{note_type}")
            return True
        except Exception as e:
            logger.error(f"合并{note_type}失败: {str(e)}")
            return False

    def _merge_header_footer(self, root1, root2):
        """Merge header/footer parts.

        Intentionally a no-op: doc1's headers/footers are kept as-is (the
        naive append below was tried and disabled).
        """
        try:
            # # Simple approach (disabled): append all of doc2's content.
            # for elem in root2:
            #     root1.append(deepcopy(elem))
            return True
        except Exception as e:
            logger.error(f"合并页眉页脚失败: {str(e)}")
            return False

    def _merge_settings(self, root1, root2):
        """Merge settings.xml, combining the two documents' configuration."""
        try:
            # Settings to merge and their per-setting strategy.
            # NOTE(review): the keys below keep the 'w:' prefix, but `tag`
            # computed in the loop strips the full namespace URI and has no
            # prefix (e.g. 'defaultTabStop'), so `tag not in merge_strategies`
            # appears to be always true and none of these strategies ever run.
            # Likewise 'val' attributes in OOXML are namespaced (w:val), so
            # the get('val') calls may always return the default. Confirm.
            merge_strategies = {
                # Page settings
                'w:defaultTabStop': 'max',  # take the larger tab stop
                'w:autoHyphenation': 'or',  # enabled if either doc enables it
                'w:consecutiveHyphenLimit': 'max',  # take the larger limit
                # Compatibility settings
                'w:compat': 'merge',  # merge compat children
                'w:useFELayout': 'or',  # enabled if either doc enables it
                # Revision tracking
                'w:trackRevisions': 'or',  # enabled if either doc enables it
                'w:doNotTrackMoves': 'and',  # enabled only if both docs enable it
                # Other important settings
                'w:zoom': 'doc1',  # keep doc1's zoom
                'w:mirrorMargins': 'or',  # enabled if either doc enables it
            }
            # Human-readable log of merges performed.
            changes = []
            # Walk each setting element from doc2.
            for setting in root2:
                # Strip the namespace URI to get the local tag name.
                tag = setting.tag.split('}')[1] if '}' in setting.tag else setting.tag
                # Skip settings with no declared strategy.
                if tag not in merge_strategies:
                    continue
                strategy = merge_strategies[tag]
                existing = root1.xpath(f'//w:{tag}', namespaces=self.ns)
                if not existing:
                    # doc1 lacks the setting entirely: adopt doc2's element.
                    root1.append(deepcopy(setting))
                    changes.append(f"添加 {tag} = {setting.get('val', '')}")
                else:
                    # Merge according to the strategy.
                    existing_setting = existing[0]
                    if strategy == 'max':
                        val1 = float(existing_setting.get('val', 0))
                        val2 = float(setting.get('val', 0))
                        if val2 > val1:
                            existing_setting.set('val', str(val2))
                            changes.append(f"更新 {tag} 为较大值: {val1} → {val2}")
                    elif strategy == 'or':
                        if setting.get('val') == '1' and existing_setting.get('val') != '1':
                            existing_setting.set('val', '1')
                            changes.append(f"启用 {tag}")
                    elif strategy == 'and':
                        if setting.get('val') != '1' and existing_setting.get('val') == '1':
                            existing_setting.set('val', '0')
                            changes.append(f"禁用 {tag} (因doc2禁用)")
                    elif strategy == 'merge' and tag == 'w:compat':
                        # Merge compatibility children that doc1 is missing.
                        # NOTE(review): the '"}"' inside this f-string
                        # expression requires the Python 3.12+ f-string
                        # grammar (PEP 701); earlier versions reject it.
                        for child in setting:
                            if not root1.xpath(f'//w:compat/w:{child.tag.split("}")[1]}', namespaces=self.ns):
                                existing_setting.append(deepcopy(child))
                                changes.append(f"添加兼容性设置 {child.tag.split('}')[1]}")
            # Special case: document protection settings.
            doc_protection1 = root1.xpath('//w:documentProtection', namespaces=self.ns)
            doc_protection2 = root2.xpath('//w:documentProtection', namespaces=self.ns)
            if doc_protection2 and not doc_protection1:
                root1.append(deepcopy(doc_protection2[0]))
                changes.append("添加文档保护设置")
            # Special case: spelling/grammar proofing state.
            proof_state1 = root1.xpath('//w:proofState', namespaces=self.ns)
            proof_state2 = root2.xpath('//w:proofState', namespaces=self.ns)
            if proof_state2:
                if proof_state1:
                    # Mark clean only if doc2 reports clean.
                    for attr in ['spelling', 'grammar']:
                        if proof_state2[0].get(attr) == 'clean' and proof_state1[0].get(attr) != 'clean':
                            proof_state1[0].set(attr, 'clean')
                            changes.append(f"更新 {attr} 检查状态为 clean")
                else:
                    root1.append(deepcopy(proof_state2[0]))
                    changes.append("添加拼写检查设置")
            if changes:
                logger.info(f"合并文档设置,进行了 {len(changes)} 处更改:\n - " + "\n - ".join(changes))
            else:
                logger.info("文档设置无需更改,保留第一个文档的设置")
            return True
        except Exception as e:
            logger.error(f"合并文档设置失败: {str(e)}")
            return False

    def _merge_relationships(self, root1, root2):
        """Merge a .rels part, re-keying doc2 relationship Ids past doc1's max."""
        try:
            # All relationship Ids already used in doc1.
            existing_ids = {rel.get('Id') for rel in root1}
            # Find the highest numeric rId suffix in doc1.
            max_id = 0
            for rel in root1:
                if rel.get('Id', '').startswith('rId'):
                    try:
                        current_id = int(rel.get('Id')[3:])
                        if current_id > max_id:
                            max_id = current_id
                    except ValueError:
                        pass  # non-numeric suffix (e.g. custom Id) — ignore
            # Append doc2 relationships under fresh unique Ids.
            added = 0
            for rel in root2:
                rel_id = rel.get('Id')
                if rel_id not in existing_ids:
                    # Assign the next free rId.
                    max_id += 1
                    new_id = f"rId{max_id}"
                    rel.set('Id', new_id)
                    root1.append(deepcopy(rel))
                    added += 1
            logger.debug(f"添加了 {added} 个新关系")
            return True
        except Exception as e:
            logger.error(f"合并关系失败: {str(e)}")
            return False

    def _merge_xml_files(self, file1, file2, output_file):
        """Merge two XML parts into output_file, dispatching on part name.

        Returns True on success (or when file2 does not exist), False on error.
        """
        try:
            # Special-case settings.xml and .rels files.
            # NOTE(review): these two branches return WITHOUT writing tree1
            # back to output_file, so their merge results appear to be
            # discarded — confirm whether a tree1.write(...) is missing here.
            if 'settings.xml' in output_file:
                if not os.path.exists(file2):
                    return True
                tree1 = etree.parse(file1, self.parser)
                tree2 = etree.parse(file2, self.parser)
                return self._merge_settings(tree1.getroot(), tree2.getroot())
            if '_rels' in output_file and output_file.endswith('.rels'):
                if not os.path.exists(file2):
                    return True
                tree1 = etree.parse(file1, self.parser)
                tree2 = etree.parse(file2, self.parser)
                return self._merge_relationships(tree1.getroot(), tree2.getroot())
            # Generic XML merge path.
            if not os.path.exists(file2):
                return True
            tree1 = etree.parse(file1, self.parser)
            tree2 = etree.parse(file2, self.parser)
            root1 = tree1.getroot()
            root2 = tree2.getroot()
            # Dispatch to the part-specific merge routine.
            if 'document.xml' in output_file:
                self._merge_document_content(root1, root2)
            elif 'styles.xml' in output_file:
                self._merge_styles(root1, root2)
            elif 'footnotes.xml' in output_file:
                self._merge_notes(root1, root2, "footnote")
            elif 'endnotes.xml' in output_file:
                self._merge_notes(root1, root2, "endnote")
            elif 'numbering.xml' in output_file:
                self._merge_numbering(root1, root2)
            elif 'header' in output_file or 'footer' in output_file:
                self._merge_header_footer(root1, root2)
            else:
                # Default strategy: append every child of root2.
                for child in root2:
                    root1.append(deepcopy(child))
            # Persist the merged tree.
            tree1.write(output_file, encoding='UTF-8', xml_declaration=True)
            logger.debug(f"成功合并XML文件: {output_file}")
            return True
        except Exception as e:
            logger.error(f"合并XML文件失败 {output_file}: {str(e)}")
            return False

    def _merge_document_content(self, root1, root2):
        """Append doc2's body content after doc1's, preserving section formatting.

        Raises: re-raises any exception after logging it.
        """
        try:
            body1 = root1.xpath("//w:body", namespaces=self.ns)
            body2 = root2.xpath("//w:body", namespaces=self.ns)
            if not body1 or not body2:
                logger.warning("文档缺少body元素")
                return
            body1 = body1[0]
            body2 = body2[0]
            # Re-emit doc1's last sectPr inside a paragraph before appending,
            # so doc1's page formatting survives the merge.
            sect_prs = body1.xpath(".//w:sectPr", namespaces=self.ns)
            if sect_prs:
                sect_pr = sect_prs[-1]
                new_p = etree.Element("{http://schemas.openxmlformats.org/wordprocessingml/2006/main}p")
                new_p.append(deepcopy(sect_pr))
                body1.append(new_p)
            # Append all of doc2's body children.
            for elem in body2:
                body1.append(deepcopy(elem))
        except Exception as e:
            logger.error(f"合并文档内容失败: {str(e)}")
            raise

    def _deep_merge_docx(self):
        """Walk every file of doc2 and merge/copy it into the merged tree."""
        logger.info("开始深度合并文档...")
        # Walk all of doc2's extracted files.
        for root, _, files in os.walk(self.doc2_dir):
            for file in files:
                src_file = os.path.join(root, file)
                rel_path = os.path.relpath(src_file, self.doc2_dir)
                dest_file = os.path.join(self.merged_dir, rel_path)
                # Normalize separators for comparison against existing_files.
                norm_rel_path = rel_path.replace("\\", "/")
                if norm_rel_path not in self.existing_files:
                    # New part: copy it straight in.
                    os.makedirs(os.path.dirname(dest_file), exist_ok=True)
                    shutil.copy2(src_file, dest_file)
                    logger.debug(f"复制新文件: {norm_rel_path}")
                else:
                    # Shared part: only XML parts are merged.
                    if self._is_xml_file(file):
                        existing_file = os.path.join(self.merged_dir, rel_path)
                        if os.path.exists(existing_file):
                            if not self._merge_xml_files(existing_file, src_file, dest_file):
                                logger.warning(f"合并失败,保留原文件: {norm_rel_path}")
                    else:
                        # Non-XML duplicates (media, fonts): keep doc1's copy.
                        logger.debug(f"文件已存在,跳过: {norm_rel_path}")
        logger.info("深度合并完成")
        return True

    def _repack_docx(self):
        """Zip the merged tree back into the output .docx. Returns True on success."""
        logger.info("开始重新打包文档...")
        try:
            with zipfile.ZipFile(self.output_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
                for root, _, files in os.walk(self.merged_dir):
                    for file in files:
                        file_path = os.path.join(root, file)
                        arcname = os.path.relpath(file_path, self.merged_dir)
                        zipf.write(file_path, arcname)
            logger.info(f"成功创建合并文档: {self.output_path}")
            return True
        except Exception as e:
            logger.error(f"重新打包失败: {str(e)}")
            return False

    def _cleanup(self):
        """Best-effort removal of the temporary working directory."""
        try:
            shutil.rmtree(self.temp_dir)
            logger.info("已清理临时文件")
        except Exception as e:
            logger.warning(f"清理临时文件失败: {str(e)}")

    def merge(self):
        """Run the full pipeline: extract → prepare → merge → repack.

        Returns True on success, False on the first failing stage.
        Note: the temp dir is only cleaned up on the success path.
        """
        logger.info(f"开始合并文档: {self.doc1_path} + {self.doc2_path} -> {self.output_path}")
        if not self._extract_docx():
            return False
        if not self._prepare_merged_dir():
            return False
        if not self._deep_merge_docx():
            return False
        if not self._repack_docx():
            return False
        self._cleanup()
        logger.info("文档合并成功完成!")
        return True
if __name__ == "__main__":
    # Usage example: merge the cover template with the defect template.
    merger = AdvancedDocxMerger(
        doc1_path="jingfeng_fengmian1.docx",
        doc2_path="quexian.docx",
        output_path="merged_document.docx"
    )
    if merger.merge():
        print("合并成功!")
    else:
        print("合并过程中出现错误,请查看日志文件。")

173
tools/win32_total_copy.py Normal file
View File

@ -0,0 +1,173 @@
import win32com.client as win32
import pythoncom
import logging
from pathlib import Path
# Logging setup: mirror all messages to a dedicated log file and the console.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('docx_merge_win32com.log'),
        logging.StreamHandler()
    ]
)
# Module-level logger used throughout WordDocumentMerger.
logger = logging.getLogger(__name__)
# Word constant definitions (fallback for when the win32com constants
# module is unavailable, e.g. without a generated type library).
class WordConstants:
    # Range.Collapse direction: collapse to the end of the range.
    wdCollapseEnd = 0
    # InsertBreak type: section break, next page.
    wdSectionBreakNextPage = 2
    # Header/footer index constants.
    wdHeaderFooterPrimary = 1
    wdHeaderFooterFirstPage = 2
    wdHeaderFooterEvenPages = 3
    wdBorderBottom = -3  # bottom border constant
    wdLineStyleSingle = 1  # single-line border style
class WordDocumentMerger:
    """Merge two Word documents by driving Word (or WPS) via COM automation."""

    def __init__(self):
        # Initialize COM for this thread and launch a dedicated, hidden
        # Word instance with alerts suppressed.
        pythoncom.CoInitialize()
        self.word_app = win32.DispatchEx('Word.Application')
        self.word_app.Visible = False
        self.word_app.DisplayAlerts = False
        self.constants = WordConstants()

    def merge_documents(self, doc1_path, doc2_path, output_path):
        """Merge two Word documents: append doc2 into doc1 and save.

        Args:
            doc1_path: path of the base document (content is appended to it).
            doc2_path: path of the document whose content is appended.
            output_path: desired output path (suffixed if it already exists).
        Returns:
            True on success, False on any failure (logged).
        """
        doc1 = doc2 = None
        try:
            logger.info(f"开始合并文档: {doc1_path} 和 {doc2_path}")
            # Open both documents.
            doc1 = self.word_app.Documents.Open(str(doc1_path))
            logger.info(f"已打开第一个文档: {doc1_path}")
            doc2 = self.word_app.Documents.Open(str(doc2_path))
            logger.info(f"已打开第二个文档: {doc2_path}")
            # Append doc2's content (and headers/footers) into doc1.
            self._insert_document_content(doc1, doc2)
            # Repair header border formatting after the paste.
            self._fix_header_borders(doc1)
            # Save under a collision-free name.
            output_path = self._ensure_unique_filename(output_path)
            doc1.SaveAs(str(output_path))
            logger.info(f"已保存合并文档: {output_path}")
            return True
        except Exception as e:
            logger.error(f"文档合并失败: {str(e)}", exc_info=True)
            return False
        finally:
            # NOTE(review): _cleanup quits the Word app and uninitializes COM,
            # so each WordDocumentMerger instance is single-use — a second
            # merge_documents call on the same instance would fail. Confirm
            # this is intended.
            self._cleanup(doc1, doc2)

    def _insert_section_break(self, doc):
        """Insert a next-page section break at the end of the document.

        NOTE(review): not called by any visible code in this class — appears
        to be an unused helper; confirm before removing.
        Raises: re-raises any COM error after logging.
        """
        try:
            end_range = doc.Content
            end_range.Collapse(Direction=self.constants.wdCollapseEnd)
            end_range.InsertBreak(Type=self.constants.wdSectionBreakNextPage)
            logger.debug("已插入分节符")
        except Exception as e:
            logger.error(f"插入分节符失败: {str(e)}")
            raise

    def _insert_document_content(self, target_doc, source_doc):
        """Copy-paste source_doc's entire content to the end of target_doc.

        Uses the clipboard (Copy/Paste), then merges headers/footers too.
        Raises: re-raises any COM error after logging.
        """
        try:
            source_range = source_doc.Content
            target_range = target_doc.Content
            # Collapse to the end so the paste appends rather than replaces.
            target_range.Collapse(Direction=self.constants.wdCollapseEnd)
            source_range.Copy()
            target_range.Paste()
            logger.info("已插入第二个文档内容")
            # Bring over headers and footers as well.
            self._merge_headers_footers(target_doc, source_doc)
        except Exception as e:
            logger.error(f"插入文档内容失败: {str(e)}")
            raise

    def _merge_headers_footers(self, target_doc, source_doc):
        """Copy source's first-section headers/footers onto target's last section.

        Raises: re-raises any COM error after logging.
        """
        try:
            last_section = target_doc.Sections(target_doc.Sections.Count)
            src_section = source_doc.Sections(1)
            # Copy each header variant that exists in the source.
            for header_type in [self.constants.wdHeaderFooterPrimary,
                                self.constants.wdHeaderFooterFirstPage,
                                self.constants.wdHeaderFooterEvenPages]:
                if src_section.Headers(header_type).Exists:
                    src_section.Headers(header_type).Range.Copy()
                    last_section.Headers(header_type).Range.Paste()
            # Copy each footer variant that exists in the source.
            for footer_type in [self.constants.wdHeaderFooterPrimary,
                                self.constants.wdHeaderFooterFirstPage,
                                self.constants.wdHeaderFooterEvenPages]:
                if src_section.Footers(footer_type).Exists:
                    src_section.Footers(footer_type).Range.Copy()
                    last_section.Footers(footer_type).Range.Paste()
            logger.info("已合并页眉页脚")
        except Exception as e:
            logger.error(f"合并页眉页脚失败: {str(e)}")
            raise

    def _fix_header_borders(self, doc):
        """Ensure every header's first paragraph has a single bottom border line.

        Errors are logged but not re-raised (best-effort cosmetic fix).
        """
        try:
            for section in doc.Sections:
                for header_type in [self.constants.wdHeaderFooterPrimary,
                                    self.constants.wdHeaderFooterFirstPage]:
                    if section.Headers(header_type).Exists:
                        header = section.Headers(header_type).Range
                        if header.Paragraphs.Count > 0:
                            border = header.Paragraphs(1).Borders(
                                self.constants.wdBorderBottom)
                            border.LineStyle = self.constants.wdLineStyleSingle
                            # 4 presumably maps to wdLineWidth050pt (0.5pt) — confirm.
                            border.LineWidth = 4
            logger.info("已修复所有页眉边框线")
        except Exception as e:
            logger.error(f"修复页眉边框失败: {str(e)}")

    def _ensure_unique_filename(self, path):
        """Return *path* if free; otherwise append _1, _2, ... before the suffix."""
        path = Path(path)
        if not path.exists():
            return path
        counter = 1
        while True:
            new_path = path.parent / f"{path.stem}_{counter}{path.suffix}"
            if not new_path.exists():
                return new_path
            counter += 1

    def _cleanup(self, *docs):
        """Close the given documents without saving, quit Word, release COM."""
        try:
            for doc in docs:
                if doc and hasattr(doc, 'Close'):
                    doc.Close(SaveChanges=False)
            if hasattr(self, 'word_app') and self.word_app:
                self.word_app.Quit()
            pythoncom.CoUninitialize()
            logger.info("已清理资源")
        except Exception as e:
            logger.error(f"清理资源失败: {str(e)}")
# if __name__ == "__main__":
# merger = WordDocumentMerger()
# # 修改为您的实际文件路径
# doc1_path = Path(r"C:\Users\VogelimKafig\Desktop\金风模板\jingfeng_fengmian1.docx")
# doc2_path = Path(r"C:\Users\VogelimKafig\Desktop\金风模板\quexian.docx")
# output_path = Path(r"C:\Users\VogelimKafig\Desktop\金风模板\merged_document.docx")
# if merger.merge_documents(doc1_path, doc2_path, output_path):
# print(f"合并成功! 输出文件: {output_path}")
# else:
# print("合并失败,请查看日志文件。")