"""Document creation and manipulation tools for Word Document Server."""
import json
import os
import re
from typing import Any, Dict, List, Optional, Tuple

from docx import Document
from docx.enum.table import WD_ALIGN_VERTICAL
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml import OxmlElement
from docx.oxml.shared import qn
from docx.shared import Cm, Inches, Mm, Pt, RGBColor

from utils.file_utils import check_file_writeable, ensure_docx_extension, create_document_copy
from utils.document_utils import get_document_properties, extract_document_text, get_document_structure
from core.styles import ensure_heading_style, ensure_table_style
from tools.content_tools import search_and_replace, add_picture_to_table

# East-Asian fallback font names written to w:rFonts when a source run has no
# explicit font. The two spellings differ by one space in the original code and
# are both preserved so the generated XML stays byte-compatible.
_EAST_ASIA_FALLBACK = '宋体(中文正文)'
_EAST_ASIA_FALLBACK_ADD_DOCUMENTS = '宋体 (中文正文)'


def _copy_run_format(source_run, new_run, east_asia_fallback: str,
                     copy_page_breaks: bool = False) -> None:
    """Copy character formatting from ``source_run`` onto ``new_run``.

    Copies bold/italic/underline, the font name, the East-Asian font, the RGB
    colour and (when set) the font size. The source run is never modified.

    Args:
        source_run: Run to read formatting from.
        new_run: Run that receives the formatting.
        east_asia_fallback: East-Asian font name used when the source run has
            no explicit font name.
        copy_page_breaks: When true, every ``<w:br w:type="page"/>`` child of
            the source run is re-created at the end of ``new_run``.
    """
    new_run.bold = source_run.bold
    new_run.italic = source_run.italic
    new_run.underline = source_run.underline

    font_name = source_run.font.name
    new_run.font.name = font_name
    r_fonts = new_run.element.get_or_add_rPr().get_or_add_rFonts()
    r_fonts.set(
        qn('w:eastAsia'),
        font_name if font_name is not None else east_asia_fallback,
    )

    new_run.font.color.rgb = source_run.font.color.rgb
    if source_run.font.size:
        new_run.font.size = source_run.font.size

    if copy_page_breaks:
        for element in source_run._element:
            # The attribute is namespaced ("w:type"); qn() needs the prefix.
            if element.tag == qn('w:br') and element.get(qn('w:type')) == 'page':
                new_br = OxmlElement('w:br')
                new_br.set(qn('w:type'), 'page')
                new_run._element.append(new_br)


def _copy_paragraph(target_doc, paragraph, east_asia_fallback: str = _EAST_ASIA_FALLBACK,
                    copy_page_break_before: bool = False,
                    copy_page_breaks: bool = False,
                    log_alignment: bool = False):
    """Append a copy of ``paragraph`` (text plus basic formatting) to ``target_doc``.

    The new paragraph starts with the 'Normal' style and is switched to the
    source paragraph's style when a style of that name exists in the target.
    Run formatting is copied pairwise, so only as many source runs as the new
    paragraph actually has runs are applied.

    Args:
        target_doc: Document that receives the new paragraph.
        paragraph: Source paragraph.
        east_asia_fallback: Passed through to ``_copy_run_format``.
        copy_page_break_before: Also copy the "page break before" setting.
        copy_page_breaks: Also copy explicit page-break elements inside runs.
        log_alignment: Print the source alignment (kept for ``add_documents``).

    Returns:
        The newly created paragraph.
    """
    new_paragraph = target_doc.add_paragraph(paragraph.text)
    new_paragraph.style = target_doc.styles['Normal']  # default style
    new_paragraph.alignment = paragraph.alignment
    if log_alignment:
        print(f"Source paragraph alignment: {paragraph.alignment}")
    if copy_page_break_before:
        new_paragraph.paragraph_format.page_break_before = (
            paragraph.paragraph_format.page_break_before
        )

    # Best effort: keep 'Normal' when the style cannot be matched.
    try:
        if paragraph.style and paragraph.style.name in target_doc.styles:
            new_paragraph.style = target_doc.styles[paragraph.style.name]
    except Exception as e:
        print(f"Failed to apply style: {e}")

    for source_run, new_run in zip(paragraph.runs, new_paragraph.runs):
        _copy_run_format(source_run, new_run, east_asia_fallback, copy_page_breaks)
    return new_paragraph


async def create_document(filename: str, title: Optional[str] = None,
                          author: Optional[str] = None) -> str:
    """Create a new Word document with optional metadata.

    The page size is set to 210 mm x 297 mm (A4) with 0.94 inch left and
    right margins on every section.

    Args:
        filename: Name of the document to create (with or without ``.docx``).
        title: Optional title stored in the core properties.
        author: Optional author stored in the core properties.

    Returns:
        A human-readable success or failure message.
    """
    filename = ensure_docx_extension(filename)

    is_writeable, error_message = check_file_writeable(filename)
    if not is_writeable:
        return f"Cannot create document: {error_message}"

    try:
        doc = Document()

        if title:
            doc.core_properties.title = title
        if author:
            doc.core_properties.author = author

        # Make sure the styles used by other tools exist.
        ensure_heading_style(doc)
        ensure_table_style(doc)

        # Switch the paper size to A4.
        for section in doc.sections:
            section.page_height = Mm(297)
            section.page_width = Mm(210)
            section.left_margin = Inches(0.94)
            section.right_margin = Inches(0.94)

        doc.save(filename)
        return f"Document {filename} created successfully"
    except Exception as e:
        return f"Failed to create document: {str(e)}"


async def get_document_info(filename: str) -> str:
    """Return the document properties as a JSON string.

    Args:
        filename: Target document (with or without ``.docx``).

    Returns:
        Indented JSON of the properties, or an error message when the file
        does not exist or the properties cannot be read.
    """
    filename = ensure_docx_extension(filename)

    if not os.path.exists(filename):
        return f"Document {filename} does not exist"

    try:
        properties = get_document_properties(filename)
        return json.dumps(properties, indent=2)
    except Exception as e:
        return f"Failed to get document info: {str(e)}"


async def get_document_text(filename: str) -> str:
    """Return all text of the document.

    Args:
        filename: Target document (with or without ``.docx``).
    """
    filename = ensure_docx_extension(filename)
    return extract_document_text(filename)


async def get_document_outline(filename: str) -> str:
    """Return the structure of the document as an indented JSON string.

    Args:
        filename: Target document (with or without ``.docx``).
    """
    filename = ensure_docx_extension(filename)
    structure = get_document_structure(filename)
    return json.dumps(structure, indent=2)


async def list_available_documents(directory: str = ".") -> str:
    """List every ``.docx`` file in a directory together with its size in KB.

    Args:
        directory: Directory to scan (not recursive).

    Returns:
        A multi-line listing, or a message when the directory is missing,
        contains no Word documents, or cannot be read.
    """
    try:
        if not os.path.exists(directory):
            return f"Directory {directory} does not exist"

        docx_files = [f for f in os.listdir(directory) if f.endswith('.docx')]
        if not docx_files:
            return f"No Word documents found in {directory}"

        lines = [f"Found {len(docx_files)} Word documents in {directory}:\n"]
        for file in docx_files:
            size = os.path.getsize(os.path.join(directory, file)) / 1024  # KB
            lines.append(f"- {file} ({size:.2f} KB)\n")
        return "".join(lines)
    except Exception as e:
        return f"Failed to list documents: {str(e)}"


async def copy_document(source_filename: str,
                        destination_filename: Optional[str] = None) -> str:
    """Create a copy of a document.

    Args:
        source_filename: Path of the source document.
        destination_filename: Path of the copy; when empty the choice is left
            to ``create_document_copy``.

    Returns:
        The message produced by ``create_document_copy`` on success, otherwise
        that message prefixed with a failure notice.
    """
    source_filename = ensure_docx_extension(source_filename)
    if destination_filename:
        destination_filename = ensure_docx_extension(destination_filename)

    success, message, _new_path = create_document_copy(source_filename, destination_filename)
    if success:
        return message
    return f"Failed to copy document: {message}"


def add_documents(target_filename: str, source_filename: str) -> str:
    """Append the paragraphs (text only) of a source document to a target document.

    Args:
        target_filename: Path of the document to extend; opened as given.
        source_filename: Path of the source document (``.docx`` is added when
            missing).

    Returns:
        A success message naming both files.
    """
    target_doc = Document(target_filename)
    source_filename = ensure_docx_extension(source_filename)
    source_doc = Document(source_filename)

    for source_paragraph in source_doc.paragraphs:
        _copy_paragraph(
            target_doc,
            source_paragraph,
            east_asia_fallback=_EAST_ASIA_FALLBACK_ADD_DOCUMENTS,
            log_alignment=True,
        )

    target_doc.save(target_filename)
    return f"{target_filename}添加{source_filename}成功"


def write_table(target_filename: str, rows: int, cols: int, table_num: int,
                data: Optional[List[List[str]]] = None,
                ifadjustheight: Optional[bool] = True,
                height: Optional[float] = 1,
                key_words: Optional[re.Pattern[str]] = None,
                ALIGMENT: Optional[str] = 'CENTER') -> Document:
    """Fill one table of a Word document and return the (unsaved) document.

    Non-empty cells of ``data`` are written as text using Times New Roman /
    仿宋 at 10.5 pt, vertically centred. Cells whose text matches
    ``key_words`` are coloured red. The document is NOT saved here.

    Args:
        target_filename: Path of the document that contains the table.
        rows: Row count; data rows with index greater than ``rows`` are ignored.
        cols: Column count; cells with index greater than ``cols`` are ignored.
        table_num: Index of the table inside the document.
        data: Two-dimensional cell data; empty strings leave the cell untouched.
        ifadjustheight: When true, the row height is set to ``height`` cm.
        height: Row height in centimetres.
        key_words: Compiled regular expression; matching cells are set to red.
        ALIGMENT: ``'CENTER'`` or ``'LEFT'`` paragraph alignment; any other
            value leaves the alignment unchanged.

    Returns:
        The modified document, or an error string when the file is not
        writeable (kept for backward compatibility).

    Raises:
        Exception: Whatever ``Document(target_filename)`` raised when the file
            cannot be opened.
    """
    target_filename = ensure_docx_extension(target_filename)

    is_writeable, error_message = check_file_writeable(target_filename)
    if not is_writeable:
        return f"Cannot create target document: {error_message}"

    try:
        target_doc = Document(target_filename)
    except Exception as e:
        print(f"获取{target_filename}失败:{str(e)}")
        # Nothing below can work without a document; surface the real error.
        raise

    # Try to set the table style; a missing style is silently tolerated.
    try:
        target_doc.tables[table_num].style = 'Table Grid'
    except KeyError:
        pass
    except Exception as e:
        print(f"{target_doc}最后一个表格更改样式失败: {str(e)}")

    print("开始写入表格")
    i = j = None  # defined up front so the error message below can always format
    try:
        if data:
            table = target_doc.tables[table_num]
            for i, row_data in enumerate(data):
                if i >= rows + 1:
                    break
                for j, cell_text in enumerate(row_data):
                    if j >= cols + 1:
                        break
                    text = str(cell_text)
                    if text == "":
                        continue

                    print(f"在[{i},{j}]处写入{text}")
                    cell = table.cell(i, j)
                    cell.text = text
                    paragraph = cell.paragraphs[0]
                    run = paragraph.runs[0]

                    print(key_words, cell_text)
                    if key_words and key_words.search(text):
                        print(f'{cell_text}包含关键之,已置红')
                        run.font.color.rgb = RGBColor(255, 0, 0)

                    run.font.name = "Times New Roman"  # Latin font
                    run.font.size = Pt(10.5)
                    run._element.rPr.rFonts.set(qn('w:eastAsia'), '仿宋')  # East-Asian font

                    if ALIGMENT == 'CENTER':
                        paragraph.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
                    elif ALIGMENT == 'LEFT':
                        paragraph.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.LEFT
                    cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER

                    if ifadjustheight:
                        table.rows[i].height = Cm(height)
    except Exception as e:
        print(f"写入{target_filename}tables.cell({i},{j})失败:{str(e)}")

    print("表格写入完成")
    return target_doc


def set_document_para(target_doc: Document, preserve_first: int = 12) -> Document:
    """Remove paragraphs without text, skipping the first ``preserve_first`` ones.

    Args:
        target_doc: Document to clean up (modified in place).
        preserve_first: Number of leading paragraphs that are never removed.

    Returns:
        The same document object.
    """
    # NOTE(review): a paragraph whose text is empty is removed even if it holds
    # non-text content (e.g. a picture or break) - confirm this is intended.
    paragraphs_to_remove = [
        paragraph
        for index, paragraph in enumerate(target_doc.paragraphs)
        if index >= preserve_first and not paragraph.text.strip()
    ]
    for paragraph in paragraphs_to_remove:
        element = paragraph._element
        element.getparent().remove(element)
    return target_doc


async def add_table_to_document(target_filename: str, source_filename: str,
                                rows: int, cols: int, table_num: int,
                                data: Optional[List[List[str]]] = None,
                                ifadjustheight: Optional[bool] = True,
                                height: Optional[float] = 1,
                                key_words: Optional[re.Pattern[str]] = None,
                                ALIGMENT: Optional[str] = 'CENTER') -> Tuple[Any, str]:
    """Copy the source's paragraphs and first table into the target, then fill a table.

    Steps: copy all source paragraphs, copy the first source table, drop empty
    paragraphs via ``set_document_para``, save, fill table ``table_num`` via
    ``write_table`` and save again. Failures of the individual steps are
    printed and the remaining steps still run.

    Args:
        target_filename: Path of the document to extend.
        source_filename: Path of the template document.
        rows: Row count forwarded to ``write_table``.
        cols: Column count forwarded to ``write_table``.
        table_num: Index of the table to fill in the target document.
        data: Two-dimensional cell data forwarded to ``write_table``.
        ifadjustheight: Forwarded to ``copy_table`` and ``write_table``.
        height: Row height forwarded to ``copy_table`` and ``write_table``.
        key_words: Compiled regular expression forwarded to ``write_table``.
        ALIGMENT: Alignment forwarded to ``write_table``.

    Returns:
        A ``(document, message)`` tuple.
    """
    target_filename = ensure_docx_extension(target_filename)
    source_filename = ensure_docx_extension(source_filename)
    source_doc = Document(source_filename)
    target_doc = Document(target_filename)

    try:
        for paragraph in source_doc.paragraphs:
            _copy_paragraph(
                target_doc,
                paragraph,
                copy_page_break_before=True,
                copy_page_breaks=True,
            )
    except Exception as e:
        print(f"添加表格前文章失败:{str(e)}")

    try:
        from core.tables import copy_table
        source_table = source_doc.tables[0]
        copy_table(source_table, target_doc, ifadjustheight, height)
        # Report success only when the copy really happened.
        print(f"{target_doc}写入表格{source_table}成功")
    except Exception as e:
        print(f"添加表格失败:{str(e)}")

    target_doc = set_document_para(target_doc)
    target_doc.save(target_filename)
    target_doc = Document(target_filename)

    try:
        written = write_table(target_filename, rows, cols, table_num, data,
                              ifadjustheight, height, key_words, ALIGMENT)
    except Exception as e:
        print(f"{target_filename}写入{data}失败:{str(e)}")
    else:
        if isinstance(written, str):
            # write_table reports a non-writeable file as a string; keep the
            # document loaded above instead of replacing it with the message.
            print(written)
        else:
            target_doc = written

    target_doc.save(target_filename)
    return target_doc, f"{target_filename}添加表格{source_doc}成功"


async def add_table_and_replace(target_filename: str, source_filename: str,
                                ifadjustheight: Optional[bool] = True,
                                list_to_replace: Optional[dict] = None,
                                height: Optional[float] = 1):
    """Copy the source's paragraphs and first table into the target, then replace text.

    The target is saved only when the table copy succeeds. Afterwards every
    ``find -> replace`` pair is applied with ``search_and_replace`` and its
    result is printed.

    Args:
        target_filename: Path of the document to extend.
        source_filename: Path of the template document.
        ifadjustheight: Forwarded to ``copy_table``.
        list_to_replace: Mapping of text to find to its replacement.
        height: Row height forwarded to ``copy_table``.
    """
    replacements = list_to_replace if list_to_replace is not None else {}

    target_filename = ensure_docx_extension(target_filename)
    source_filename = ensure_docx_extension(source_filename)
    source_doc = Document(source_filename)
    target_doc = Document(target_filename)

    try:
        for paragraph in source_doc.paragraphs:
            _copy_paragraph(
                target_doc,
                paragraph,
                copy_page_break_before=True,
                copy_page_breaks=True,
            )
    except Exception as e:
        print(f"添加表格前文章失败:{str(e)}")

    try:
        from core.tables import copy_table
        copy_table(source_doc.tables[0], target_doc, ifadjustheight, height)
        target_doc.save(target_filename)
    except Exception as e:
        print(f"添加表格失败:{str(e)}")

    for find_text, replace_text in replacements.items():
        print(await search_and_replace(target_filename, find_text, replace_text))


async def merge_documents(target_filename: str, source_filenames: List[str],
                          add_page_breaks: bool = True) -> str:
    """Merge several documents into a new one.

    For each source, all paragraphs are copied first and its tables are
    appended after them.

    Args:
        target_filename: Path of the merged document.
        source_filenames: Paths of the source documents, in merge order.
        add_page_breaks: When true, a page break is inserted between sources.

    Returns:
        A human-readable success or failure message.
    """
    from core.tables import copy_table

    target_filename = ensure_docx_extension(target_filename)

    is_writeable, error_message = check_file_writeable(target_filename)
    if not is_writeable:
        return f"Cannot create target document: {error_message}"

    # Validate that every source exists before touching anything.
    source_paths = [ensure_docx_extension(name) for name in source_filenames]
    missing_files = [path for path in source_paths if not os.path.exists(path)]
    if missing_files:
        return f"Cannot merge documents. The following source files do not exist: {', '.join(missing_files)}"

    try:
        target_doc = Document()

        for doc_index, doc_filename in enumerate(source_paths):
            source_doc = Document(doc_filename)

            # Page break between documents (not before the first one).
            if add_page_breaks and doc_index > 0:
                target_doc.add_page_break()

            for paragraph in source_doc.paragraphs:
                _copy_paragraph(target_doc, paragraph)

            for table in source_doc.tables:
                copy_table(table, target_doc)

        target_doc.save(target_filename)
        return f"Successfully merged {len(source_filenames)} documents into {target_filename}"
    except Exception as e:
        return f"Failed to merge documents: {str(e)}"


async def right_align_last_three_para(target_filename: str) -> str:
    """Right-align the last three paragraphs of a document and save it.

    Args:
        target_filename: Path of the document to modify.

    Returns:
        A human-readable success or failure message.
    """
    target_filename = ensure_docx_extension(target_filename)

    is_writeable, error_message = check_file_writeable(target_filename)
    if not is_writeable:
        return f"Cannot right align paragraphs: {error_message}"

    try:
        target_doc = Document(target_filename)
        for paragraph in target_doc.paragraphs[-3:]:
            paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT
        target_doc.save(target_filename)
        return f"Successfully right aligned the last three paragraphs in {target_filename}"
    except Exception as e:
        return f"Failed to right align paragraphs: {str(e)}"


async def process_images_table(data_dict, output_dir, start_i,
                               JIANCHA_NEIRONG_PICTURES_TABLE, key_words=None):
    """Append picture/caption tables, three pictures per row.

    For every group of up to three entries a picture table is appended first
    (pictures inserted with ``add_picture_to_table``) and then a caption table
    holding the corresponding names.

    Args:
        data_dict (dict): Mapping of picture name to picture path.
        output_dir (str): Path of the document being written (passed as the
            target filename to ``add_table_to_document``).
        start_i (int): Index to use for the first table that is added.
        JIANCHA_NEIRONG_PICTURES_TABLE (str): Path of the 1x3 table template.
        key_words: Compiled regular expression forwarded for caption rows.

    Returns:
        int: The table index following the last one used.
    """
    items = list(data_dict.items())
    picture_num = len(items)
    line_index = 0      # next caption to write
    picture_index = 0   # next picture to insert
    i = start_i

    for content_row in range(((picture_num + 2) // 3) * 2):
        if content_row % 2 == 1:
            # Caption row: one row, three columns, filled with picture names.
            JIANCHA_NEIRONG_TEXT = [["", "", ""]]
            for l in range(3):
                if line_index >= picture_num:
                    break
                JIANCHA_NEIRONG_TEXT[0][l] = items[line_index][0]
                print(f'当前为文字表格,在({0},{l})位置插入文字: {items[line_index][0]}')
                line_index += 1
            print(f"当前待插入表格: {JIANCHA_NEIRONG_TEXT}")
            print(f"当前表格序号为 {i}")
            output_doc, message = await add_table_to_document(
                output_dir, JIANCHA_NEIRONG_PICTURES_TABLE, 1, 3, i,
                JIANCHA_NEIRONG_TEXT, False, None, key_words
            )
            i += 1
        else:
            # Picture row: add an empty table, then drop the pictures into it.
            print(f"当前表格序号为 {i}")
            output_doc, message = await add_table_to_document(
                output_dir, JIANCHA_NEIRONG_PICTURES_TABLE, 1, 3, i, None, False
            )
            for k in range(3):
                if picture_index < picture_num:
                    pic_path = items[picture_index][1]
                    print(f"当前为图片表格,在(0,{k})位置插入图片: {pic_path}")
                    print(await add_picture_to_table(output_doc, output_dir, 0, k, pic_path, i, 1.8898))
                    picture_index += 1
            i += 1
        print(message)

    return i