"""
Document utility functions for Word Document Server.
"""
import json
import os
from typing import Dict, List, Any
from docx import Document


def _truncate(text: str, limit: int) -> str:
    """Return *text* cut to *limit* characters, adding "..." if it was cut."""
    return text[:limit] + ("..." if len(text) > limit else "")


def get_document_properties(doc_path: str) -> Dict[str, Any]:
    """Get properties of a Word document.

    Args:
        doc_path: Path of the document to inspect.

    Returns:
        A dict with the core properties (title, author, subject, keywords,
        created, modified, last_modified_by, revision) plus simple counts
        (page_count, word_count, paragraph_count, table_count). On any
        failure a dict with a single "error" key is returned instead; this
        function does not raise.
    """
    if not os.path.exists(doc_path):
        return {"error": f"Document {doc_path} does not exist"}

    try:
        doc = Document(doc_path)
        core_props = doc.core_properties
        paragraphs = doc.paragraphs

        return {
            "title": core_props.title or "",
            "author": core_props.author or "",
            "subject": core_props.subject or "",
            "keywords": core_props.keywords or "",
            "created": str(core_props.created) if core_props.created else "",
            "modified": str(core_props.modified) if core_props.modified else "",
            "last_modified_by": core_props.last_modified_by or "",
            "revision": core_props.revision or 0,
            # NOTE: this is the number of sections, not of rendered pages.
            # The key name is kept so existing callers keep working.
            "page_count": len(doc.sections),
            # Whitespace-separated tokens of body paragraphs only; text
            # inside tables is not included in this count.
            "word_count": sum(len(para.text.split()) for para in paragraphs),
            "paragraph_count": len(paragraphs),
            "table_count": len(doc.tables),
        }
    except Exception as e:
        return {"error": f"Failed to get document properties: {str(e)}"}


def extract_document_text(doc_path: str) -> str:
    """Extract all text from a Word document.

    Body paragraphs come first, followed by the paragraphs of every table
    cell (table by table, row by row); entries are joined with newlines.

    Args:
        doc_path: Path of the document to read.

    Returns:
        The extracted text. On failure a human-readable error message is
        returned as the string instead; this function does not raise.
    """
    if not os.path.exists(doc_path):
        return f"Document {doc_path} does not exist"

    try:
        doc = Document(doc_path)
        text = [paragraph.text for paragraph in doc.paragraphs]

        for table in doc.tables:
            for row in table.rows:
                for cell in row.cells:
                    text.extend(paragraph.text for paragraph in cell.paragraphs)

        return "\n".join(text)
    except Exception as e:
        return f"Failed to extract text: {str(e)}"


def get_document_structure(doc_path: str) -> Dict[str, Any]:
    """Get the structure of a Word document.

    Args:
        doc_path: Path of the document to inspect.

    Returns:
        A dict with two lists:
          * "paragraphs": one entry per body paragraph with its index, the
            first 100 characters of its text and its style name.
          * "tables": one entry per table with its index, row and column
            counts and a preview of at most 3x3 cells, each cut to 20
            characters.
        On any failure a dict with a single "error" key is returned
        instead; this function does not raise.
    """
    if not os.path.exists(doc_path):
        return {"error": f"Document {doc_path} does not exist"}

    try:
        doc = Document(doc_path)
        structure = {
            "paragraphs": [],
            "tables": []
        }

        # Get paragraphs
        for i, para in enumerate(doc.paragraphs):
            structure["paragraphs"].append({
                "index": i,
                "text": _truncate(para.text, 100),
                "style": para.style.name if para.style else "Normal"
            })

        # Get tables
        for i, table in enumerate(doc.tables):
            table_data = {
                "index": i,
                "rows": len(table.rows),
                "columns": len(table.columns),
                "preview": []
            }

            # Get sample of table data
            max_rows = min(3, len(table.rows))
            max_cols = min(3, len(table.columns))
            for row_idx in range(max_rows):
                row_data = []
                for col_idx in range(max_cols):
                    try:
                        cell_text = table.cell(row_idx, col_idx).text
                        row_data.append(_truncate(cell_text, 20))
                    except IndexError:
                        # Keep the preview rectangular when a cell is missing.
                        row_data.append("N/A")
                table_data["preview"].append(row_data)

            structure["tables"].append(table_data)

        return structure
    except Exception as e:
        return {"error": f"Failed to get document structure: {str(e)}"}


def find_paragraph_by_text(doc, text, partial_match=False) -> List[int]:
    """
    Find paragraphs containing specific text.

    Args:
        doc: Document object
        text: Text to search for
        partial_match: If True, matches paragraphs containing the text; if False, matches exact text

    Returns:
        List of paragraph indices that match the criteria
    """
    if partial_match:
        return [i for i, para in enumerate(doc.paragraphs) if text in para.text]
    return [i for i, para in enumerate(doc.paragraphs) if para.text == text]


def _replace_in_paragraphs(paragraphs, old_text, new_text) -> int:
    """Replace old_text in every run of the given paragraphs; return the occurrence count."""
    count = 0
    for para in paragraphs:
        # Cheap pre-check so runs of unrelated paragraphs are never touched.
        if old_text not in para.text:
            continue
        for run in para.runs:
            occurrences = run.text.count(old_text)
            if occurrences:
                run.text = run.text.replace(old_text, new_text)
                count += occurrences
    return count


def find_and_replace_text(doc, old_text, new_text):
    """
    Find and replace text throughout the document.

    Replacement is done run by run so the formatting of each run is kept.
    As a consequence, an occurrence of old_text that is split across two
    or more runs is not replaced.

    Args:
        doc: Document object
        old_text: Text to find
        new_text: Text to replace with

    Returns:
        Number of replacements made (every replaced occurrence is counted,
        including several inside the same run). Returns 0 without touching
        the document when old_text is empty.
    """
    # An empty search string "matches" everywhere and str.replace would
    # insert new_text between every character, corrupting the document.
    if not old_text:
        return 0

    # Search in paragraphs
    count = _replace_in_paragraphs(doc.paragraphs, old_text, new_text)

    # Search in tables
    # NOTE(review): python-docx appears to report a merged cell once per grid
    # position it spans, so such a cell may be visited more than once here --
    # confirm if new_text can contain old_text.
    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                count += _replace_in_paragraphs(cell.paragraphs, old_text, new_text)

    return count