上传文件至 /

2025-07-25 14:32:40 +08:00 · 2025-07-25 14:32:40 +08:00 · 75baeea511
parent 57bdaca9b8
commit 75baeea511
4 changed files with 167 additions and 0 deletions
--- a/init.pyc
+++ b/init.pyc
--- a/document_utils.py
+++ b/document_utils.py
@@ -0,0 +1,167 @@
+"""
+Document utility functions for Word Document Server.
+"""
+import json
+from typing import Dict, List, Any
+from docx import Document
+
+
+def get_document_properties(doc_path: str) -> Dict[str, Any]:
+    """Summarise the core metadata and basic size counts of a Word document.
+
+    Args:
+        doc_path: Path of the document to inspect.
+
+    Returns:
+        A dict holding the core properties (unset text values become "",
+        an unset revision becomes 0) followed by section, word, paragraph
+        and table counts. If the path does not exist or anything fails while
+        reading, a dict with a single "error" message is returned instead of
+        an exception being raised.
+    """
+    import os
+
+    def or_blank(value):
+        # Unset core properties are reported as empty strings.
+        return value or ""
+
+    def as_text(moment):
+        # Timestamps are stringified; an unset one becomes "".
+        return str(moment) if moment else ""
+
+    if not os.path.exists(doc_path):
+        return {"error": f"Document {doc_path} does not exist"}
+
+    try:
+        document = Document(doc_path)
+        props = document.core_properties
+
+        summary = {
+            "title": or_blank(props.title),
+            "author": or_blank(props.author),
+            "subject": or_blank(props.subject),
+            "keywords": or_blank(props.keywords),
+            "created": as_text(props.created),
+            "modified": as_text(props.modified),
+            "last_modified_by": or_blank(props.last_modified_by),
+            "revision": props.revision or 0,
+        }
+        # NOTE: "page_count" is the number of sections, not rendered pages.
+        summary["page_count"] = len(document.sections)
+        # Words are whitespace-separated tokens summed over doc.paragraphs.
+        summary["word_count"] = sum(
+            len(para.text.split()) for para in document.paragraphs
+        )
+        summary["paragraph_count"] = len(document.paragraphs)
+        summary["table_count"] = len(document.tables)
+        return summary
+    except Exception as exc:
+        return {"error": f"Failed to get document properties: {str(exc)}"}
+
+
+def extract_document_text(doc_path: str) -> str:
+    """Return the text of a Word document as one newline-joined string.
+
+    The text of every entry in ``doc.paragraphs`` comes first, followed by
+    the text of each paragraph of each cell of each row of each table.
+
+    Args:
+        doc_path: Path of the document to read.
+
+    Returns:
+        The collected text. Failures are not raised: a missing path or a
+        read error yields a descriptive message string instead.
+    """
+    import os
+
+    if not os.path.exists(doc_path):
+        return f"Document {doc_path} does not exist"
+
+    try:
+        document = Document(doc_path)
+
+        # Body paragraphs first ...
+        lines = [para.text for para in document.paragraphs]
+        # ... then the table content, walked table -> row -> cell -> paragraph.
+        lines.extend(
+            para.text
+            for table in document.tables
+            for row in table.rows
+            for cell in row.cells
+            for para in cell.paragraphs
+        )
+        return "\n".join(lines)
+    except Exception as exc:
+        return f"Failed to extract text: {str(exc)}"
+
+
+def get_document_structure(doc_path: str) -> Dict[str, Any]:
+    """Outline a Word document as paragraph summaries and table previews.
+
+    Args:
+        doc_path: Path of the document to inspect.
+
+    Returns:
+        A dict with two lists:
+
+        * "paragraphs": one entry per paragraph with its index, the first
+          100 characters of its text ("..." appended when longer) and its
+          style name ("Normal" when the paragraph has no style).
+        * "tables": one entry per table with its index, row and column
+          counts, and a preview of at most 3x3 cells, each cut to 20
+          characters ("..." appended when longer).
+
+        If the path does not exist or reading fails, a dict with a single
+        "error" message is returned instead of an exception being raised.
+    """
+    import os
+
+    def clip(value, limit):
+        # Keep the first `limit` characters; mark longer text with "...".
+        return value[:limit] + ("..." if len(value) > limit else "")
+
+    if not os.path.exists(doc_path):
+        return {"error": f"Document {doc_path} does not exist"}
+
+    try:
+        document = Document(doc_path)
+
+        paragraph_entries = [
+            {
+                "index": position,
+                "text": clip(para.text, 100),
+                "style": para.style.name if para.style else "Normal",
+            }
+            for position, para in enumerate(document.paragraphs)
+        ]
+
+        table_entries = []
+        for position, table in enumerate(document.tables):
+            row_total = len(table.rows)
+            column_total = len(table.columns)
+
+            # Sample only the top-left corner of the table.
+            preview = []
+            for row_idx in range(min(3, row_total)):
+                sampled = []
+                for col_idx in range(min(3, column_total)):
+                    try:
+                        sampled.append(clip(table.cell(row_idx, col_idx).text, 20))
+                    except IndexError:
+                        # The grid position could not be addressed.
+                        sampled.append("N/A")
+                preview.append(sampled)
+
+            table_entries.append({
+                "index": position,
+                "rows": row_total,
+                "columns": column_total,
+                "preview": preview,
+            })
+
+        return {"paragraphs": paragraph_entries, "tables": table_entries}
+    except Exception as exc:
+        return {"error": f"Failed to get document structure: {str(exc)}"}
+
+
+def find_paragraph_by_text(doc, text, partial_match=False):
+    """
+    Locate the paragraphs of a document that match a piece of text.
+
+    Args:
+        doc: Document object exposing a ``paragraphs`` sequence
+        text: Text to look for
+        partial_match: When true, a paragraph matches if its text contains
+            ``text``; otherwise its text must equal ``text`` exactly
+
+    Returns:
+        List of the indices (positions in ``doc.paragraphs``) of the
+        matching paragraphs, in document order
+    """
+    if partial_match:
+        # Substring search.
+        return [
+            position
+            for position, para in enumerate(doc.paragraphs)
+            if text in para.text
+        ]
+
+    # Exact, whole-paragraph comparison.
+    return [
+        position
+        for position, para in enumerate(doc.paragraphs)
+        if para.text == text
+    ]
+
+
+def find_and_replace_text(doc, old_text, new_text):
+    """
+    Find and replace text in the body paragraphs and tables of a document.
+
+    Replacement is done run by run so that run-level formatting is kept.
+    As a consequence, text that straddles two runs is not matched. Only
+    the tables listed in ``doc.tables`` are searched.
+
+    Args:
+        doc: Document object
+        old_text: Text to find; an empty value matches nothing
+        new_text: Text to replace with
+
+    Returns:
+        Number of occurrences of ``old_text`` that were replaced
+    """
+    # str.replace("", x) would insert x between every character, so an
+    # empty search string is treated as "nothing to do".
+    if not old_text:
+        return 0
+
+    count = _replace_in_paragraphs(doc.paragraphs, old_text, new_text)
+
+    # A merged cell can be listed more than once in row.cells. Processing it
+    # again would replace twice whenever new_text contains old_text, so each
+    # underlying cell is handled only once.
+    # NOTE(review): `_tc` is python-docx's private handle on the cell's XML
+    # element; when it is absent the cell object itself is used as identity.
+    handled = {}
+    for table in doc.tables:
+        for row in table.rows:
+            for cell in row.cells:
+                marker = getattr(cell, "_tc", cell)
+                if id(marker) in handled:
+                    continue
+                # Keep a reference so the id stays unique while we iterate.
+                handled[id(marker)] = marker
+                count += _replace_in_paragraphs(
+                    cell.paragraphs, old_text, new_text
+                )
+
+    return count
+
+
+def _replace_in_paragraphs(paragraphs, old_text, new_text):
+    """Replace old_text in every run of the paragraphs; return the hit count."""
+    replaced = 0
+    for para in paragraphs:
+        for run in para.runs:
+            current = run.text
+            hits = current.count(old_text)
+            if hits:
+                run.text = current.replace(old_text, new_text)
+                replaced += hits
+    return replaced
--- a/document_utils.pyc
+++ b/document_utils.pyc
--- a/extended_document_utils.pyc
+++ b/extended_document_utils.pyc