Report_Generate_Server/tools/content_tools.py

"""
Content tools for Word Document Server.

These tools add various types of content to Word documents,
including headings, paragraphs, tables, images, and page breaks.
"""
import os
from typing import List, Optional, Dict, Any
from docx import Document
from docx.shared import Inches, Pt
from docx.oxml.shared import qn


from utils.file_utils import check_file_writeable, ensure_docx_extension
from utils.document_utils import find_and_replace_text
from core.styles import ensure_heading_style, ensure_table_style

def split_table_by_row_content(
    doc_path: str,
    output_path: str,
    table_num: int = 0
) -> str:
    """Split every data row of one table into several single-line rows.

    The number of parts is the number of newline-separated lines found in the
    first cell of the table's second row. Every data row (all rows after the
    header) is expanded into that many rows; each cell's text is split on
    newlines and, when a cell has fewer lines than required, its last line is
    repeated. Lines beyond the required count are not copied.

    The rebuilt table takes the place of the original table in the document.

    Args:
        doc_path: Path of the input Word document.
        output_path: Path the resulting document is saved to.
        table_num: Zero-based index of the table to process.

    Returns:
        A status message. Any exception is caught and reported in the
        returned message instead of being raised.
    """
    try:
        from docx import Document
        from docx.shared import Pt
        from docx.oxml.shared import qn
        from docx.enum.table import WD_ALIGN_VERTICAL
        from docx.enum.text import WD_ALIGN_PARAGRAPH

        def fill_cell(cell, text, width):
            """Write ``text`` into ``cell`` and apply the fixed cell formatting."""
            cell.text = text
            paragraph = cell.paragraphs[0]
            if paragraph.runs:
                run = paragraph.runs[0]
                run.font.name = "Times New Roman"
                run.font.size = Pt(10.5)
                # font.name only sets the Latin font; the East Asian font is
                # stored in a separate attribute of the same rFonts element.
                run._element.rPr.rFonts.set(qn('w:eastAsia'), '仿宋')
            # Paragraph alignment takes the paragraph enum, not the table one.
            paragraph.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
            cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER
            # A source cell without an explicit width reports None, which
            # cannot be assigned to another cell.
            if width is not None:
                cell.width = width

        doc = Document(doc_path)

        if len(doc.tables) <= table_num:
            return f"文档中不存在第{table_num+1}个表格"

        table = doc.tables[table_num]
        row_count = len(table.rows)
        col_count = len(table.columns)

        # With fewer than two rows there is no data row to derive the split from.
        if row_count < 2:
            doc.save(output_path)
            return "表格行数少于2行，无法按照要求分行"

        # str.split always yields at least one element; max() keeps the
        # "at least one part" guarantee explicit.
        split_count = max(1, len(table.cell(1, 0).text.split('\n')))

        print(f'原表格行数：{row_count}，第二行第一列内容行数：{split_count}，需要分割为：{split_count}部分')

        # Rows of the new table = header row + data rows * parts per row.
        new_table = doc.add_table(rows=1 + (row_count - 1) * split_count, cols=col_count)
        new_table.style = table.style
        new_table.autofit = True

        # 1. Header row is copied unchanged.
        for col_idx in range(col_count):
            source_cell = table.cell(0, col_idx)
            fill_cell(new_table.cell(0, col_idx), source_cell.text, source_cell.width)

        # 2. Each data row becomes `split_count` consecutive rows.
        for orig_row_idx in range(1, row_count):
            first_new_row = 1 + (orig_row_idx - 1) * split_count
            for col_idx in range(col_count):
                source_cell = table.cell(orig_row_idx, col_idx)
                cell_lines = source_cell.text.split('\n')
                # Pad short cells by repeating their last line.
                cell_lines += [cell_lines[-1]] * (split_count - len(cell_lines))

                for part_idx in range(split_count):
                    fill_cell(
                        new_table.cell(first_new_row + part_idx, col_idx),
                        cell_lines[part_idx],
                        source_cell.width,
                    )

        # add_table() appends at the end of the document; move the new table
        # in front of the original so it keeps the original position, then
        # drop the original.
        table._element.addprevious(new_table._element)
        table._element.getparent().remove(table._element)

        doc.save(output_path)
        return f"第{table_num+1}个表格已成功分行处理"

    except Exception as e:
        return f"处理表格时出错: {str(e)}"


async def add_heading(filename: str, text: str, level: int = 1) -> str:
    """Append a heading to an existing Word document.

    Args:
        filename: Path of the target document.
        text: Heading text.
        level: Heading level, 1 (highest) to 9. Anything ``int()`` accepts
            is converted first.

    Returns:
        A status message describing the outcome; failures are reported in
        the message rather than raised.
    """
    filename = ensure_docx_extension(filename)

    # Callers may hand the level over as a string; normalise before validating.
    try:
        level = int(level)
    except (ValueError, TypeError):
        return "Invalid parameter: level must be an integer between 1 and 9"

    if not 1 <= level <= 9:
        return f"Invalid heading level: {level}. Level must be between 1 and 9."

    if not os.path.exists(filename):
        return f"Document {filename} does not exist"

    writeable, reason = check_file_writeable(filename)
    if not writeable:
        return f"Cannot modify document: {reason}. Consider creating a copy first or creating a new document."

    try:
        document = Document(filename)
        ensure_heading_style(document)

        try:
            # Preferred path: use the document's heading style.
            document.add_heading(text, level=level)
            document.save(filename)
            return f"Heading '{text}' (level {level}) added to {filename}"
        except Exception:
            # Style-based heading failed: emulate it with a bold Normal
            # paragraph and direct run formatting.
            from docx.oxml.shared import qn

            fallback = document.add_paragraph(text)
            fallback.style = document.styles['Normal']
            first_run = fallback.runs[0]
            first_run.bold = True
            fonts = first_run.element.get_or_add_rPr().get_or_add_rFonts()
            fonts.set(qn('w:eastAsia'), '宋体(中文正文)')
            # NOTE(review): level 1 gets 12pt, the same as levels 3+ and
            # smaller than level 2 (14pt) - confirm this is intended.
            first_run.font.size = Pt(14) if level == 2 else Pt(12)

            document.save(filename)
            return f"Heading '{text}' added to {filename} with direct formatting (style not available)"
    except Exception as exc:
        return f"Failed to add heading: {exc!s}"

async def add_paragraph(filename: str, text: str, style: Optional[str] = None) -> str:
    """Append one paragraph to an existing Word document.

    Args:
        filename: Path of the target document.
        text: Paragraph content.
        style: Optional paragraph style name. When the style lookup raises
            ``KeyError`` the paragraph falls back to the 'Normal' style and
            the returned message says so.

    Returns:
        A status message describing the outcome; failures are reported in
        the message rather than raised.
    """
    filename = ensure_docx_extension(filename)

    if not os.path.exists(filename):
        return f"Document {filename} does not exist"

    writeable, reason = check_file_writeable(filename)
    if not writeable:
        return f"Cannot modify document: {reason}. Consider creating a copy first or creating a new document."

    try:
        document = Document(filename)
        new_paragraph = document.add_paragraph(text)

        style_missing = False
        if style:
            try:
                new_paragraph.style = style
            except KeyError:
                # Requested style is not defined in this document.
                new_paragraph.style = document.styles['Normal']
                style_missing = True

        document.save(filename)
        if style_missing:
            return f"Style '{style}' not found, paragraph added with default style to {filename}"
        return f"Paragraph added to {filename}"
    except Exception as exc:
        return f"Failed to add paragraph: {exc!s}"


async def add_table(filename: str, rows: int, cols: int, data: Optional[List[List[str]]] = None) -> str:
    """Append a table to an existing Word document.

    Args:
        filename: Path of the target document.
        rows: Number of table rows.
        cols: Number of table columns.
        data: Optional two-dimensional list of cell values. Values outside
            the ``rows`` x ``cols`` grid are ignored; every value is
            converted with ``str()``.

    Returns:
        A status message describing the outcome; failures are reported in
        the message rather than raised.
    """
    filename = ensure_docx_extension(filename)

    if not os.path.exists(filename):
        return f"Document {filename} does not exist"

    writeable, reason = check_file_writeable(filename)
    if not writeable:
        return f"Cannot modify document: {reason}. Consider creating a copy first or creating a new document."

    try:
        document = Document(filename)
        new_table = document.add_table(rows=rows, cols=cols)

        try:
            new_table.style = 'Table Grid'
        except KeyError:
            # 'Table Grid' is not defined in this document; the table keeps
            # whatever style it was created with.
            pass

        if data:
            # zip() against range() stops at the table bounds, so surplus
            # rows or columns in `data` are skipped.
            for row_idx, row_values in zip(range(rows), data):
                for col_idx, value in zip(range(cols), row_values):
                    new_table.cell(row_idx, col_idx).text = str(value)

        document.save(filename)
        return f"Table ({rows}x{cols}) added to {filename}"
    except Exception as exc:
        return f"Failed to add table: {exc!s}"

# Images larger than this (in KB) are re-encoded before being inserted.
_MAX_IMAGE_KB = 9126
# Upper bound on re-encoding passes so compression can never loop forever.
_MAX_COMPRESS_ATTEMPTS = 10
# Pillow image modes that can be written as PNG without conversion.
_PNG_WRITABLE_MODES = ("1", "L", "LA", "I", "I;16", "P", "RGB", "RGBA")


def _compress_image_to_limit(src_path: str, dst_path: str, max_kb: float) -> float:
    """Re-encode ``src_path`` into ``dst_path`` until it is at most ``max_kb`` KB.

    The first pass saves at quality 85 with the original dimensions. Each
    further pass lowers the quality and shrinks the dimensions, so formats
    that ignore ``quality`` still converge.

    Returns:
        The size of ``dst_path`` in KB.

    Raises:
        ValueError: If the size limit is not reached within the allowed
            number of passes.
    """
    from PIL import Image

    quality = 85
    scale = 1.0
    size_kb = os.path.getsize(src_path) / 1024
    for _ in range(_MAX_COMPRESS_ATTEMPTS):
        print(f"压缩图片：{src_path} ({size_kb:.2f} KB) -> {dst_path} ({max_kb} KB)")
        with Image.open(src_path) as source:
            if scale < 1.0:
                target_size = (
                    max(1, int(source.width * scale)),
                    max(1, int(source.height * scale)),
                )
                candidate = source.resize(target_size)
            else:
                candidate = source
            candidate.save(dst_path, optimize=True, quality=quality)
        size_kb = os.path.getsize(dst_path) / 1024
        if size_kb <= max_kb:
            return size_kb
        quality = max(quality - 10, 30)
        scale *= 0.8
    raise ValueError(
        f"could not compress {src_path} below {max_kb} KB (last result {size_kb:.2f} KB)"
    )


def _png_stream(image_path: str):
    """Return an in-memory PNG rendition of ``image_path`` as a rewound stream."""
    from io import BytesIO
    from PIL import Image

    buffer = BytesIO()
    with Image.open(image_path) as source:
        # Modes PNG cannot store (for example CMYK) are converted to RGB first.
        image = source if source.mode in _PNG_WRITABLE_MODES else source.convert("RGB")
        image.save(buffer, "PNG")
    buffer.seek(0)
    return buffer


async def add_picture_to_table(target_doc: Document, target_filename: str, row: int, col: int, image_path: str,table_num: int = -1, width: Optional[float] = None) -> str:
    """Insert a picture into one cell of a table and save the document.

    Args:
        target_doc: Open document object that contains the table.
        target_filename: Path the document is saved to afterwards.
        row: Zero-based row index of the target cell.
        col: Zero-based column index of the target cell.
        image_path: Path of the image file on disk.
        table_num: Index of the table in ``target_doc.tables``; the default
            -1 selects the last table.
        width: Picture width in inches; ``None`` keeps the image's own size.

    Returns:
        A status message describing the outcome; failures are reported in
        the message rather than raised.
    """
    if not os.path.exists(image_path):
        return f"Image file not found: {image_path}"

    # Validate the file size and, for oversized images, work on a compressed
    # copy stored in a sibling directory (the original file is not modified).
    try:
        image_size = os.path.getsize(image_path) / 1024  # Size in KB
        if image_size <= 0:
            return f"Image file appears to be empty: {image_path} (0 KB)"
        if image_size > _MAX_IMAGE_KB:
            output_dir = os.path.join(os.path.dirname(image_path), "压缩图片")
            os.makedirs(output_dir, exist_ok=True)
            output_path = os.path.join(output_dir, os.path.basename(image_path))
            _compress_image_to_limit(image_path, output_path, _MAX_IMAGE_KB)
            image_path = output_path
    except Exception as size_error:
        return f"Error checking image file: {str(size_error)}"

    try:
        from docx.enum.table import WD_ALIGN_VERTICAL
        from docx.enum.text import WD_ALIGN_PARAGRAPH

        table = target_doc.tables[table_num]
        cell = table.cell(row, col)
        # A cell holding exactly one character is cleared before inserting.
        # NOTE(review): presumably a placeholder character - confirm with callers.
        if len(cell.text) == 1:
            cell.text = ""
        paragraph = cell.paragraphs[-1]
        run = paragraph.add_run()

        def insert(source):
            """Add ``source`` (path or stream) to the run, honouring ``width``."""
            if width:
                run.add_picture(source, width=Inches(width))
            else:
                run.add_picture(source)

        try:
            insert(image_path)
        except Exception:
            # Direct insertion failed: retry with a PNG rendition. It is kept
            # in memory so no file next to the source is written or deleted.
            try:
                print(f"正常添加失败，尝试转换图片后添加：{image_path}")
                insert(_png_stream(image_path))
            except Exception as e:
                return f"调用add_picture函数出现问题: {str(e)}"

        # Centre the cell's first paragraph and the paragraph that received
        # the picture (they are the same when the cell has one paragraph).
        cell.paragraphs[0].paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
        paragraph.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
        cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER

        target_doc.save(target_filename)
        return f"Picture {image_path} added to table {table_num} cell ({row},{col})"
    except Exception as e:
        return f"Failed to add picture to table: {str(e)}"

import requests
from io import BytesIO
from PIL import Image

async def add_picture(filename: str, image_path: str, width: Optional[float] = None, height: Optional[float] = None) -> str:
    """Append a picture to a Word document from a local path or an http(s) URL.

    Args:
        filename: Path of the target document.
        image_path: Local image path, or a URL starting with ``http://`` or
            ``https://``.
        width: Picture width in inches (optional).
        height: Picture height in inches (optional).

        Width and height are independent: only the dimensions that are given
        are passed to python-docx, and when neither is given the picture is
        added without explicit dimensions.

    Returns:
        A status message describing the outcome; failures are reported in
        the message rather than raised.
    """
    filename = ensure_docx_extension(filename)

    if not os.path.exists(filename):
        return f"Document {filename} does not exist"

    abs_filename = os.path.abspath(filename)
    is_url = image_path.startswith(("http://", "https://"))

    try:
        doc = Document(abs_filename)

        def insert(source):
            """Add ``source`` (path or stream) using only the supplied dimensions."""
            size_kwargs = {}
            if width:
                size_kwargs["width"] = Inches(width)
            if height:
                size_kwargs["height"] = Inches(height)
            doc.add_picture(source, **size_kwargs)

        if is_url:
            # NOTE(review): the URL is fetched as given; restrict allowed
            # hosts upstream if `image_path` can come from untrusted input.
            try:
                response = requests.get(image_path, timeout=10)
                response.raise_for_status()
                image_bytes = BytesIO(response.content)

                # Check the payload is a readable image, then rewind the
                # stream because verification consumes it.
                Image.open(image_bytes).verify()
                image_bytes.seek(0)

                insert(image_bytes)
                doc.save(abs_filename)
                return f"Picture from URL {image_path} added to {filename}"
            except Exception as url_error:
                return f"Failed to download/add URL image: {str(url_error)}"

        abs_image_path = os.path.abspath(image_path)
        if not os.path.exists(abs_image_path):
            return f"Image file not found: {abs_image_path}"

        try:
            if os.path.getsize(abs_image_path) / 1024 <= 0:
                return f"Image file is empty: {abs_image_path}"
        except Exception as size_error:
            return f"Error checking image file: {str(size_error)}"

        try:
            insert(abs_image_path)
            doc.save(abs_filename)
            return f"Picture {image_path} added to {filename}"
        except Exception as inner_error:
            return f"Failed to add picture: {str(inner_error)}"

    except Exception as outer_error:
        return f"Document processing error: {str(outer_error)}"


async def add_page_break(filename: str) -> str:
    """Append a page break to an existing Word document.

    Args:
        filename: Path of the target document.

    Returns:
        A status message describing the outcome; failures are reported in
        the message rather than raised.
    """
    filename = ensure_docx_extension(filename)

    if not os.path.exists(filename):
        return f"Document {filename} does not exist"

    writeable, reason = check_file_writeable(filename)
    if not writeable:
        return f"Cannot modify document: {reason}. Consider creating a copy first."

    try:
        document = Document(filename)
        document.add_page_break()
        document.save(filename)
    except Exception as exc:
        return f"Failed to add page break: {exc!s}"
    return f"Page break added to {filename}."


async def add_table_of_contents(filename: str, title: str = "Table of Contents", max_level: int = 3) -> str:
    """Prepend a plain-text table of contents built from the heading styles.

    Paragraphs whose style name is ``Heading N`` with ``N <= max_level``
    become indented entries listed before the document content.

    NOTE: the document is rebuilt as a new file and saved over ``filename``.
    Only paragraph text, paragraph style names and table cell text are
    carried over, and all tables are placed after all paragraphs. Content
    this function does not copy (pictures, run-level formatting, and so on)
    is not present in the result.

    Args:
        filename: Path of the Word document.
        title: Optional title shown above the entries; an empty value omits it.
        max_level: Deepest heading level to include; clamped to 1-9.

    Returns:
        A status message describing the outcome; failures are reported in
        the message rather than raised.
    """

    filename = ensure_docx_extension(filename)

    if not os.path.exists(filename):
        return f"Document {filename} does not exist"

    # Check if file is writeable
    is_writeable, error_message = check_file_writeable(filename)
    if not is_writeable:
        return f"Cannot modify document: {error_message}. Consider creating a copy first."

    try:
        # Accept numeric strings as add_heading does, then clamp to 1-9.
        max_level = max(1, min(int(max_level), 9))

        doc = Document(filename)

        # Collect (level, text) for every heading within the requested depth.
        headings = []
        for paragraph in doc.paragraphs:
            style_name = paragraph.style.name if paragraph.style else None
            if not style_name or not style_name.startswith('Heading '):
                continue
            try:
                level = int(style_name.split(' ')[1])
            except (ValueError, IndexError):
                # Style name does not carry a numeric level; not a TOC entry.
                continue
            if level <= max_level:
                headings.append((level, paragraph.text))

        if not headings:
            return f"No headings found in document {filename}. Table of contents not created."

        toc_doc = Document()

        if title:
            toc_doc.add_heading(title, level=1)

        # One entry per heading, indented four spaces per nesting level.
        for level, heading_text in headings:
            indent = '    ' * (level - 1)
            toc_doc.add_paragraph(f"{indent}{heading_text}")

        toc_doc.add_page_break()

        # Re-create the original paragraphs (text and style name only).
        for paragraph in doc.paragraphs:
            copied = toc_doc.add_paragraph(paragraph.text)
            try:
                if paragraph.style:
                    copied.style = paragraph.style.name
            except Exception:
                # The style is not available in the new document; keep the
                # default style instead of failing the whole operation.
                pass

        # Re-create the tables (cell text only).
        for table in doc.tables:
            new_table = toc_doc.add_table(rows=len(table.rows), cols=len(table.columns))
            for i, row in enumerate(table.rows):
                for j, cell in enumerate(row.cells):
                    # cell.text joins all paragraphs of the cell, so
                    # multi-paragraph cells keep their full content.
                    new_table.cell(i, j).text = cell.text

        toc_doc.save(filename)

        return f"Table of contents with {len(headings)} entries added to {filename}"
    except Exception as e:
        return f"Failed to add table of contents: {str(e)}"


async def delete_paragraph(filename: str, paragraph_index: int) -> str:
    """Remove one paragraph from a Word document by its index.

    Args:
        filename: Path to the Word document.
        paragraph_index: Zero-based position of the paragraph in
            ``doc.paragraphs``.

    Returns:
        A status message describing the outcome; failures are reported in
        the message rather than raised.
    """
    filename = ensure_docx_extension(filename)

    if not os.path.exists(filename):
        return f"Document {filename} does not exist"

    writeable, reason = check_file_writeable(filename)
    if not writeable:
        return f"Cannot modify document: {reason}. Consider creating a copy first."

    try:
        document = Document(filename)
        total = len(document.paragraphs)

        if paragraph_index < 0 or paragraph_index >= total:
            return f"Invalid paragraph index. Document has {total} paragraphs (0-{total-1})."

        # There is no paragraph-level delete call used here; the paragraph's
        # XML element is detached from its parent instead.
        element = document.paragraphs[paragraph_index]._p
        element.getparent().remove(element)

        document.save(filename)
        return f"Paragraph at index {paragraph_index} deleted successfully."
    except Exception as exc:
        return f"Failed to delete paragraph: {exc!s}"


async def search_and_replace(filename: str, find_text: str, replace_text: str) -> str:
    """Replace every occurrence of ``find_text`` with ``replace_text``.

    The document is saved only when at least one replacement was made.

    Args:
        filename: Path to the Word document.
        find_text: Text to search for.
        replace_text: Text to replace with.

    Returns:
        A status message with the number of replacements, or the reason for
        failure; failures are reported in the message rather than raised.
    """
    filename = ensure_docx_extension(filename)

    if not os.path.exists(filename):
        return f"Document {filename} does not exist"

    writeable, reason = check_file_writeable(filename)
    if not writeable:
        return f"Cannot modify document: {reason}. Consider creating a copy first."

    try:
        document = Document(filename)
        replaced = find_and_replace_text(document, find_text, replace_text)

        if replaced > 0:
            document.save(filename)
            return f"Replaced {replaced} occurrence(s) of '{find_text}' with '{replace_text}'."
        return f"No occurrences of '{find_text}' found."
    except Exception as exc:
        return f"Failed to search and replace: {exc!s}"