"""Extended PDF tools — Crop, Flatten, Repair, Metadata Editor."""
import os
import io
import logging

logger = logging.getLogger(__name__)


class PDFExtraError(Exception):
    """Raised when one of the extended PDF tools in this module fails.

    The public functions here wrap unexpected errors in this type, so
    callers only need to catch a single exception class.
    """


# ---------------------------------------------------------------------------
# Crop PDF
# ---------------------------------------------------------------------------
def crop_pdf(
    input_path: str,
    output_path: str,
    margin_left: float = 0,
    margin_right: float = 0,
    margin_top: float = 0,
    margin_bottom: float = 0,
    pages: str = "all",
) -> dict:
    """Crop margins from PDF pages.

    The media box of every selected page is shrunk by the given margins and
    the crop box is set to the same rectangle. Pages that are not selected
    are copied to the output unchanged.

    Args:
        input_path: Path to the input PDF
        output_path: Path for the cropped output
        margin_left/right/top/bottom: Points to crop from each side
        pages: "all" or comma-separated page numbers / ranges (1-based)

    Returns:
        dict with total_pages, cropped_pages and output_size

    Raises:
        PDFExtraError: If cropping fails, the PDF has no pages, or the
            margins leave no visible area on a selected page
    """
    try:
        from PyPDF2 import PdfReader, PdfWriter

        reader = PdfReader(input_path)
        writer = PdfWriter()
        total_pages = len(reader.pages)

        if total_pages == 0:
            raise PDFExtraError("PDF has no pages.")

        target_indices = _parse_pages(pages, total_pages)

        for i, page in enumerate(reader.pages):
            if i in target_indices:
                box = page.mediabox
                left = float(box.lower_left[0]) + margin_left
                bottom = float(box.lower_left[1]) + margin_bottom
                right = float(box.upper_right[0]) - margin_right
                top = float(box.upper_right[1]) - margin_top

                # Refuse margins that would collapse or invert the page box
                # instead of silently writing an unusable page.
                if right <= left or top <= bottom:
                    raise PDFExtraError(
                        f"Margins leave no visible area on page {i + 1}."
                    )

                box.lower_left = (left, bottom)
                box.upper_right = (right, top)
                page.mediabox = box
                page.cropbox = box
            writer.add_page(page)

        # dirname() is "" for a bare filename and makedirs("") raises, so
        # only create the directory when the path actually has one.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        with open(output_path, "wb") as f:
            writer.write(f)

        output_size = os.path.getsize(output_path)
        logger.info(f"Crop PDF: {len(target_indices)} pages cropped ({output_size} bytes)")
        return {
            "total_pages": total_pages,
            "cropped_pages": len(target_indices),
            "output_size": output_size,
        }

    except PDFExtraError:
        raise
    except Exception as e:
        # Chain the cause so the original traceback stays available.
        raise PDFExtraError(f"Failed to crop PDF: {str(e)}") from e


# ---------------------------------------------------------------------------
# Flatten PDF (remove interactive form fields, annotations)
# ---------------------------------------------------------------------------
def flatten_pdf(input_path: str, output_path: str) -> dict:
    """Flatten a PDF by stripping its interactive layer.

    Every page's ``/Annots`` entry is deleted and any ``/AcroForm`` entry on
    the output document catalog is removed. Note that the annotations and
    form fields are removed, not rendered into the page content.

    Args:
        input_path: Path to the input PDF
        output_path: Path for the flattened output

    Returns:
        dict with total_pages and output_size

    Raises:
        PDFExtraError: If flatten fails or the PDF has no pages
    """
    try:
        from PyPDF2 import PdfReader, PdfWriter

        reader = PdfReader(input_path)
        writer = PdfWriter()
        total_pages = len(reader.pages)

        if total_pages == 0:
            raise PDFExtraError("PDF has no pages.")

        for page in reader.pages:
            # Remove annotations to flatten
            if "/Annots" in page:
                del page["/Annots"]
            writer.add_page(page)

        # Remove AcroForm (interactive forms) at document level
        if "/AcroForm" in writer._root_object:
            del writer._root_object["/AcroForm"]

        # dirname() is "" for a bare filename and makedirs("") raises, so
        # only create the directory when the path actually has one.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        with open(output_path, "wb") as f:
            writer.write(f)

        output_size = os.path.getsize(output_path)
        logger.info(f"Flatten PDF: {total_pages} pages ({output_size} bytes)")
        return {"total_pages": total_pages, "output_size": output_size}

    except PDFExtraError:
        raise
    except Exception as e:
        # Chain the cause so the original traceback stays available.
        raise PDFExtraError(f"Failed to flatten PDF: {str(e)}") from e


# ---------------------------------------------------------------------------
# Repair PDF
# ---------------------------------------------------------------------------
def repair_pdf(input_path: str, output_path: str) -> dict:
    """Attempt to repair a damaged PDF by re-writing it.

    The file is opened in non-strict mode and each page is copied into a
    fresh writer. Pages that cannot be loaded or copied are skipped with a
    warning. Document metadata is copied on a best-effort basis.

    Args:
        input_path: Path to the input PDF
        output_path: Path for the repaired output

    Returns:
        dict with total_pages, recovered_pages, output_size, and repaired flag

    Raises:
        PDFExtraError: If the file cannot be read or no page can be recovered
    """
    try:
        from PyPDF2 import PdfReader, PdfWriter
        from PyPDF2.errors import PdfReadError

        try:
            reader = PdfReader(input_path, strict=False)
        except PdfReadError as e:
            raise PDFExtraError(
                f"Cannot read PDF — file may be severely corrupted: {str(e)}"
            ) from e

        writer = PdfWriter()
        source_pages = reader.pages
        total_pages = len(source_pages)

        if total_pages == 0:
            raise PDFExtraError("PDF has no recoverable pages.")

        recovered = 0
        for i in range(total_pages):
            try:
                # Fetch the page inside the try: a failure while loading it
                # must skip this page, not abort the whole repair.
                writer.add_page(source_pages[i])
                recovered += 1
            except Exception:
                logger.warning(f"Repair: skipped unrecoverable page {i + 1}")

        if recovered == 0:
            raise PDFExtraError("No pages could be recovered from the PDF.")

        # Copy metadata if available; this is best-effort, so a failure is
        # logged rather than failing the repair.
        try:
            if reader.metadata:
                writer.add_metadata(reader.metadata)
        except Exception as e:
            logger.warning(f"Repair: could not copy metadata: {e}")

        # dirname() is "" for a bare filename and makedirs("") raises, so
        # only create the directory when the path actually has one.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        with open(output_path, "wb") as f:
            writer.write(f)

        output_size = os.path.getsize(output_path)
        logger.info(f"Repair PDF: {recovered}/{total_pages} pages recovered ({output_size} bytes)")
        return {
            "total_pages": total_pages,
            "recovered_pages": recovered,
            "output_size": output_size,
            "repaired": True,
        }

    except PDFExtraError:
        raise
    except Exception as e:
        # Chain the cause so the original traceback stays available.
        raise PDFExtraError(f"Failed to repair PDF: {str(e)}") from e


# ---------------------------------------------------------------------------
# PDF Metadata Editor
# ---------------------------------------------------------------------------
def edit_pdf_metadata(
    input_path: str,
    output_path: str,
    title: str | None = None,
    author: str | None = None,
    subject: str | None = None,
    keywords: str | None = None,
    creator: str | None = None,
) -> dict:
    """Edit PDF metadata fields.

    Pages are copied into a new document. Existing string-valued metadata
    from the input is carried over on a best-effort basis, then the supplied
    fields are applied on top, so fields left as None keep their value.

    Args:
        input_path: Path to the input PDF
        output_path: Path for the output PDF
        title/author/subject/keywords/creator: New metadata values (None = keep existing)

    Returns:
        dict with total_pages, output_size and the metadata read back from
        the written file (empty dict if the read-back fails)

    Raises:
        PDFExtraError: If no field is provided or the metadata edit fails
    """
    try:
        from PyPDF2 import PdfReader, PdfWriter

        # Build metadata dict from the fields the caller actually supplied.
        requested = {
            "/Title": title,
            "/Author": author,
            "/Subject": subject,
            "/Keywords": keywords,
            "/Creator": creator,
        }
        metadata = {key: value for key, value in requested.items() if value is not None}

        # Validate before touching the file so a no-op request fails fast.
        if not metadata:
            raise PDFExtraError("At least one metadata field must be provided.")

        reader = PdfReader(input_path)
        writer = PdfWriter()

        for page in reader.pages:
            writer.add_page(page)

        # A fresh writer does not inherit the reader's document info, so the
        # existing entries must be copied explicitly to honour "None = keep
        # existing". Only plain string values are carried over.
        merged = {}
        try:
            existing = reader.metadata
            if existing:
                for key in existing:
                    value = existing[key]
                    if isinstance(value, str):
                        merged[str(key)] = value
        except Exception as e:
            logger.warning(f"Edit metadata: could not read existing metadata: {e}")
        merged.update(metadata)

        writer.add_metadata(merged)

        # dirname() is "" for a bare filename and makedirs("") raises, so
        # only create the directory when the path actually has one.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        with open(output_path, "wb") as f:
            writer.write(f)

        output_size = os.path.getsize(output_path)

        # Read back to confirm; a failure here is logged and leaves the
        # reported metadata empty instead of failing the edit.
        current_meta = {}
        try:
            r2 = PdfReader(output_path)
            if r2.metadata:
                current_meta = {
                    "title": r2.metadata.get("/Title", ""),
                    "author": r2.metadata.get("/Author", ""),
                    "subject": r2.metadata.get("/Subject", ""),
                    "keywords": r2.metadata.get("/Keywords", ""),
                    "creator": r2.metadata.get("/Creator", ""),
                }
        except Exception as e:
            logger.warning(f"Edit metadata: could not verify written metadata: {e}")

        logger.info(f"Edit metadata: updated {len(metadata)} fields ({output_size} bytes)")
        return {
            "total_pages": len(reader.pages),
            "output_size": output_size,
            "metadata": current_meta,
        }

    except PDFExtraError:
        raise
    except Exception as e:
        # Chain the cause so the original traceback stays available.
        raise PDFExtraError(f"Failed to edit PDF metadata: {str(e)}") from e


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _parse_pages(pages_spec: str, total_pages: int) -> set[int]:
    """Parse page specification to set of 0-based indices."""
    if pages_spec.strip().lower() == "all":
        return set(range(total_pages))

    indices = set()
    for part in pages_spec.split(","):
        part = part.strip()
        if "-" in part:
            try:
                start, end = part.split("-", 1)
                start = max(1, int(start))
                end = min(total_pages, int(end))
                for p in range(start, end + 1):
                    indices.add(p - 1)
            except ValueError:
                continue
        else:
            try:
                p = int(part)
                if 1 <= p <= total_pages:
                    indices.add(p - 1)
            except ValueError:
                continue
    return indices