# SaaS-PDF/backend/tests/test_pdf_tools_service.py

"""Tests for PDF tools service — Merge, Split, Rotate, etc."""
import os
import pytest
from unittest.mock import patch, MagicMock

from app.services.pdf_tools_service import (
    add_watermark,
    merge_pdfs,
    PDFToolsError,
    remove_watermark,
    reorder_pdf_pages,
    rotate_pdf,
    split_pdf,
)


class TestMergePdfsService:
    """Tests for ``merge_pdfs``: missing inputs and a basic two-file merge."""

    def test_merge_file_not_found_raises(self, app, tmp_path):
        """Should raise when input file doesn't exist."""
        # Build the paths under tmp_path: they are guaranteed not to exist
        # and the test no longer depends on the contents of a shared /tmp.
        missing_inputs = [
            str(tmp_path / 'nonexistent1.pdf'),
            str(tmp_path / 'nonexistent2.pdf'),
        ]
        output = str(tmp_path / 'merged_output.pdf')

        with app.app_context():
            with pytest.raises(PDFToolsError, match="not found"):
                merge_pdfs(missing_inputs, output)

    def test_merge_success(self, app, tmp_path):
        """Should merge PDF files successfully."""
        # Skip up front when the optional dependency is missing. Wrapping the
        # whole body in ``except ImportError`` would also hide an ImportError
        # raised by the service under test and report it as a skip.
        pypdf2 = pytest.importorskip('PyPDF2', reason="PyPDF2 not installed")

        with app.app_context():
            # Create two single-page test PDFs using PyPDF2
            pdf1 = str(tmp_path / 'a.pdf')
            pdf2 = str(tmp_path / 'b.pdf')

            for path in (pdf1, pdf2):
                writer = pypdf2.PdfWriter()
                writer.add_blank_page(width=612, height=792)
                with open(path, 'wb') as f:
                    writer.write(f)

            output = str(tmp_path / 'merged.pdf')
            result = merge_pdfs([pdf1, pdf2], output)

            # One page per input file, so the merged document has two pages.
            assert result['total_pages'] == 2
            assert result['files_merged'] == 2
            assert result['output_size'] > 0
            assert os.path.exists(output)


class TestSplitPdfService:
    """Tests for ``split_pdf`` when every page is extracted (``mode='all'``)."""

    def test_split_all_pages(self, app, tmp_path):
        """Should split PDF into individual pages."""
        # Skip only when PyPDF2 itself is unavailable; an ImportError raised
        # while running the service must surface as a failure, not a skip.
        pypdf2 = pytest.importorskip('PyPDF2', reason="PyPDF2 not installed")

        with app.app_context():
            # Create 3-page PDF
            input_path = str(tmp_path / 'multi.pdf')
            writer = pypdf2.PdfWriter()
            for _ in range(3):
                writer.add_blank_page(width=612, height=792)
            with open(input_path, 'wb') as f:
                writer.write(f)

            output_dir = str(tmp_path / 'split_output')
            result = split_pdf(input_path, output_dir, mode='all')

            assert result['total_pages'] == 3
            assert result['extracted_pages'] == 3
            # The result is expected to point at an archive that exists on disk.
            assert os.path.exists(result['zip_path'])


class TestRotatePdfService:
    """Tests for ``rotate_pdf`` page-spec validation."""

    def test_rotate_range_invalid_format_returns_clear_message(self, app, tmp_path):
        """Should raise a clear error for malformed page specs instead of failing generically."""
        # Skip only when PyPDF2 itself is unavailable. The previous blanket
        # ``except ImportError`` also wrapped the ``pytest.raises`` block, so
        # an ImportError from the service would have been reported as a skip.
        pypdf2 = pytest.importorskip('PyPDF2', reason="PyPDF2 not installed")

        with app.app_context():
            input_path = str(tmp_path / 'rotate-source.pdf')
            output_path = str(tmp_path / 'rotate-output.pdf')

            # Two-page source document.
            writer = pypdf2.PdfWriter()
            for _ in range(2):
                writer.add_blank_page(width=612, height=792)
            with open(input_path, 'wb') as f:
                writer.write(f)

            # '1-two' is a range whose upper bound is not a number.
            with pytest.raises(PDFToolsError, match='Invalid page format'):
                rotate_pdf(input_path, output_path, rotation=90, pages='1-two')


class TestRemoveWatermarkService:
    """Tests for ``remove_watermark`` (text and image overlays, no-op failure).

    The class also holds two ``split_pdf`` range-validation tests; see the
    review note above them.
    """

    def test_remove_text_watermark_from_reportlab_overlay(self, app, tmp_path):
        """Should remove text watermarks generated by the platform watermark flow."""
        # Skip up front on missing optional dependencies so that an
        # ImportError raised inside the service is a failure, not a skip.
        skip_reason = "PyPDF2/reportlab not installed"
        canvas = pytest.importorskip('reportlab.pdfgen.canvas', reason=skip_reason)
        pypdf2 = pytest.importorskip('PyPDF2', reason=skip_reason)

        with app.app_context():
            input_path = str(tmp_path / 'source.pdf')
            watermarked_path = str(tmp_path / 'watermarked.pdf')
            output_path = str(tmp_path / 'cleaned.pdf')

            # Single-page source with real text that must survive the cleanup.
            c = canvas.Canvas(input_path)
            c.drawString(100, 700, 'Hello world')
            c.save()

            # Round trip: apply the service's own watermark, then remove it.
            add_watermark(input_path, watermarked_path, 'CONFIDENTIAL')
            result = remove_watermark(watermarked_path, output_path)

            # extract_text() may return None, so normalise to a string.
            extracted_text = pypdf2.PdfReader(output_path).pages[0].extract_text() or ''

            assert result['total_pages'] == 1
            assert result['cleaned_pages'] == 1
            assert result['output_size'] > 0
            assert os.path.exists(output_path)
            assert 'Hello world' in extracted_text
            assert 'CONFIDENTIAL' not in extracted_text

    def test_remove_image_watermark_overlay_from_trailing_xobject(self, app, tmp_path):
        """Should remove supported trailing image watermark overlays while preserving page text."""
        skip_reason = 'PyPDF2/reportlab/Pillow not installed'
        image_module = pytest.importorskip('PIL.Image', reason=skip_reason)
        canvas = pytest.importorskip('reportlab.pdfgen.canvas', reason=skip_reason)
        pypdf2 = pytest.importorskip('PyPDF2', reason=skip_reason)

        with app.app_context():
            input_path = str(tmp_path / 'source.pdf')
            overlay_path = str(tmp_path / 'overlay.pdf')
            watermarked_path = str(tmp_path / 'image-watermarked.pdf')
            output_path = str(tmp_path / 'image-cleaned.pdf')
            watermark_image_path = str(tmp_path / 'watermark.png')

            # Base page carrying the text that must be preserved.
            c = canvas.Canvas(input_path)
            c.drawString(100, 700, 'Hello world')
            c.save()

            # Semi-transparent red rectangle used as the watermark image.
            image_module.new('RGBA', (200, 80), (220, 38, 38, 96)).save(watermark_image_path)

            # Overlay page containing only the image.
            c = canvas.Canvas(overlay_path)
            c.drawImage(watermark_image_path, 180, 360, width=240, height=96, mask='auto')
            c.save()

            # Merge the overlay onto the base page to produce the watermarked
            # input for the service.
            base_page = pypdf2.PdfReader(input_path).pages[0]
            overlay_page = pypdf2.PdfReader(overlay_path).pages[0]
            base_page.merge_page(overlay_page)

            writer = pypdf2.PdfWriter()
            writer.add_page(base_page)
            with open(watermarked_path, 'wb') as f:
                writer.write(f)

            result = remove_watermark(watermarked_path, output_path)
            cleaned_page = pypdf2.PdfReader(output_path).pages[0]
            extracted_text = cleaned_page.extract_text() or ''
            cleaned_stream = cleaned_page.get_contents().get_data()

            assert result['total_pages'] == 1
            assert result['cleaned_pages'] == 1
            assert 'Hello world' in extracted_text
            # The cleaned content stream must no longer reference the overlay.
            # NOTE(review): '/FormXob' is presumably the name prefix reportlab
            # gives the drawn image XObject - confirm against reportlab.
            assert b'/FormXob' not in cleaned_stream

    def test_remove_watermark_raises_when_no_supported_pattern_found(self, app, tmp_path):
        """Should fail clearly instead of returning an unchanged PDF as success."""
        canvas = pytest.importorskip('reportlab.pdfgen.canvas', reason="reportlab not installed")

        with app.app_context():
            input_path = str(tmp_path / 'plain.pdf')
            output_path = str(tmp_path / 'plain_cleaned.pdf')

            # A page with ordinary text and no watermark overlay at all.
            c = canvas.Canvas(input_path)
            c.drawString(72, 720, 'Plain PDF without watermark')
            c.save()

            with pytest.raises(PDFToolsError, match='No removable watermark overlay'):
                remove_watermark(input_path, output_path)

    # NOTE(review): the two tests below exercise ``split_pdf``, not
    # ``remove_watermark``. They look misplaced in this class; they are kept
    # here so their pytest node IDs stay stable. Consider moving them to the
    # split test class.

    def test_split_range_out_of_bounds_includes_total_pages(self, app, tmp_path):
        """Should raise a clear error when requested pages exceed document page count."""
        pypdf2 = pytest.importorskip('PyPDF2', reason="PyPDF2 not installed")

        with app.app_context():
            # Single-page document; the requested range '1-2' overruns it.
            input_path = str(tmp_path / 'single-page.pdf')
            writer = pypdf2.PdfWriter()
            writer.add_blank_page(width=612, height=792)
            with open(input_path, 'wb') as f:
                writer.write(f)

            output_dir = str(tmp_path / 'split_output')

            with pytest.raises(PDFToolsError, match='has only 1 page'):
                split_pdf(input_path, output_dir, mode='range', pages='1-2')

    def test_split_range_invalid_format_returns_clear_message(self, app, tmp_path):
        """Should raise a clear error for malformed page ranges."""
        pypdf2 = pytest.importorskip('PyPDF2', reason="PyPDF2 not installed")

        with app.app_context():
            input_path = str(tmp_path / 'two-pages.pdf')
            writer = pypdf2.PdfWriter()
            for _ in range(2):
                writer.add_blank_page(width=612, height=792)
            with open(input_path, 'wb') as f:
                writer.write(f)

            output_dir = str(tmp_path / 'split_output')

            # '1-2-3' has too many range separators to be a valid spec.
            with pytest.raises(PDFToolsError, match='Invalid page format'):
                split_pdf(input_path, output_dir, mode='range', pages='1-2-3')


class TestReorderPdfService:
    """Tests for ``reorder_pdf_pages``: the new order must be a full permutation."""

    def test_reorder_requires_full_page_permutation(self, app, tmp_path):
        """Should reject duplicates or omissions instead of silently dropping pages."""
        # Skip only when PyPDF2 itself is unavailable; an ImportError raised
        # while running the service must surface as a failure, not a skip.
        pypdf2 = pytest.importorskip('PyPDF2', reason='PyPDF2 not installed')

        with app.app_context():
            input_path = str(tmp_path / 'reorder-source.pdf')
            output_path = str(tmp_path / 'reorder-output.pdf')

            # Three-page source document.
            writer = pypdf2.PdfWriter()
            for _ in range(3):
                writer.add_blank_page(width=612, height=792)
            with open(input_path, 'wb') as f:
                writer.write(f)

            # [3, 1, 1] repeats page 1 and omits page 2.
            with pytest.raises(PDFToolsError, match='Provide every page exactly once'):
                reorder_pdf_pages(input_path, output_path, [3, 1, 1])

    def test_reorder_accepts_full_page_permutation(self, app, tmp_path):
        """Should reorder when every page is present exactly once."""
        skip_reason = 'PyPDF2/reportlab not installed'
        canvas = pytest.importorskip('reportlab.pdfgen.canvas', reason=skip_reason)
        pypdf2 = pytest.importorskip('PyPDF2', reason=skip_reason)

        with app.app_context():
            input_path = str(tmp_path / 'reorder-valid-source.pdf')
            output_path = str(tmp_path / 'reorder-valid-output.pdf')

            # Label each page with its original 1-based number so the output
            # order can be verified through text extraction.
            c = canvas.Canvas(input_path)
            for page_number in range(1, 4):
                c.drawString(100, 700, f'Page {page_number}')
                c.showPage()
            c.save()

            new_order = [3, 1, 2]
            result = reorder_pdf_pages(input_path, output_path, new_order)
            reader = pypdf2.PdfReader(output_path)

            assert result['reordered_pages'] == 3
            # Output page i must carry the label of the page requested at
            # position i. extract_text() may return None, hence the `or ''`.
            for position, source_page in enumerate(new_order):
                page_text = reader.pages[position].extract_text() or ''
                assert f'Page {source_page}' in page_text