"""Tests for PDF tools service — Merge, Split, Rotate, etc.""" import os import pytest from unittest.mock import patch, MagicMock from app.services.pdf_tools_service import ( add_watermark, merge_pdfs, PDFToolsError, remove_watermark, reorder_pdf_pages, rotate_pdf, split_pdf, ) class TestMergePdfsService: def test_merge_file_not_found_raises(self, app): """Should raise when input file doesn't exist.""" with app.app_context(): with pytest.raises(PDFToolsError, match="not found"): merge_pdfs( ['/tmp/nonexistent1.pdf', '/tmp/nonexistent2.pdf'], '/tmp/merged_output.pdf', ) def test_merge_success(self, app, tmp_path): """Should merge PDF files successfully.""" with app.app_context(): # Create test PDFs using PyPDF2 try: from PyPDF2 import PdfWriter pdf1 = str(tmp_path / 'a.pdf') pdf2 = str(tmp_path / 'b.pdf') for path in [pdf1, pdf2]: writer = PdfWriter() writer.add_blank_page(width=612, height=792) with open(path, 'wb') as f: writer.write(f) output = str(tmp_path / 'merged.pdf') result = merge_pdfs([pdf1, pdf2], output) assert result['total_pages'] == 2 assert result['files_merged'] == 2 assert result['output_size'] > 0 assert os.path.exists(output) except ImportError: pytest.skip("PyPDF2 not installed") class TestSplitPdfService: def test_split_all_pages(self, app, tmp_path): """Should split PDF into individual pages.""" with app.app_context(): try: from PyPDF2 import PdfWriter # Create 3-page PDF input_path = str(tmp_path / 'multi.pdf') writer = PdfWriter() for _ in range(3): writer.add_blank_page(width=612, height=792) with open(input_path, 'wb') as f: writer.write(f) output_dir = str(tmp_path / 'split_output') result = split_pdf(input_path, output_dir, mode='all') assert result['total_pages'] == 3 assert result['extracted_pages'] == 3 assert os.path.exists(result['zip_path']) except ImportError: pytest.skip("PyPDF2 not installed") class TestRotatePdfService: def test_rotate_range_invalid_format_returns_clear_message(self, app, tmp_path): """Should raise a clear error for malformed page specs instead of failing generically.""" with app.app_context(): try: from PyPDF2 import PdfWriter input_path = str(tmp_path / 'rotate-source.pdf') output_path = str(tmp_path / 'rotate-output.pdf') writer = PdfWriter() writer.add_blank_page(width=612, height=792) writer.add_blank_page(width=612, height=792) with open(input_path, 'wb') as f: writer.write(f) with pytest.raises(PDFToolsError, match='Invalid page format'): rotate_pdf(input_path, output_path, rotation=90, pages='1-two') except ImportError: pytest.skip("PyPDF2 not installed") class TestRemoveWatermarkService: def test_remove_text_watermark_from_reportlab_overlay(self, app, tmp_path): """Should remove text watermarks generated by the platform watermark flow.""" with app.app_context(): try: from reportlab.pdfgen import canvas from PyPDF2 import PdfReader input_path = str(tmp_path / 'source.pdf') watermarked_path = str(tmp_path / 'watermarked.pdf') output_path = str(tmp_path / 'cleaned.pdf') c = canvas.Canvas(input_path) c.drawString(100, 700, 'Hello world') c.save() add_watermark(input_path, watermarked_path, 'CONFIDENTIAL') result = remove_watermark(watermarked_path, output_path) extracted_text = PdfReader(output_path).pages[0].extract_text() or '' assert result['total_pages'] == 1 assert result['cleaned_pages'] == 1 assert result['output_size'] > 0 assert os.path.exists(output_path) assert 'Hello world' in extracted_text assert 'CONFIDENTIAL' not in extracted_text except ImportError: pytest.skip("PyPDF2/reportlab not installed") def test_remove_image_watermark_overlay_from_trailing_xobject(self, app, tmp_path): """Should remove supported trailing image watermark overlays while preserving page text.""" with app.app_context(): try: from PIL import Image from reportlab.pdfgen import canvas from PyPDF2 import PdfReader, PdfWriter input_path = str(tmp_path / 'source.pdf') overlay_path = str(tmp_path / 'overlay.pdf') watermarked_path = str(tmp_path / 'image-watermarked.pdf') output_path = str(tmp_path / 'image-cleaned.pdf') watermark_image_path = str(tmp_path / 'watermark.png') c = canvas.Canvas(input_path) c.drawString(100, 700, 'Hello world') c.save() Image.new('RGBA', (200, 80), (220, 38, 38, 96)).save(watermark_image_path) c = canvas.Canvas(overlay_path) c.drawImage(watermark_image_path, 180, 360, width=240, height=96, mask='auto') c.save() base_page = PdfReader(input_path).pages[0] overlay_page = PdfReader(overlay_path).pages[0] base_page.merge_page(overlay_page) writer = PdfWriter() writer.add_page(base_page) with open(watermarked_path, 'wb') as f: writer.write(f) result = remove_watermark(watermarked_path, output_path) cleaned_page = PdfReader(output_path).pages[0] extracted_text = cleaned_page.extract_text() or '' cleaned_stream = cleaned_page.get_contents().get_data() assert result['total_pages'] == 1 assert result['cleaned_pages'] == 1 assert 'Hello world' in extracted_text assert b'/FormXob' not in cleaned_stream except ImportError: pytest.skip('PyPDF2/reportlab/Pillow not installed') def test_remove_watermark_raises_when_no_supported_pattern_found(self, app, tmp_path): """Should fail clearly instead of returning an unchanged PDF as success.""" with app.app_context(): try: from reportlab.pdfgen import canvas input_path = str(tmp_path / 'plain.pdf') output_path = str(tmp_path / 'plain_cleaned.pdf') c = canvas.Canvas(input_path) c.drawString(72, 720, 'Plain PDF without watermark') c.save() with pytest.raises(PDFToolsError, match='No removable watermark overlay'): remove_watermark(input_path, output_path) except ImportError: pytest.skip("reportlab not installed") def test_split_range_out_of_bounds_includes_total_pages(self, app, tmp_path): """Should raise a clear error when requested pages exceed document page count.""" with app.app_context(): try: from PyPDF2 import PdfWriter input_path = str(tmp_path / 'single-page.pdf') writer = PdfWriter() writer.add_blank_page(width=612, height=792) with open(input_path, 'wb') as f: writer.write(f) output_dir = str(tmp_path / 'split_output') with pytest.raises(PDFToolsError, match='has only 1 page'): split_pdf(input_path, output_dir, mode='range', pages='1-2') except ImportError: pytest.skip("PyPDF2 not installed") def test_split_range_invalid_format_returns_clear_message(self, app, tmp_path): """Should raise a clear error for malformed page ranges.""" with app.app_context(): try: from PyPDF2 import PdfWriter input_path = str(tmp_path / 'two-pages.pdf') writer = PdfWriter() writer.add_blank_page(width=612, height=792) writer.add_blank_page(width=612, height=792) with open(input_path, 'wb') as f: writer.write(f) output_dir = str(tmp_path / 'split_output') with pytest.raises(PDFToolsError, match='Invalid page format'): split_pdf(input_path, output_dir, mode='range', pages='1-2-3') except ImportError: pytest.skip("PyPDF2 not installed") class TestReorderPdfService: def test_reorder_requires_full_page_permutation(self, app, tmp_path): """Should reject duplicates or omissions instead of silently dropping pages.""" with app.app_context(): try: from PyPDF2 import PdfWriter input_path = str(tmp_path / 'reorder-source.pdf') output_path = str(tmp_path / 'reorder-output.pdf') writer = PdfWriter() for _ in range(3): writer.add_blank_page(width=612, height=792) with open(input_path, 'wb') as f: writer.write(f) with pytest.raises(PDFToolsError, match='Provide every page exactly once'): reorder_pdf_pages(input_path, output_path, [3, 1, 1]) except ImportError: pytest.skip('PyPDF2 not installed') def test_reorder_accepts_full_page_permutation(self, app, tmp_path): """Should reorder when every page is present exactly once.""" with app.app_context(): try: from reportlab.pdfgen import canvas from PyPDF2 import PdfReader input_path = str(tmp_path / 'reorder-valid-source.pdf') output_path = str(tmp_path / 'reorder-valid-output.pdf') c = canvas.Canvas(input_path) for page_number in range(1, 4): c.drawString(100, 700, f'Page {page_number}') c.showPage() c.save() result = reorder_pdf_pages(input_path, output_path, [3, 1, 2]) reader = PdfReader(output_path) assert result['reordered_pages'] == 3 assert 'Page 3' in (reader.pages[0].extract_text() or '') assert 'Page 1' in (reader.pages[1].extract_text() or '') assert 'Page 2' in (reader.pages[2].extract_text() or '') except ImportError: pytest.skip('PyPDF2/reportlab not installed')