feat: Enhance PDF tools with new reorder and watermark removal functionalities

- Added backend tests for rotating PDFs, removing watermarks, and reordering pages.
- Implemented frontend logic to read page counts from uploaded PDFs and validate page orders.
- Updated internationalization files to include new strings for the reorder and watermark removal features.
- Improved user feedback during page count reading and validation in the Reorder PDF component.
- Ensured that the reorder functionality requires a complete permutation of pages (every page exactly once).
2026-03-11 14:21:25 +02:00
parent e4e3b9fe2d
commit e06e64f85f
8 changed files with 641 additions and 69 deletions
--- a/backend/tests/test_pdf_tools_service.py
+++ b/backend/tests/test_pdf_tools_service.py
@@ -4,9 +4,13 @@ import pytest
 from unittest.mock import patch, MagicMock

 from app.services.pdf_tools_service import (
+    add_watermark,
    merge_pdfs,
-    split_pdf,
    PDFToolsError,
+    remove_watermark,
+    reorder_pdf_pages,
+    rotate_pdf,
+    split_pdf,
 )


@@ -71,6 +75,122 @@ class TestSplitPdfService:
            except ImportError:
                pytest.skip("PyPDF2 not installed")

+
+class TestRotatePdfService:
+    def test_rotate_range_invalid_format_returns_clear_message(self, app, tmp_path):
+        """Should raise a clear error for malformed page specs instead of failing generically."""
+        with app.app_context():
+            try:
+                from PyPDF2 import PdfWriter
+
+                input_path = str(tmp_path / 'rotate-source.pdf')
+                output_path = str(tmp_path / 'rotate-output.pdf')
+
+                writer = PdfWriter()
+                writer.add_blank_page(width=612, height=792)
+                writer.add_blank_page(width=612, height=792)
+                with open(input_path, 'wb') as f:
+                    writer.write(f)
+
+                with pytest.raises(PDFToolsError, match='Invalid page format'):
+                    rotate_pdf(input_path, output_path, rotation=90, pages='1-two')
+            except ImportError:
+                pytest.skip("PyPDF2 not installed")
+
+
+class TestRemoveWatermarkService:
+    def test_remove_text_watermark_from_reportlab_overlay(self, app, tmp_path):
+        """Should remove text watermarks generated by the platform watermark flow."""
+        with app.app_context():
+            try:
+                from reportlab.pdfgen import canvas
+                from PyPDF2 import PdfReader
+
+                input_path = str(tmp_path / 'source.pdf')
+                watermarked_path = str(tmp_path / 'watermarked.pdf')
+                output_path = str(tmp_path / 'cleaned.pdf')
+
+                c = canvas.Canvas(input_path)
+                c.drawString(100, 700, 'Hello world')
+                c.save()
+
+                add_watermark(input_path, watermarked_path, 'CONFIDENTIAL')
+                result = remove_watermark(watermarked_path, output_path)
+
+                extracted_text = PdfReader(output_path).pages[0].extract_text() or ''
+
+                assert result['total_pages'] == 1
+                assert result['cleaned_pages'] == 1
+                assert result['output_size'] > 0
+                assert os.path.exists(output_path)
+                assert 'Hello world' in extracted_text
+                assert 'CONFIDENTIAL' not in extracted_text
+            except ImportError:
+                pytest.skip("PyPDF2/reportlab not installed")
+
+    def test_remove_image_watermark_overlay_from_trailing_xobject(self, app, tmp_path):
+        """Should remove supported trailing image watermark overlays while preserving page text."""
+        with app.app_context():
+            try:
+                from PIL import Image
+                from reportlab.pdfgen import canvas
+                from PyPDF2 import PdfReader, PdfWriter
+
+                input_path = str(tmp_path / 'source.pdf')
+                overlay_path = str(tmp_path / 'overlay.pdf')
+                watermarked_path = str(tmp_path / 'image-watermarked.pdf')
+                output_path = str(tmp_path / 'image-cleaned.pdf')
+                watermark_image_path = str(tmp_path / 'watermark.png')
+
+                c = canvas.Canvas(input_path)
+                c.drawString(100, 700, 'Hello world')
+                c.save()
+
+                Image.new('RGBA', (200, 80), (220, 38, 38, 96)).save(watermark_image_path)
+
+                c = canvas.Canvas(overlay_path)
+                c.drawImage(watermark_image_path, 180, 360, width=240, height=96, mask='auto')
+                c.save()
+
+                base_page = PdfReader(input_path).pages[0]
+                overlay_page = PdfReader(overlay_path).pages[0]
+                base_page.merge_page(overlay_page)
+
+                writer = PdfWriter()
+                writer.add_page(base_page)
+                with open(watermarked_path, 'wb') as f:
+                    writer.write(f)
+
+                result = remove_watermark(watermarked_path, output_path)
+                cleaned_page = PdfReader(output_path).pages[0]
+                extracted_text = cleaned_page.extract_text() or ''
+                cleaned_stream = cleaned_page.get_contents().get_data()
+
+                assert result['total_pages'] == 1
+                assert result['cleaned_pages'] == 1
+                assert 'Hello world' in extracted_text
+                assert b'/FormXob' not in cleaned_stream
+            except ImportError:
+                pytest.skip('PyPDF2/reportlab/Pillow not installed')
+
+    def test_remove_watermark_raises_when_no_supported_pattern_found(self, app, tmp_path):
+        """Should fail clearly instead of returning an unchanged PDF as success."""
+        with app.app_context():
+            try:
+                from reportlab.pdfgen import canvas
+
+                input_path = str(tmp_path / 'plain.pdf')
+                output_path = str(tmp_path / 'plain_cleaned.pdf')
+
+                c = canvas.Canvas(input_path)
+                c.drawString(72, 720, 'Plain PDF without watermark')
+                c.save()
+
+                with pytest.raises(PDFToolsError, match='No removable watermark overlay'):
+                    remove_watermark(input_path, output_path)
+            except ImportError:
+                pytest.skip("reportlab not installed")
+
    def test_split_range_out_of_bounds_includes_total_pages(self, app, tmp_path):
        """Should raise a clear error when requested pages exceed document page count."""
        with app.app_context():
@@ -108,4 +228,52 @@ class TestSplitPdfService:
                with pytest.raises(PDFToolsError, match='Invalid page format'):
                    split_pdf(input_path, output_dir, mode='range', pages='1-2-3')
            except ImportError:
-                pytest.skip("PyPDF2 not installed")
+                pytest.skip("PyPDF2 not installed")
+
+
+class TestReorderPdfService:
+    def test_reorder_requires_full_page_permutation(self, app, tmp_path):
+        """Should reject duplicates or omissions instead of silently dropping pages."""
+        with app.app_context():
+            try:
+                from PyPDF2 import PdfWriter
+
+                input_path = str(tmp_path / 'reorder-source.pdf')
+                output_path = str(tmp_path / 'reorder-output.pdf')
+
+                writer = PdfWriter()
+                for _ in range(3):
+                    writer.add_blank_page(width=612, height=792)
+                with open(input_path, 'wb') as f:
+                    writer.write(f)
+
+                with pytest.raises(PDFToolsError, match='Provide every page exactly once'):
+                    reorder_pdf_pages(input_path, output_path, [3, 1, 1])
+            except ImportError:
+                pytest.skip('PyPDF2 not installed')
+
+    def test_reorder_accepts_full_page_permutation(self, app, tmp_path):
+        """Should reorder when every page is present exactly once."""
+        with app.app_context():
+            try:
+                from reportlab.pdfgen import canvas
+                from PyPDF2 import PdfReader
+
+                input_path = str(tmp_path / 'reorder-valid-source.pdf')
+                output_path = str(tmp_path / 'reorder-valid-output.pdf')
+
+                c = canvas.Canvas(input_path)
+                for page_number in range(1, 4):
+                    c.drawString(100, 700, f'Page {page_number}')
+                    c.showPage()
+                c.save()
+
+                result = reorder_pdf_pages(input_path, output_path, [3, 1, 2])
+                reader = PdfReader(output_path)
+
+                assert result['reordered_pages'] == 3
+                assert 'Page 3' in (reader.pages[0].extract_text() or '')
+                assert 'Page 1' in (reader.pages[1].extract_text() or '')
+                assert 'Page 2' in (reader.pages[2].extract_text() or '')
+            except ImportError:
+                pytest.skip('PyPDF2/reportlab not installed')