feat: Enhance PDF tools with new reorder and watermark removal functionalities
- Added backend tests for rotating PDFs, removing watermarks, and reordering pages.
- Implemented frontend logic to read page counts from uploaded PDFs and validate page orders.
- Updated internationalization files to include new strings for the reorder and watermark removal features.
- Improved user feedback during page-count reading and validation in the Reorder PDF component.
- Ensured that the reorder functionality requires a complete permutation of pages.
This commit is contained in:
@@ -4,9 +4,13 @@ import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
from app.services.pdf_tools_service import (
|
||||
add_watermark,
|
||||
merge_pdfs,
|
||||
split_pdf,
|
||||
PDFToolsError,
|
||||
remove_watermark,
|
||||
reorder_pdf_pages,
|
||||
rotate_pdf,
|
||||
split_pdf,
|
||||
)
|
||||
|
||||
|
||||
@@ -71,6 +75,122 @@ class TestSplitPdfService:
|
||||
except ImportError:
|
||||
pytest.skip("PyPDF2 not installed")
|
||||
|
||||
|
||||
class TestRotatePdfService:
    """Service-level tests for ``rotate_pdf``."""

    def test_rotate_range_invalid_format_returns_clear_message(self, app, tmp_path):
        """Should raise a clear error for malformed page specs instead of failing generically."""
        # Skip up front when the optional dependency is missing. Wrapping the
        # whole test body in ``except ImportError`` would also turn an
        # ImportError raised by the code under test into a silent skip.
        PdfWriter = pytest.importorskip(
            'PyPDF2', reason="PyPDF2 not installed"
        ).PdfWriter

        with app.app_context():
            input_path = str(tmp_path / 'rotate-source.pdf')
            output_path = str(tmp_path / 'rotate-output.pdf')

            # Fixture: a two-page blank PDF written to the temp directory.
            writer = PdfWriter()
            writer.add_blank_page(width=612, height=792)
            writer.add_blank_page(width=612, height=792)
            with open(input_path, 'wb') as f:
                writer.write(f)

            # '1-two' is not a numeric range, so the service must reject it
            # with its dedicated "Invalid page format" message.
            with pytest.raises(PDFToolsError, match='Invalid page format'):
                rotate_pdf(input_path, output_path, rotation=90, pages='1-two')
|
||||
|
||||
|
||||
class TestRemoveWatermarkService:
    """Service-level tests for ``remove_watermark``."""

    def test_remove_text_watermark_from_reportlab_overlay(self, app, tmp_path):
        """Should remove text watermarks generated by the platform watermark flow."""
        # Skip up front when an optional dependency is missing, so that an
        # ImportError raised by the service itself is reported as a failure
        # instead of being swallowed into a skip.
        canvas = pytest.importorskip(
            'reportlab.pdfgen.canvas', reason="PyPDF2/reportlab not installed"
        )
        PdfReader = pytest.importorskip(
            'PyPDF2', reason="PyPDF2/reportlab not installed"
        ).PdfReader

        with app.app_context():
            input_path = str(tmp_path / 'source.pdf')
            watermarked_path = str(tmp_path / 'watermarked.pdf')
            output_path = str(tmp_path / 'cleaned.pdf')

            # Source document: a single page carrying known body text.
            c = canvas.Canvas(input_path)
            c.drawString(100, 700, 'Hello world')
            c.save()

            # Round trip: apply the watermark with the service, then remove it.
            add_watermark(input_path, watermarked_path, 'CONFIDENTIAL')
            result = remove_watermark(watermarked_path, output_path)

            extracted_text = PdfReader(output_path).pages[0].extract_text() or ''

            assert result['total_pages'] == 1
            assert result['cleaned_pages'] == 1
            assert result['output_size'] > 0
            assert os.path.exists(output_path)
            # Body text must survive; the watermark text must be gone.
            assert 'Hello world' in extracted_text
            assert 'CONFIDENTIAL' not in extracted_text

    def test_remove_image_watermark_overlay_from_trailing_xobject(self, app, tmp_path):
        """Should remove supported trailing image watermark overlays while preserving page text."""
        # Skip up front when an optional dependency is missing (see the note in
        # the text-watermark test for why the try/except form is avoided).
        Image = pytest.importorskip(
            'PIL.Image', reason='PyPDF2/reportlab/Pillow not installed'
        )
        canvas = pytest.importorskip(
            'reportlab.pdfgen.canvas', reason='PyPDF2/reportlab/Pillow not installed'
        )
        PyPDF2 = pytest.importorskip(
            'PyPDF2', reason='PyPDF2/reportlab/Pillow not installed'
        )
        PdfReader = PyPDF2.PdfReader
        PdfWriter = PyPDF2.PdfWriter

        with app.app_context():
            input_path = str(tmp_path / 'source.pdf')
            overlay_path = str(tmp_path / 'overlay.pdf')
            watermarked_path = str(tmp_path / 'image-watermarked.pdf')
            output_path = str(tmp_path / 'image-cleaned.pdf')
            watermark_image_path = str(tmp_path / 'watermark.png')

            # Source document: a single page carrying known body text.
            c = canvas.Canvas(input_path)
            c.drawString(100, 700, 'Hello world')
            c.save()

            # Semi-transparent red PNG used as the image watermark.
            Image.new('RGBA', (200, 80), (220, 38, 38, 96)).save(watermark_image_path)

            # Overlay page that draws only the watermark image.
            c = canvas.Canvas(overlay_path)
            c.drawImage(watermark_image_path, 180, 360, width=240, height=96, mask='auto')
            c.save()

            # Merge the overlay onto the base page so the image drawing is
            # appended after the original page content.
            base_page = PdfReader(input_path).pages[0]
            overlay_page = PdfReader(overlay_path).pages[0]
            base_page.merge_page(overlay_page)

            writer = PdfWriter()
            writer.add_page(base_page)
            with open(watermarked_path, 'wb') as f:
                writer.write(f)

            result = remove_watermark(watermarked_path, output_path)
            cleaned_page = PdfReader(output_path).pages[0]
            extracted_text = cleaned_page.extract_text() or ''
            cleaned_stream = cleaned_page.get_contents().get_data()

            assert result['total_pages'] == 1
            assert result['cleaned_pages'] == 1
            assert 'Hello world' in extracted_text
            # The cleaned content stream must no longer reference the image XObject.
            assert b'/FormXob' not in cleaned_stream

    def test_remove_watermark_raises_when_no_supported_pattern_found(self, app, tmp_path):
        """Should fail clearly instead of returning an unchanged PDF as success."""
        canvas = pytest.importorskip(
            'reportlab.pdfgen.canvas', reason="reportlab not installed"
        )

        with app.app_context():
            input_path = str(tmp_path / 'plain.pdf')
            output_path = str(tmp_path / 'plain_cleaned.pdf')

            # A PDF with body text only and no watermark overlay.
            c = canvas.Canvas(input_path)
            c.drawString(72, 720, 'Plain PDF without watermark')
            c.save()

            with pytest.raises(PDFToolsError, match='No removable watermark overlay'):
                remove_watermark(input_path, output_path)
|
||||
|
||||
def test_split_range_out_of_bounds_includes_total_pages(self, app, tmp_path):
|
||||
"""Should raise a clear error when requested pages exceed document page count."""
|
||||
with app.app_context():
|
||||
@@ -108,4 +228,52 @@ class TestSplitPdfService:
|
||||
with pytest.raises(PDFToolsError, match='Invalid page format'):
|
||||
split_pdf(input_path, output_dir, mode='range', pages='1-2-3')
|
||||
except ImportError:
|
||||
pytest.skip("PyPDF2 not installed")
|
||||
pytest.skip("PyPDF2 not installed")
|
||||
|
||||
|
||||
class TestReorderPdfService:
    """Service-level tests for ``reorder_pdf_pages``."""

    def test_reorder_requires_full_page_permutation(self, app, tmp_path):
        """Should reject duplicates or omissions instead of silently dropping pages."""
        # Skip up front when the optional dependency is missing. Wrapping the
        # whole test body in ``except ImportError`` would also turn an
        # ImportError raised by the code under test into a silent skip.
        PdfWriter = pytest.importorskip(
            'PyPDF2', reason='PyPDF2 not installed'
        ).PdfWriter

        with app.app_context():
            input_path = str(tmp_path / 'reorder-source.pdf')
            output_path = str(tmp_path / 'reorder-output.pdf')

            # Fixture: a three-page blank PDF.
            writer = PdfWriter()
            for _ in range(3):
                writer.add_blank_page(width=612, height=792)
            with open(input_path, 'wb') as f:
                writer.write(f)

            # [3, 1, 1] repeats page 1 and omits page 2, so it is not a
            # permutation of the document's pages.
            with pytest.raises(PDFToolsError, match='Provide every page exactly once'):
                reorder_pdf_pages(input_path, output_path, [3, 1, 1])

    def test_reorder_accepts_full_page_permutation(self, app, tmp_path):
        """Should reorder when every page is present exactly once."""
        canvas = pytest.importorskip(
            'reportlab.pdfgen.canvas', reason='PyPDF2/reportlab not installed'
        )
        PdfReader = pytest.importorskip(
            'PyPDF2', reason='PyPDF2/reportlab not installed'
        ).PdfReader

        with app.app_context():
            input_path = str(tmp_path / 'reorder-valid-source.pdf')
            output_path = str(tmp_path / 'reorder-valid-output.pdf')

            # Fixture: three pages labelled "Page 1".."Page 3" so the output
            # order can be verified via text extraction.
            c = canvas.Canvas(input_path)
            for page_number in range(1, 4):
                c.drawString(100, 700, f'Page {page_number}')
                c.showPage()
            c.save()

            result = reorder_pdf_pages(input_path, output_path, [3, 1, 2])
            reader = PdfReader(output_path)

            assert result['reordered_pages'] == 3
            # Output pages must follow the requested order 3, 1, 2.
            assert 'Page 3' in (reader.pages[0].extract_text() or '')
            assert 'Page 1' in (reader.pages[1].extract_text() or '')
            assert 'Page 2' in (reader.pages[2].extract_text() or '')
|
||||
Reference in New Issue
Block a user