- Added tests for rotating PDFs, removing watermarks, and reordering pages in the backend. - Implemented frontend logic to read page counts from uploaded PDFs and validate page orders. - Updated internationalization files to include new strings for reorder and watermark removal features. - Improved user feedback during page count reading and validation in the Reorder PDF component. - Ensured that the reorder functionality requires a complete permutation of pages.
279 lines
11 KiB
Python
279 lines
11 KiB
Python
"""Tests for PDF tools service — Merge, Split, Rotate, etc."""
|
|
import os
|
|
import pytest
|
|
from unittest.mock import patch, MagicMock
|
|
|
|
from app.services.pdf_tools_service import (
|
|
add_watermark,
|
|
merge_pdfs,
|
|
PDFToolsError,
|
|
remove_watermark,
|
|
reorder_pdf_pages,
|
|
rotate_pdf,
|
|
split_pdf,
|
|
)
|
|
|
|
|
|
class TestMergePdfsService:
    """Tests for ``merge_pdfs``."""

    def test_merge_file_not_found_raises(self, app, tmp_path):
        """Should raise when input file doesn't exist."""
        # Paths under the per-test temporary directory are guaranteed not to
        # exist and keep the test away from the shared system temp directory.
        missing_inputs = [
            str(tmp_path / 'nonexistent1.pdf'),
            str(tmp_path / 'nonexistent2.pdf'),
        ]
        output = str(tmp_path / 'merged_output.pdf')

        with app.app_context():
            with pytest.raises(PDFToolsError, match="not found"):
                merge_pdfs(missing_inputs, output)

    def test_merge_success(self, app, tmp_path):
        """Should merge PDF files successfully."""
        # Skip only when the optional dependency itself is missing.  Wrapping
        # the whole test in ``except ImportError`` would also turn an
        # ImportError raised by the code under test into a silent skip.
        PdfWriter = pytest.importorskip("PyPDF2", reason="PyPDF2 not installed").PdfWriter

        with app.app_context():
            # Create two single-page test PDFs using PyPDF2.
            pdf1 = str(tmp_path / 'a.pdf')
            pdf2 = str(tmp_path / 'b.pdf')

            for path in (pdf1, pdf2):
                writer = PdfWriter()
                writer.add_blank_page(width=612, height=792)
                with open(path, 'wb') as f:
                    writer.write(f)

            output = str(tmp_path / 'merged.pdf')
            result = merge_pdfs([pdf1, pdf2], output)

            assert result['total_pages'] == 2
            assert result['files_merged'] == 2
            assert result['output_size'] > 0
            assert os.path.exists(output)
|
|
|
|
|
|
class TestSplitPdfService:
    """Tests for ``split_pdf``."""

    def test_split_all_pages(self, app, tmp_path):
        """Should split PDF into individual pages."""
        # Skip only when PyPDF2 itself is unavailable; an ImportError raised
        # later by the service must fail the test rather than skip it.
        PdfWriter = pytest.importorskip("PyPDF2", reason="PyPDF2 not installed").PdfWriter

        with app.app_context():
            # Create 3-page PDF
            input_path = str(tmp_path / 'multi.pdf')
            writer = PdfWriter()
            for _ in range(3):
                writer.add_blank_page(width=612, height=792)
            with open(input_path, 'wb') as f:
                writer.write(f)

            output_dir = str(tmp_path / 'split_output')
            result = split_pdf(input_path, output_dir, mode='all')

            assert result['total_pages'] == 3
            assert result['extracted_pages'] == 3
            assert os.path.exists(result['zip_path'])
|
|
|
|
|
|
class TestRotatePdfService:
    """Tests for ``rotate_pdf``."""

    def test_rotate_range_invalid_format_returns_clear_message(self, app, tmp_path):
        """Should raise a clear error for malformed page specs instead of failing generically."""
        # Skip only when PyPDF2 itself is unavailable; an ImportError raised
        # later by the service must fail the test rather than skip it.
        PdfWriter = pytest.importorskip("PyPDF2", reason="PyPDF2 not installed").PdfWriter

        with app.app_context():
            input_path = str(tmp_path / 'rotate-source.pdf')
            output_path = str(tmp_path / 'rotate-output.pdf')

            # Two-page source document.
            writer = PdfWriter()
            writer.add_blank_page(width=612, height=792)
            writer.add_blank_page(width=612, height=792)
            with open(input_path, 'wb') as f:
                writer.write(f)

            # '1-two' is not a numeric range, so the page spec is malformed.
            with pytest.raises(PDFToolsError, match='Invalid page format'):
                rotate_pdf(input_path, output_path, rotation=90, pages='1-two')
|
|
|
|
|
|
class TestRemoveWatermarkService:
    """Tests for ``remove_watermark`` (plus two ``split_pdf`` range tests, see note below)."""

    def test_remove_text_watermark_from_reportlab_overlay(self, app, tmp_path):
        """Should remove text watermarks generated by the platform watermark flow."""
        # Skip only when an optional dependency is missing.  A blanket
        # ``except ImportError`` around the test body would also hide an
        # ImportError raised inside the service under test.
        canvas = pytest.importorskip(
            "reportlab.pdfgen.canvas", reason="PyPDF2/reportlab not installed"
        )
        PdfReader = pytest.importorskip(
            "PyPDF2", reason="PyPDF2/reportlab not installed"
        ).PdfReader

        with app.app_context():
            input_path = str(tmp_path / 'source.pdf')
            watermarked_path = str(tmp_path / 'watermarked.pdf')
            output_path = str(tmp_path / 'cleaned.pdf')

            # Source document with one line of real page text.
            c = canvas.Canvas(input_path)
            c.drawString(100, 700, 'Hello world')
            c.save()

            # Round-trip: add a watermark with the service, then remove it.
            add_watermark(input_path, watermarked_path, 'CONFIDENTIAL')
            result = remove_watermark(watermarked_path, output_path)

            extracted_text = PdfReader(output_path).pages[0].extract_text() or ''

            assert result['total_pages'] == 1
            assert result['cleaned_pages'] == 1
            assert result['output_size'] > 0
            assert os.path.exists(output_path)
            assert 'Hello world' in extracted_text
            assert 'CONFIDENTIAL' not in extracted_text

    def test_remove_image_watermark_overlay_from_trailing_xobject(self, app, tmp_path):
        """Should remove supported trailing image watermark overlays while preserving page text."""
        skip_reason = 'PyPDF2/reportlab/Pillow not installed'
        Image = pytest.importorskip("PIL.Image", reason=skip_reason)
        canvas = pytest.importorskip("reportlab.pdfgen.canvas", reason=skip_reason)
        pypdf2 = pytest.importorskip("PyPDF2", reason=skip_reason)
        PdfReader, PdfWriter = pypdf2.PdfReader, pypdf2.PdfWriter

        with app.app_context():
            input_path = str(tmp_path / 'source.pdf')
            overlay_path = str(tmp_path / 'overlay.pdf')
            watermarked_path = str(tmp_path / 'image-watermarked.pdf')
            output_path = str(tmp_path / 'image-cleaned.pdf')
            watermark_image_path = str(tmp_path / 'watermark.png')

            # Base document with real page text.
            c = canvas.Canvas(input_path)
            c.drawString(100, 700, 'Hello world')
            c.save()

            # Semi-transparent image used as the watermark.
            Image.new('RGBA', (200, 80), (220, 38, 38, 96)).save(watermark_image_path)

            # Overlay page that contains only the watermark image.
            c = canvas.Canvas(overlay_path)
            c.drawImage(watermark_image_path, 180, 360, width=240, height=96, mask='auto')
            c.save()

            # Merge the overlay onto the base page to build the watermarked input.
            base_page = PdfReader(input_path).pages[0]
            overlay_page = PdfReader(overlay_path).pages[0]
            base_page.merge_page(overlay_page)

            writer = PdfWriter()
            writer.add_page(base_page)
            with open(watermarked_path, 'wb') as f:
                writer.write(f)

            result = remove_watermark(watermarked_path, output_path)
            cleaned_page = PdfReader(output_path).pages[0]
            extracted_text = cleaned_page.extract_text() or ''
            cleaned_stream = cleaned_page.get_contents().get_data()

            assert result['total_pages'] == 1
            assert result['cleaned_pages'] == 1
            assert 'Hello world' in extracted_text
            # NOTE(review): presumably reportlab names the image XObject with a
            # '/FormXob' prefix, so its absence means the overlay draw call was
            # stripped from the content stream -- confirm against reportlab.
            assert b'/FormXob' not in cleaned_stream

    def test_remove_watermark_raises_when_no_supported_pattern_found(self, app, tmp_path):
        """Should fail clearly instead of returning an unchanged PDF as success."""
        canvas = pytest.importorskip(
            "reportlab.pdfgen.canvas", reason="reportlab not installed"
        )

        with app.app_context():
            input_path = str(tmp_path / 'plain.pdf')
            output_path = str(tmp_path / 'plain_cleaned.pdf')

            # Plain document: nothing for the watermark remover to strip.
            c = canvas.Canvas(input_path)
            c.drawString(72, 720, 'Plain PDF without watermark')
            c.save()

            with pytest.raises(PDFToolsError, match='No removable watermark overlay'):
                remove_watermark(input_path, output_path)

    # NOTE(review): the two tests below exercise ``split_pdf`` rather than
    # ``remove_watermark``.  They are kept in this class so their test IDs do
    # not change, but they look misplaced -- consider moving them next to the
    # other split tests.

    def test_split_range_out_of_bounds_includes_total_pages(self, app, tmp_path):
        """Should raise a clear error when requested pages exceed document page count."""
        PdfWriter = pytest.importorskip("PyPDF2", reason="PyPDF2 not installed").PdfWriter

        with app.app_context():
            # Single-page document, so the range '1-2' runs past the end.
            input_path = str(tmp_path / 'single-page.pdf')
            writer = PdfWriter()
            writer.add_blank_page(width=612, height=792)
            with open(input_path, 'wb') as f:
                writer.write(f)

            output_dir = str(tmp_path / 'split_output')

            with pytest.raises(PDFToolsError, match='has only 1 page'):
                split_pdf(input_path, output_dir, mode='range', pages='1-2')

    def test_split_range_invalid_format_returns_clear_message(self, app, tmp_path):
        """Should raise a clear error for malformed page ranges."""
        PdfWriter = pytest.importorskip("PyPDF2", reason="PyPDF2 not installed").PdfWriter

        with app.app_context():
            input_path = str(tmp_path / 'two-pages.pdf')
            writer = PdfWriter()
            writer.add_blank_page(width=612, height=792)
            writer.add_blank_page(width=612, height=792)
            with open(input_path, 'wb') as f:
                writer.write(f)

            output_dir = str(tmp_path / 'split_output')

            # '1-2-3' has too many components to be a valid "start-end" range.
            with pytest.raises(PDFToolsError, match='Invalid page format'):
                split_pdf(input_path, output_dir, mode='range', pages='1-2-3')
|
|
|
|
|
|
class TestReorderPdfService:
    """Tests for ``reorder_pdf_pages``."""

    def test_reorder_requires_full_page_permutation(self, app, tmp_path):
        """Should reject duplicates or omissions instead of silently dropping pages."""
        # Skip only when PyPDF2 itself is unavailable; an ImportError raised
        # later by the service must fail the test rather than skip it.
        PdfWriter = pytest.importorskip("PyPDF2", reason='PyPDF2 not installed').PdfWriter

        with app.app_context():
            input_path = str(tmp_path / 'reorder-source.pdf')
            output_path = str(tmp_path / 'reorder-output.pdf')

            # Three-page source document.
            writer = PdfWriter()
            for _ in range(3):
                writer.add_blank_page(width=612, height=792)
            with open(input_path, 'wb') as f:
                writer.write(f)

            # [3, 1, 1] repeats page 1 and omits page 2, so it is not a permutation.
            with pytest.raises(PDFToolsError, match='Provide every page exactly once'):
                reorder_pdf_pages(input_path, output_path, [3, 1, 1])

    def test_reorder_accepts_full_page_permutation(self, app, tmp_path):
        """Should reorder when every page is present exactly once."""
        skip_reason = 'PyPDF2/reportlab not installed'
        canvas = pytest.importorskip("reportlab.pdfgen.canvas", reason=skip_reason)
        PdfReader = pytest.importorskip("PyPDF2", reason=skip_reason).PdfReader

        with app.app_context():
            input_path = str(tmp_path / 'reorder-valid-source.pdf')
            output_path = str(tmp_path / 'reorder-valid-output.pdf')

            # Label each page with its original number so the new order can be
            # verified from the extracted text.
            c = canvas.Canvas(input_path)
            for page_number in range(1, 4):
                c.drawString(100, 700, f'Page {page_number}')
                c.showPage()
            c.save()

            result = reorder_pdf_pages(input_path, output_path, [3, 1, 2])
            reader = PdfReader(output_path)

            assert result['reordered_pages'] == 3
            assert 'Page 3' in (reader.pages[0].extract_text() or '')
            assert 'Page 1' in (reader.pages[1].extract_text() or '')
            assert 'Page 2' in (reader.pages[2].extract_text() or '')