Files
SaaS-PDF/backend/tests/test_pdf_tools_service.py
Your Name e06e64f85f feat: Enhance PDF tools with new reorder and watermark removal functionalities
- Added tests for rotating PDFs, removing watermarks, and reordering pages in the backend.
- Implemented frontend logic to read page counts from uploaded PDFs and validate page orders.
- Updated internationalization files to include new strings for reorder and watermark removal features.
- Improved user feedback during page count reading and validation in the Reorder PDF component.
- Ensured that the reorder functionality requires a complete permutation of pages.
2026-03-11 14:21:25 +02:00

279 lines
11 KiB
Python

"""Tests for PDF tools service — Merge, Split, Rotate, etc."""
import os
import pytest
from unittest.mock import patch, MagicMock
from app.services.pdf_tools_service import (
add_watermark,
merge_pdfs,
PDFToolsError,
remove_watermark,
reorder_pdf_pages,
rotate_pdf,
split_pdf,
)
class TestMergePdfsService:
    """Tests for ``merge_pdfs``."""

    def test_merge_file_not_found_raises(self, app, tmp_path):
        """Should raise when input file doesn't exist."""
        # Paths under tmp_path cannot pre-exist, unlike fixed /tmp names that
        # may collide with leftovers from another run or process.
        missing_inputs = [
            str(tmp_path / 'nonexistent1.pdf'),
            str(tmp_path / 'nonexistent2.pdf'),
        ]
        with app.app_context():
            with pytest.raises(PDFToolsError, match="not found"):
                merge_pdfs(missing_inputs, str(tmp_path / 'merged_output.pdf'))

    def test_merge_success(self, app, tmp_path):
        """Should merge PDF files successfully."""
        # Skip only when PyPDF2 itself is unavailable. An ImportError raised by
        # the service under test must show up as a failure, not as a skip.
        pypdf2 = pytest.importorskip("PyPDF2", reason="PyPDF2 not installed")

        with app.app_context():
            # Create two single-page test PDFs using PyPDF2.
            pdf1 = str(tmp_path / 'a.pdf')
            pdf2 = str(tmp_path / 'b.pdf')
            for path in (pdf1, pdf2):
                writer = pypdf2.PdfWriter()
                writer.add_blank_page(width=612, height=792)
                with open(path, 'wb') as f:
                    writer.write(f)

            output = str(tmp_path / 'merged.pdf')
            result = merge_pdfs([pdf1, pdf2], output)

            assert result['total_pages'] == 2
            assert result['files_merged'] == 2
            assert result['output_size'] > 0
            assert os.path.exists(output)
class TestSplitPdfService:
    """Tests for ``split_pdf``."""

    def test_split_all_pages(self, app, tmp_path):
        """Should split PDF into individual pages."""
        # Skip only when PyPDF2 itself is unavailable. An ImportError raised by
        # the service under test must show up as a failure, not as a skip.
        pypdf2 = pytest.importorskip("PyPDF2", reason="PyPDF2 not installed")

        with app.app_context():
            # Create a 3-page PDF.
            input_path = str(tmp_path / 'multi.pdf')
            writer = pypdf2.PdfWriter()
            for _ in range(3):
                writer.add_blank_page(width=612, height=792)
            with open(input_path, 'wb') as f:
                writer.write(f)

            output_dir = str(tmp_path / 'split_output')
            result = split_pdf(input_path, output_dir, mode='all')

            assert result['total_pages'] == 3
            assert result['extracted_pages'] == 3
            assert os.path.exists(result['zip_path'])
class TestRotatePdfService:
    """Tests for ``rotate_pdf``."""

    def test_rotate_range_invalid_format_returns_clear_message(self, app, tmp_path):
        """Should raise a clear error for malformed page specs instead of failing generically."""
        # Skip only when PyPDF2 itself is unavailable. An ImportError raised by
        # the service under test must show up as a failure, not as a skip.
        pypdf2 = pytest.importorskip("PyPDF2", reason="PyPDF2 not installed")

        with app.app_context():
            input_path = str(tmp_path / 'rotate-source.pdf')
            output_path = str(tmp_path / 'rotate-output.pdf')

            # Two blank pages, so only the non-numeric bound in '1-two' is
            # malformed; the start of the range is a valid page.
            writer = pypdf2.PdfWriter()
            for _ in range(2):
                writer.add_blank_page(width=612, height=792)
            with open(input_path, 'wb') as f:
                writer.write(f)

            with pytest.raises(PDFToolsError, match='Invalid page format'):
                rotate_pdf(input_path, output_path, rotation=90, pages='1-two')
class TestRemoveWatermarkService:
    """Tests for ``remove_watermark``, plus two ``split_pdf`` range-validation tests."""

    def test_remove_text_watermark_from_reportlab_overlay(self, app, tmp_path):
        """Should remove text watermarks generated by the platform watermark flow."""
        # Skip only when the optional libraries are unavailable. An ImportError
        # raised by the service under test must fail the test, not skip it.
        skip_reason = "PyPDF2/reportlab not installed"
        canvas = pytest.importorskip("reportlab.pdfgen.canvas", reason=skip_reason)
        pypdf2 = pytest.importorskip("PyPDF2", reason=skip_reason)

        with app.app_context():
            input_path = str(tmp_path / 'source.pdf')
            watermarked_path = str(tmp_path / 'watermarked.pdf')
            output_path = str(tmp_path / 'cleaned.pdf')

            c = canvas.Canvas(input_path)
            c.drawString(100, 700, 'Hello world')
            c.save()

            # Round-trip: apply the service's own watermark, then remove it.
            add_watermark(input_path, watermarked_path, 'CONFIDENTIAL')
            result = remove_watermark(watermarked_path, output_path)
            extracted_text = pypdf2.PdfReader(output_path).pages[0].extract_text() or ''

            assert result['total_pages'] == 1
            assert result['cleaned_pages'] == 1
            assert result['output_size'] > 0
            assert os.path.exists(output_path)
            assert 'Hello world' in extracted_text
            assert 'CONFIDENTIAL' not in extracted_text

    def test_remove_image_watermark_overlay_from_trailing_xobject(self, app, tmp_path):
        """Should remove supported trailing image watermark overlays while preserving page text."""
        # Skip only when the optional libraries are unavailable. An ImportError
        # raised by the service under test must fail the test, not skip it.
        skip_reason = 'PyPDF2/reportlab/Pillow not installed'
        pil_image = pytest.importorskip("PIL.Image", reason=skip_reason)
        canvas = pytest.importorskip("reportlab.pdfgen.canvas", reason=skip_reason)
        pypdf2 = pytest.importorskip("PyPDF2", reason=skip_reason)

        with app.app_context():
            input_path = str(tmp_path / 'source.pdf')
            overlay_path = str(tmp_path / 'overlay.pdf')
            watermarked_path = str(tmp_path / 'image-watermarked.pdf')
            output_path = str(tmp_path / 'image-cleaned.pdf')
            watermark_image_path = str(tmp_path / 'watermark.png')

            # Base document with the text that must survive cleaning.
            c = canvas.Canvas(input_path)
            c.drawString(100, 700, 'Hello world')
            c.save()

            # Semi-transparent image drawn on its own page to act as the overlay.
            pil_image.new('RGBA', (200, 80), (220, 38, 38, 96)).save(watermark_image_path)
            c = canvas.Canvas(overlay_path)
            c.drawImage(watermark_image_path, 180, 360, width=240, height=96, mask='auto')
            c.save()

            # Merge the overlay onto the base page to build the watermarked input.
            base_page = pypdf2.PdfReader(input_path).pages[0]
            overlay_page = pypdf2.PdfReader(overlay_path).pages[0]
            base_page.merge_page(overlay_page)

            writer = pypdf2.PdfWriter()
            writer.add_page(base_page)
            with open(watermarked_path, 'wb') as f:
                writer.write(f)

            result = remove_watermark(watermarked_path, output_path)
            cleaned_page = pypdf2.PdfReader(output_path).pages[0]
            extracted_text = cleaned_page.extract_text() or ''
            cleaned_stream = cleaned_page.get_contents().get_data()

            assert result['total_pages'] == 1
            assert result['cleaned_pages'] == 1
            assert 'Hello world' in extracted_text
            # NOTE(review): '/FormXob' is presumably the XObject name prefix
            # reportlab gives drawn images - confirm against reportlab.
            assert b'/FormXob' not in cleaned_stream

    def test_remove_watermark_raises_when_no_supported_pattern_found(self, app, tmp_path):
        """Should fail clearly instead of returning an unchanged PDF as success."""
        # Skip only when reportlab is unavailable. An ImportError raised by the
        # service under test must fail the test, not skip it.
        canvas = pytest.importorskip(
            "reportlab.pdfgen.canvas", reason="reportlab not installed"
        )

        with app.app_context():
            input_path = str(tmp_path / 'plain.pdf')
            output_path = str(tmp_path / 'plain_cleaned.pdf')

            c = canvas.Canvas(input_path)
            c.drawString(72, 720, 'Plain PDF without watermark')
            c.save()

            with pytest.raises(PDFToolsError, match='No removable watermark overlay'):
                remove_watermark(input_path, output_path)

    # NOTE(review): the two tests below exercise split_pdf rather than
    # remove_watermark; consider moving them to the split test class.

    def test_split_range_out_of_bounds_includes_total_pages(self, app, tmp_path):
        """Should raise a clear error when requested pages exceed document page count."""
        # Skip only when PyPDF2 itself is unavailable. An ImportError raised by
        # the service under test must fail the test, not skip it.
        pypdf2 = pytest.importorskip("PyPDF2", reason="PyPDF2 not installed")

        with app.app_context():
            input_path = str(tmp_path / 'single-page.pdf')
            writer = pypdf2.PdfWriter()
            writer.add_blank_page(width=612, height=792)
            with open(input_path, 'wb') as f:
                writer.write(f)

            output_dir = str(tmp_path / 'split_output')
            with pytest.raises(PDFToolsError, match='has only 1 page'):
                split_pdf(input_path, output_dir, mode='range', pages='1-2')

    def test_split_range_invalid_format_returns_clear_message(self, app, tmp_path):
        """Should raise a clear error for malformed page ranges."""
        # Skip only when PyPDF2 itself is unavailable. An ImportError raised by
        # the service under test must fail the test, not skip it.
        pypdf2 = pytest.importorskip("PyPDF2", reason="PyPDF2 not installed")

        with app.app_context():
            input_path = str(tmp_path / 'two-pages.pdf')
            writer = pypdf2.PdfWriter()
            for _ in range(2):
                writer.add_blank_page(width=612, height=792)
            with open(input_path, 'wb') as f:
                writer.write(f)

            output_dir = str(tmp_path / 'split_output')
            with pytest.raises(PDFToolsError, match='Invalid page format'):
                split_pdf(input_path, output_dir, mode='range', pages='1-2-3')
class TestReorderPdfService:
    """Tests for ``reorder_pdf_pages``."""

    def test_reorder_requires_full_page_permutation(self, app, tmp_path):
        """Should reject duplicates or omissions instead of silently dropping pages."""
        # Skip only when PyPDF2 itself is unavailable. An ImportError raised by
        # the service under test must show up as a failure, not as a skip.
        pypdf2 = pytest.importorskip("PyPDF2", reason='PyPDF2 not installed')

        with app.app_context():
            input_path = str(tmp_path / 'reorder-source.pdf')
            output_path = str(tmp_path / 'reorder-output.pdf')

            writer = pypdf2.PdfWriter()
            for _ in range(3):
                writer.add_blank_page(width=612, height=792)
            with open(input_path, 'wb') as f:
                writer.write(f)

            # [3, 1, 1] repeats page 1 and omits page 2 of the 3-page input.
            with pytest.raises(PDFToolsError, match='Provide every page exactly once'):
                reorder_pdf_pages(input_path, output_path, [3, 1, 1])

    def test_reorder_accepts_full_page_permutation(self, app, tmp_path):
        """Should reorder when every page is present exactly once."""
        # Skip only when the optional libraries are unavailable. An ImportError
        # raised by the service under test must fail the test, not skip it.
        skip_reason = 'PyPDF2/reportlab not installed'
        canvas = pytest.importorskip("reportlab.pdfgen.canvas", reason=skip_reason)
        pypdf2 = pytest.importorskip("PyPDF2", reason=skip_reason)

        with app.app_context():
            input_path = str(tmp_path / 'reorder-valid-source.pdf')
            output_path = str(tmp_path / 'reorder-valid-output.pdf')

            # Label each page so the output order can be read from its text.
            c = canvas.Canvas(input_path)
            for page_number in range(1, 4):
                c.drawString(100, 700, f'Page {page_number}')
                c.showPage()
            c.save()

            result = reorder_pdf_pages(input_path, output_path, [3, 1, 2])
            reader = pypdf2.PdfReader(output_path)

            assert result['reordered_pages'] == 3
            assert 'Page 3' in (reader.pages[0].extract_text() or '')
            assert 'Page 1' in (reader.pages[1].extract_text() or '')
            assert 'Page 2' in (reader.pages[2].extract_text() or '')