Add OCR, Background Removal, and PDF Editor features with tests
- Implemented OCR functionality using pytesseract for image and PDF text extraction.
- Added Background Removal service using rembg for image processing.
- Developed PDF Editor service for applying text annotations to PDF files.
- Created corresponding API routes for OCR, Background Removal, and PDF Editor.
- Added frontend components for OCR and Background Removal tools.
- Integrated feature flagging for new tools, ensuring they are disabled by default.
- Implemented comprehensive unit tests for OCR service, PDF editor, and background removal.
- Updated documentation to reflect new features and usage instructions.
- Added translations for new features in English, Arabic, and French.
This commit is contained in:
66
backend/tests/test_ocr_service.py
Normal file
66
backend/tests/test_ocr_service.py
Normal file
@@ -0,0 +1,66 @@
|
||||
"""Tests for OCR service and PDF editor service — unit tests with mocking."""
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
from app.services.ocr_service import ocr_image, OCRError, SUPPORTED_LANGUAGES
|
||||
|
||||
|
||||
class TestOcrServiceConstants:
    """Sanity checks for the constants exported by the OCR service module."""

    def test_supported_languages(self):
        """SUPPORTED_LANGUAGES holds eng, ara and fra, and has exactly three entries."""
        for expected_code in ("eng", "ara", "fra"):
            assert expected_code in SUPPORTED_LANGUAGES
        # Pin the size so an added or removed language is noticed by this test.
        assert len(SUPPORTED_LANGUAGES) == 3
|
||||
|
||||
|
||||
class TestOcrImage:
    """Unit tests for ``ocr_image`` with pytesseract and PIL replaced by mocks."""

    @staticmethod
    def _run_ocr_with_mocks(ocr_output, lang):
        """Call ``ocr_image`` on a fake path with pytesseract and PIL mocked out.

        Args:
            ocr_output: String the mocked ``pytesseract.image_to_string`` returns.
            lang: Language code passed to ``ocr_image`` as ``lang``.

        Returns:
            Whatever ``ocr_image`` returns for the mocked inputs.
        """
        mock_pytesseract = MagicMock()
        mock_pytesseract.image_to_string.return_value = ocr_output
        mock_pytesseract.pytesseract.tesseract_cmd = ""

        # Fake image: reports RGB mode and can be used as a context manager.
        mock_img = MagicMock()
        mock_img.mode = "RGB"
        mock_img.__enter__ = MagicMock(return_value=mock_img)
        mock_img.__exit__ = MagicMock(return_value=False)

        # Any ``import pytesseract`` executed while this patch is active resolves
        # to the mock (presumably ocr_image imports it lazily -- confirm).
        with patch.dict(sys.modules, {"pytesseract": mock_pytesseract}):
            with patch("app.services.ocr_service.Image") as mock_pil:
                mock_pil.open.return_value = mock_img
                return ocr_image("/fake/path.png", lang=lang)

    def test_ocr_image_success(self):
        """Should return text and char_count from image (mocked pytesseract)."""
        result = self._run_ocr_with_mocks(" Hello World ", "eng")

        assert result["text"] == "Hello World"
        assert result["char_count"] == 11
        assert result["lang"] == "eng"

    def test_ocr_image_invalid_lang_fallback(self):
        """Invalid language should fall back to 'eng'."""
        result = self._run_ocr_with_mocks("Test", "zzzz")

        assert result["lang"] == "eng"
|
||||
|
||||
|
||||
class TestPdfEditorService:
    """Unit tests for the PDF editor service."""

    def test_no_edits_raises(self):
        """An empty edit list must make apply_pdf_edits raise PDFEditorError."""
        # Imported inside the test, as in the original, so the module is only
        # loaded when this test actually runs.
        from app.services.pdf_editor_service import apply_pdf_edits, PDFEditorError

        source_pdf = "/fake.pdf"
        target_pdf = "/out.pdf"
        no_edits = []

        with pytest.raises(PDFEditorError, match="No edits"):
            apply_pdf_edits(source_pdf, target_pdf, no_edits)
|
||||
Reference in New Issue
Block a user