feat: enhance production readiness with environment configuration, dependency checks, and sitemap updates
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
"""HTML to PDF conversion service."""
|
||||
import os
|
||||
import logging
|
||||
from importlib.metadata import PackageNotFoundError, version
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -10,6 +11,54 @@ class HtmlToPdfError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
def _parse_version_parts(raw_version: str | None) -> tuple[int, ...]:
|
||||
"""Parse a package version into comparable integer parts."""
|
||||
if not raw_version:
|
||||
return ()
|
||||
|
||||
parts: list[int] = []
|
||||
for token in raw_version.replace("-", ".").split("."):
|
||||
digits = "".join(ch for ch in token if ch.isdigit())
|
||||
if not digits:
|
||||
break
|
||||
parts.append(int(digits))
|
||||
return tuple(parts)
|
||||
|
||||
|
||||
def _get_installed_version(package_name: str) -> str | None:
|
||||
"""Return installed package version, if available."""
|
||||
try:
|
||||
return version(package_name)
|
||||
except PackageNotFoundError:
|
||||
return None
|
||||
|
||||
|
||||
def _get_dependency_mismatch_error() -> str | None:
    """
    Detect the known WeasyPrint/pydyf incompatibility before conversion starts.

    WeasyPrint 61.x instantiates pydyf.PDF with constructor arguments, while
    pydyf 0.11+ moved these parameters to PDF.write(). That mismatch raises:
    "PDF.__init__() takes 1 positional argument but 3 were given".

    Returns:
        An error message when the installed WeasyPrint is older than 62 and
        the installed pydyf is 0.11 or newer. ``None`` when either package is
        not installed, when a version string cannot be parsed, or when the
        installed combination is not the known-bad one.
    """
    weasyprint_version = _get_installed_version("weasyprint")
    pydyf_version = _get_installed_version("pydyf")
    if not weasyprint_version or not pydyf_version:
        return None

    weasyprint_parts = _parse_version_parts(weasyprint_version)
    pydyf_parts = _parse_version_parts(pydyf_version)
    # An unparsable version comes back as an empty tuple, and () < (62,) is
    # True. Without this guard an unknown WeasyPrint version would be reported
    # as incompatible and block every conversion.
    if not weasyprint_parts or not pydyf_parts:
        return None

    if weasyprint_parts < (62,) and pydyf_parts >= (0, 11):
        return (
            "Installed HTML-to-PDF dependencies are incompatible: "
            f"WeasyPrint {weasyprint_version} with pydyf {pydyf_version}. "
            "Reinstall backend dependencies after pinning pydyf<0.11."
        )

    return None
|
||||
|
||||
|
||||
def html_to_pdf(
|
||||
input_path: str,
|
||||
output_path: str,
|
||||
@@ -30,6 +79,10 @@ def html_to_pdf(
|
||||
os.makedirs(os.path.dirname(output_path), exist_ok=True)
|
||||
|
||||
try:
|
||||
dependency_error = _get_dependency_mismatch_error()
|
||||
if dependency_error:
|
||||
raise HtmlToPdfError(dependency_error)
|
||||
|
||||
from weasyprint import HTML
|
||||
|
||||
HTML(filename=input_path).write_pdf(output_path)
|
||||
@@ -67,6 +120,10 @@ def html_string_to_pdf(
|
||||
os.makedirs(os.path.dirname(output_path), exist_ok=True)
|
||||
|
||||
try:
|
||||
dependency_error = _get_dependency_mismatch_error()
|
||||
if dependency_error:
|
||||
raise HtmlToPdfError(dependency_error)
|
||||
|
||||
from weasyprint import HTML
|
||||
|
||||
HTML(string=html_content).write_pdf(output_path)
|
||||
|
||||
Binary file not shown.
@@ -37,6 +37,8 @@ class BaseConfig:
|
||||
"docx": [
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||
],
|
||||
"html": ["text/html", "application/xhtml+xml"],
|
||||
"htm": ["text/html", "application/xhtml+xml"],
|
||||
"png": ["image/png"],
|
||||
"jpg": ["image/jpeg"],
|
||||
"jpeg": ["image/jpeg"],
|
||||
@@ -52,6 +54,8 @@ class BaseConfig:
|
||||
"pdf": 20 * 1024 * 1024, # 20MB
|
||||
"doc": 15 * 1024 * 1024, # 15MB
|
||||
"docx": 15 * 1024 * 1024, # 15MB
|
||||
"html": 10 * 1024 * 1024, # 10MB
|
||||
"htm": 10 * 1024 * 1024, # 10MB
|
||||
"png": 10 * 1024 * 1024, # 10MB
|
||||
"jpg": 10 * 1024 * 1024, # 10MB
|
||||
"jpeg": 10 * 1024 * 1024, # 10MB
|
||||
|
||||
@@ -29,6 +29,7 @@ openpyxl>=3.1,<4.0
|
||||
qrcode[pil]>=7.4,<8.0
|
||||
|
||||
# HTML to PDF
|
||||
pydyf>=0.8,<0.11
|
||||
weasyprint>=60.0,<62.0
|
||||
|
||||
# OCR
|
||||
|
||||
@@ -52,13 +52,36 @@ class TestFileValidator:
|
||||
mock_file.tell = content.tell
|
||||
mock_file.read = content.read
|
||||
|
||||
with patch('app.utils.file_validator.magic') as mock_magic:
|
||||
with patch('app.utils.file_validator.HAS_MAGIC', True), patch(
|
||||
'app.utils.file_validator.magic', create=True
|
||||
) as mock_magic:
|
||||
mock_magic.from_buffer.return_value = 'application/pdf'
|
||||
filename, ext = validate_file(mock_file, allowed_types=["pdf"])
|
||||
|
||||
assert filename == 'document.pdf'
|
||||
assert ext == 'pdf'
|
||||
|
||||
def test_valid_html_passes(self, app):
    """An HTML upload whose detected MIME type is text/html is accepted."""
    # Back the mocked upload with a real in-memory stream so that
    # seek/tell/read behave like a genuine file object.
    payload = io.BytesIO(b'<!doctype html><html><body>Hello</body></html>')

    upload = MagicMock()
    upload.filename = 'page.html'
    upload.seek = payload.seek
    upload.tell = payload.tell
    upload.read = payload.read

    # Patch the availability flag and the magic module itself; create=True
    # lets the patch install the attribute even if it is not defined.
    magic_flag = patch('app.utils.file_validator.HAS_MAGIC', True)
    magic_module = patch('app.utils.file_validator.magic', create=True)

    with app.app_context(), magic_flag, magic_module as mock_magic:
        mock_magic.from_buffer.return_value = 'text/html'
        filename, ext = validate_file(upload, allowed_types=["html", "htm"])

    assert filename == 'page.html'
    assert ext == 'html'
|
||||
|
||||
def test_mime_mismatch_raises(self, app):
|
||||
"""Should raise when MIME type doesn't match extension."""
|
||||
with app.app_context():
|
||||
@@ -70,7 +93,9 @@ class TestFileValidator:
|
||||
mock_file.tell = content.tell
|
||||
mock_file.read = content.read
|
||||
|
||||
with patch('app.utils.file_validator.magic') as mock_magic:
|
||||
with patch('app.utils.file_validator.HAS_MAGIC', True), patch(
|
||||
'app.utils.file_validator.magic', create=True
|
||||
) as mock_magic:
|
||||
mock_magic.from_buffer.return_value = 'text/plain'
|
||||
with pytest.raises(FileValidationError, match="does not match"):
|
||||
validate_file(mock_file, allowed_types=["pdf"])
|
||||
@@ -102,7 +127,9 @@ class TestFileValidator:
|
||||
mock_file.tell = content.tell
|
||||
mock_file.read = content.read
|
||||
|
||||
with patch('app.utils.file_validator.magic') as mock_magic:
|
||||
with patch('app.utils.file_validator.HAS_MAGIC', True), patch(
|
||||
'app.utils.file_validator.magic', create=True
|
||||
) as mock_magic:
|
||||
mock_magic.from_buffer.return_value = 'application/pdf'
|
||||
with pytest.raises(FileValidationError, match="unsafe"):
|
||||
validate_file(mock_file, allowed_types=["pdf"])
|
||||
validate_file(mock_file, allowed_types=["pdf"])
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
import io
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from app.services.html_to_pdf_service import _get_dependency_mismatch_error
|
||||
|
||||
|
||||
class TestHtmlToPdf:
|
||||
def test_no_file(self, client):
|
||||
@@ -41,3 +43,33 @@ class TestHtmlToPdf:
|
||||
assert response.status_code == 202
|
||||
json_data = response.get_json()
|
||||
assert 'task_id' in json_data
|
||||
|
||||
def test_detects_weasyprint_pydyf_version_mismatch(self, monkeypatch):
    """WeasyPrint 61.x together with pydyf 0.11+ is reported as a mismatch."""
    installed = {'weasyprint': '61.2', 'pydyf': '0.12.1'}
    # Replace the metadata lookup so the check sees the fake versions.
    monkeypatch.setattr(
        'app.services.html_to_pdf_service._get_installed_version',
        installed.get,
    )

    message = _get_dependency_mismatch_error()

    assert message is not None
    for fragment in ('WeasyPrint 61.2', 'pydyf 0.12.1'):
        assert fragment in message
|
||||
|
||||
def test_allows_compatible_weasyprint_pydyf_versions(self, monkeypatch):
    """WeasyPrint 61.x with a pydyf release below 0.11 is not flagged."""
    installed = {'weasyprint': '61.2', 'pydyf': '0.10.0'}
    # Replace the metadata lookup so the check sees the fake versions.
    monkeypatch.setattr(
        'app.services.html_to_pdf_service._get_installed_version',
        installed.get,
    )

    message = _get_dependency_mismatch_error()

    assert message is None
|
||||
|
||||
Reference in New Issue
Block a user