feat: enhance production readiness with environment configuration, dependency checks, and sitemap updates

This commit is contained in:
Your Name
2026-03-15 13:29:02 +02:00
parent 3217681108
commit c167091399
11 changed files with 392 additions and 51 deletions

View File

@@ -1,6 +1,7 @@
"""HTML to PDF conversion service."""
import os
import logging
from importlib.metadata import PackageNotFoundError, version
logger = logging.getLogger(__name__)
@@ -10,6 +11,54 @@ class HtmlToPdfError(Exception):
pass
def _parse_version_parts(raw_version: str | None) -> tuple[int, ...]:
"""Parse a package version into comparable integer parts."""
if not raw_version:
return ()
parts: list[int] = []
for token in raw_version.replace("-", ".").split("."):
digits = "".join(ch for ch in token if ch.isdigit())
if not digits:
break
parts.append(int(digits))
return tuple(parts)
def _get_installed_version(package_name: str) -> str | None:
"""Return installed package version, if available."""
try:
return version(package_name)
except PackageNotFoundError:
return None
def _get_dependency_mismatch_error() -> str | None:
    """
    Report the known WeasyPrint/pydyf version clash, if it is present.

    WeasyPrint 61.x passes constructor arguments to pydyf.PDF, while
    pydyf 0.11+ expects those parameters on PDF.write(). Running that
    combination fails with:
    "PDF.__init__() takes 1 positional argument but 3 were given".

    Returns:
        A human-readable error message when the installed versions are the
        incompatible combination; None when they are compatible or when
        either package's version cannot be determined.
    """
    weasyprint_installed = _get_installed_version("weasyprint")
    pydyf_installed = _get_installed_version("pydyf")

    # Without both versions there is nothing to compare.
    if not (weasyprint_installed and pydyf_installed):
        return None

    # WeasyPrint 62 and newer is not affected.
    if _parse_version_parts(weasyprint_installed) >= (62,):
        return None

    # pydyf releases before 0.11 still accept the constructor arguments.
    if _parse_version_parts(pydyf_installed) < (0, 11):
        return None

    return (
        "Installed HTML-to-PDF dependencies are incompatible: "
        f"WeasyPrint {weasyprint_installed} with pydyf {pydyf_installed}. "
        "Reinstall backend dependencies after pinning pydyf<0.11."
    )
def html_to_pdf(
input_path: str,
output_path: str,
@@ -30,6 +79,10 @@ def html_to_pdf(
os.makedirs(os.path.dirname(output_path), exist_ok=True)
try:
dependency_error = _get_dependency_mismatch_error()
if dependency_error:
raise HtmlToPdfError(dependency_error)
from weasyprint import HTML
HTML(filename=input_path).write_pdf(output_path)
@@ -67,6 +120,10 @@ def html_string_to_pdf(
os.makedirs(os.path.dirname(output_path), exist_ok=True)
try:
dependency_error = _get_dependency_mismatch_error()
if dependency_error:
raise HtmlToPdfError(dependency_error)
from weasyprint import HTML
HTML(string=html_content).write_pdf(output_path)

Binary file not shown.

View File

@@ -37,6 +37,8 @@ class BaseConfig:
"docx": [
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"
],
"html": ["text/html", "application/xhtml+xml"],
"htm": ["text/html", "application/xhtml+xml"],
"png": ["image/png"],
"jpg": ["image/jpeg"],
"jpeg": ["image/jpeg"],
@@ -52,6 +54,8 @@ class BaseConfig:
"pdf": 20 * 1024 * 1024, # 20MB
"doc": 15 * 1024 * 1024, # 15MB
"docx": 15 * 1024 * 1024, # 15MB
"html": 10 * 1024 * 1024, # 10MB
"htm": 10 * 1024 * 1024, # 10MB
"png": 10 * 1024 * 1024, # 10MB
"jpg": 10 * 1024 * 1024, # 10MB
"jpeg": 10 * 1024 * 1024, # 10MB

View File

@@ -29,6 +29,7 @@ openpyxl>=3.1,<4.0
qrcode[pil]>=7.4,<8.0
# HTML to PDF
pydyf>=0.8,<0.11
weasyprint>=60.0,<62.0
# OCR

View File

@@ -52,13 +52,36 @@ class TestFileValidator:
mock_file.tell = content.tell
mock_file.read = content.read
with patch('app.utils.file_validator.magic') as mock_magic:
with patch('app.utils.file_validator.HAS_MAGIC', True), patch(
'app.utils.file_validator.magic', create=True
) as mock_magic:
mock_magic.from_buffer.return_value = 'application/pdf'
filename, ext = validate_file(mock_file, allowed_types=["pdf"])
assert filename == 'document.pdf'
assert ext == 'pdf'
def test_valid_html_passes(self, app):
    """An .html upload detected as text/html is accepted by validate_file."""
    with app.app_context():
        payload = io.BytesIO(b'<!doctype html><html><body>Hello</body></html>')

        # Fake upload object whose stream operations delegate to the buffer.
        upload = MagicMock(
            filename='page.html',
            seek=payload.seek,
            tell=payload.tell,
            read=payload.read,
        )

        # Force the libmagic code path and control the MIME type it reports.
        magic_flag = patch('app.utils.file_validator.HAS_MAGIC', True)
        magic_module = patch('app.utils.file_validator.magic', create=True)
        with magic_flag, magic_module as mock_magic:
            mock_magic.from_buffer.return_value = 'text/html'
            filename, ext = validate_file(upload, allowed_types=["html", "htm"])

            assert filename == 'page.html'
            assert ext == 'html'
def test_mime_mismatch_raises(self, app):
"""Should raise when MIME type doesn't match extension."""
with app.app_context():
@@ -70,7 +93,9 @@ class TestFileValidator:
mock_file.tell = content.tell
mock_file.read = content.read
with patch('app.utils.file_validator.magic') as mock_magic:
with patch('app.utils.file_validator.HAS_MAGIC', True), patch(
'app.utils.file_validator.magic', create=True
) as mock_magic:
mock_magic.from_buffer.return_value = 'text/plain'
with pytest.raises(FileValidationError, match="does not match"):
validate_file(mock_file, allowed_types=["pdf"])
@@ -102,7 +127,9 @@ class TestFileValidator:
mock_file.tell = content.tell
mock_file.read = content.read
with patch('app.utils.file_validator.magic') as mock_magic:
with patch('app.utils.file_validator.HAS_MAGIC', True), patch(
'app.utils.file_validator.magic', create=True
) as mock_magic:
mock_magic.from_buffer.return_value = 'application/pdf'
with pytest.raises(FileValidationError, match="unsafe"):
validate_file(mock_file, allowed_types=["pdf"])
validate_file(mock_file, allowed_types=["pdf"])

View File

@@ -2,6 +2,8 @@
import io
from unittest.mock import MagicMock
from app.services.html_to_pdf_service import _get_dependency_mismatch_error
class TestHtmlToPdf:
def test_no_file(self, client):
@@ -41,3 +43,33 @@ class TestHtmlToPdf:
assert response.status_code == 202
json_data = response.get_json()
assert 'task_id' in json_data
def test_detects_weasyprint_pydyf_version_mismatch(self, monkeypatch):
    """WeasyPrint 61.2 paired with pydyf 0.12.1 must be reported as a mismatch."""
    installed = {'weasyprint': '61.2', 'pydyf': '0.12.1'}

    def fake_installed_version(package_name):
        # Stand-in for the real metadata lookup; unknown packages give None.
        return installed.get(package_name)

    monkeypatch.setattr(
        'app.services.html_to_pdf_service._get_installed_version',
        fake_installed_version,
    )

    message = _get_dependency_mismatch_error()

    assert message is not None
    # The message must name both offending versions.
    for fragment in ('WeasyPrint 61.2', 'pydyf 0.12.1'):
        assert fragment in message
def test_allows_compatible_weasyprint_pydyf_versions(self, monkeypatch):
    """WeasyPrint 61.2 paired with pydyf 0.10.0 must not be reported."""
    installed = {'weasyprint': '61.2', 'pydyf': '0.10.0'}

    def fake_installed_version(package_name):
        # Stand-in for the real metadata lookup; unknown packages give None.
        return installed.get(package_name)

    monkeypatch.setattr(
        'app.services.html_to_pdf_service._get_installed_version',
        fake_installed_version,
    )

    outcome = _get_dependency_mismatch_error()
    assert outcome is None