142 lines
3.8 KiB
Python
142 lines
3.8 KiB
Python
"""HTML to PDF conversion service."""
|
|
import os
|
|
import logging
|
|
from importlib.metadata import PackageNotFoundError, version
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class HtmlToPdfError(Exception):
    """Raised when an HTML document cannot be converted to PDF."""
|
|
|
|
|
|
def _parse_version_parts(raw_version: str | None) -> tuple[int, ...]:
|
|
"""Parse a package version into comparable integer parts."""
|
|
if not raw_version:
|
|
return ()
|
|
|
|
parts: list[int] = []
|
|
for token in raw_version.replace("-", ".").split("."):
|
|
digits = "".join(ch for ch in token if ch.isdigit())
|
|
if not digits:
|
|
break
|
|
parts.append(int(digits))
|
|
return tuple(parts)
|
|
|
|
|
|
def _get_installed_version(package_name: str) -> str | None:
|
|
"""Return installed package version, if available."""
|
|
try:
|
|
return version(package_name)
|
|
except PackageNotFoundError:
|
|
return None
|
|
|
|
|
|
def _get_dependency_mismatch_error() -> str | None:
    """
    Report the known WeasyPrint/pydyf version clash, if it is installed.

    WeasyPrint 61.x passes arguments to the pydyf.PDF constructor, whereas
    pydyf 0.11+ expects those parameters on PDF.write(). Running that
    combination fails with:
    "PDF.__init__() takes 1 positional argument but 3 were given".

    Returns:
        An error message when WeasyPrint is older than 62 and pydyf is 0.11
        or newer; None otherwise, including when either package's version
        cannot be determined.
    """
    installed_weasyprint = _get_installed_version("weasyprint")
    installed_pydyf = _get_installed_version("pydyf")

    # Without both versions there is nothing to compare.
    if not (installed_weasyprint and installed_pydyf):
        return None

    # WeasyPrint 62+ is not affected by the constructor change.
    if _parse_version_parts(installed_weasyprint) >= (62,):
        return None

    # pydyf releases before 0.11 still accept constructor arguments.
    if _parse_version_parts(installed_pydyf) < (0, 11):
        return None

    return (
        "Installed HTML-to-PDF dependencies are incompatible: "
        f"WeasyPrint {installed_weasyprint} with pydyf {installed_pydyf}. "
        "Reinstall backend dependencies after pinning pydyf<0.11."
    )
|
|
|
|
|
|
def html_to_pdf(
    input_path: str,
    output_path: str,
) -> dict:
    """
    Convert an HTML file to PDF.

    Args:
        input_path: Path to the input HTML file
        output_path: Path for the output PDF; missing parent directories
            are created first

    Returns:
        dict with output_size (size of the written PDF in bytes)

    Raises:
        HtmlToPdfError: If the installed WeasyPrint/pydyf versions are
            incompatible, weasyprint cannot be imported, or conversion fails
    """
    # os.path.dirname() returns "" for a bare filename such as "out.pdf", and
    # os.makedirs("") raises FileNotFoundError, so only create a real directory.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    try:
        dependency_error = _get_dependency_mismatch_error()
        if dependency_error:
            raise HtmlToPdfError(dependency_error)

        # Imported lazily so the module loads even when weasyprint is absent.
        from weasyprint import HTML

        HTML(filename=input_path).write_pdf(output_path)

        output_size = os.path.getsize(output_path)
    except HtmlToPdfError:
        # Already carries a specific message (dependency mismatch); re-raise
        # untouched instead of burying it under the generic failure prefix.
        raise
    except ImportError as exc:
        raise HtmlToPdfError("weasyprint library is not installed.") from exc
    except Exception as exc:
        raise HtmlToPdfError(f"Failed to convert HTML to PDF: {exc}") from exc

    logger.info(f"HTML→PDF conversion completed ({output_size} bytes)")

    return {
        "output_size": output_size,
    }
|
|
|
|
|
|
def html_string_to_pdf(
    html_content: str,
    output_path: str,
) -> dict:
    """
    Convert an HTML string to PDF.

    Args:
        html_content: HTML content as string
        output_path: Path for the output PDF; missing parent directories
            are created first

    Returns:
        dict with output_size (size of the written PDF in bytes)

    Raises:
        HtmlToPdfError: If the installed WeasyPrint/pydyf versions are
            incompatible, weasyprint cannot be imported, or conversion fails
    """
    # os.path.dirname() returns "" for a bare filename such as "out.pdf", and
    # os.makedirs("") raises FileNotFoundError, so only create a real directory.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    try:
        dependency_error = _get_dependency_mismatch_error()
        if dependency_error:
            raise HtmlToPdfError(dependency_error)

        # Imported lazily so the module loads even when weasyprint is absent.
        from weasyprint import HTML

        HTML(string=html_content).write_pdf(output_path)

        output_size = os.path.getsize(output_path)
    except HtmlToPdfError:
        # Already carries a specific message (dependency mismatch); re-raise
        # untouched instead of burying it under the generic failure prefix.
        raise
    except ImportError as exc:
        raise HtmlToPdfError("weasyprint library is not installed.") from exc
    except Exception as exc:
        raise HtmlToPdfError(f"Failed to convert HTML to PDF: {exc}") from exc

    logger.info(f"HTML string→PDF conversion completed ({output_size} bytes)")

    return {
        "output_size": output_size,
    }
|