"""HTML to PDF conversion service.""" import os import logging from importlib.metadata import PackageNotFoundError, version logger = logging.getLogger(__name__) class HtmlToPdfError(Exception): """Custom exception for HTML to PDF conversion failures.""" pass def _parse_version_parts(raw_version: str | None) -> tuple[int, ...]: """Parse a package version into comparable integer parts.""" if not raw_version: return () parts: list[int] = [] for token in raw_version.replace("-", ".").split("."): digits = "".join(ch for ch in token if ch.isdigit()) if not digits: break parts.append(int(digits)) return tuple(parts) def _get_installed_version(package_name: str) -> str | None: """Return installed package version, if available.""" try: return version(package_name) except PackageNotFoundError: return None def _get_dependency_mismatch_error() -> str | None: """ Detect the known WeasyPrint/pydyf incompatibility before conversion starts. WeasyPrint 61.x instantiates pydyf.PDF with constructor arguments, while pydyf 0.11+ moved these parameters to PDF.write(). That mismatch raises: "PDF.__init__() takes 1 positional argument but 3 were given". """ weasyprint_version = _get_installed_version("weasyprint") pydyf_version = _get_installed_version("pydyf") if not weasyprint_version or not pydyf_version: return None if ( _parse_version_parts(weasyprint_version) < (62,) and _parse_version_parts(pydyf_version) >= (0, 11) ): return ( "Installed HTML-to-PDF dependencies are incompatible: " f"WeasyPrint {weasyprint_version} with pydyf {pydyf_version}. " "Reinstall backend dependencies after pinning pydyf<0.11." ) return None def html_to_pdf( input_path: str, output_path: str, ) -> dict: """ Convert an HTML file to PDF. Args: input_path: Path to the input HTML file output_path: Path for the output PDF Returns: dict with output_size Raises: HtmlToPdfError: If conversion fails """ os.makedirs(os.path.dirname(output_path), exist_ok=True) try: dependency_error = _get_dependency_mismatch_error() if dependency_error: raise HtmlToPdfError(dependency_error) from weasyprint import HTML HTML(filename=input_path).write_pdf(output_path) output_size = os.path.getsize(output_path) logger.info(f"HTML→PDF conversion completed ({output_size} bytes)") return { "output_size": output_size, } except ImportError: raise HtmlToPdfError("weasyprint library is not installed.") except Exception as e: raise HtmlToPdfError(f"Failed to convert HTML to PDF: {str(e)}") def html_string_to_pdf( html_content: str, output_path: str, ) -> dict: """ Convert an HTML string to PDF. Args: html_content: HTML content as string output_path: Path for the output PDF Returns: dict with output_size Raises: HtmlToPdfError: If conversion fails """ os.makedirs(os.path.dirname(output_path), exist_ok=True) try: dependency_error = _get_dependency_mismatch_error() if dependency_error: raise HtmlToPdfError(dependency_error) from weasyprint import HTML HTML(string=html_content).write_pdf(output_path) output_size = os.path.getsize(output_path) logger.info(f"HTML string→PDF conversion completed ({output_size} bytes)") return { "output_size": output_size, } except ImportError: raise HtmlToPdfError("weasyprint library is not installed.") except Exception as e: raise HtmlToPdfError(f"Failed to convert HTML to PDF: {str(e)}")