diff --git a/.env.example b/.env.example index b83aa94..2f84565 100644 --- a/.env.example +++ b/.env.example @@ -3,6 +3,9 @@ FLASK_ENV=development FLASK_DEBUG=1 SECRET_KEY=change-me-in-production +# Site Domain (used in sitemap, robots.txt, emails) +SITE_DOMAIN=https://saas-pdf.com + # Redis REDIS_URL=redis://redis:6379/0 @@ -31,6 +34,23 @@ DATABASE_PATH=/app/data/saas_pdf.db # CORS CORS_ORIGINS=http://localhost:5173,http://localhost:3000 +# Stripe Payments +STRIPE_SECRET_KEY=sk_test_XXXXXXXXXXXXXXXXXXXXXXXX +STRIPE_WEBHOOK_SECRET=whsec_XXXXXXXXXXXXXXXXXXXXXXXX +STRIPE_PRICE_ID_PRO_MONTHLY=price_XXXXXXXXXXXXXXXX +STRIPE_PRICE_ID_PRO_YEARLY=price_XXXXXXXXXXXXXXXX + +# Sentry Error Monitoring +SENTRY_DSN= +SENTRY_ENVIRONMENT=development + +# PostgreSQL (production) — leave empty to use SQLite +DATABASE_URL= + +# Frontend +VITE_SITE_DOMAIN=https://saas-pdf.com +VITE_SENTRY_DSN= + # Frontend Analytics / Ads (Vite) VITE_GA_MEASUREMENT_ID=G-XXXXXXXXXX VITE_PLAUSIBLE_DOMAIN= diff --git a/backend/app/services/html_to_pdf_service.py b/backend/app/services/html_to_pdf_service.py index e3913e4..8b4797b 100644 --- a/backend/app/services/html_to_pdf_service.py +++ b/backend/app/services/html_to_pdf_service.py @@ -1,6 +1,7 @@ """HTML to PDF conversion service.""" import os import logging +from importlib.metadata import PackageNotFoundError, version logger = logging.getLogger(__name__) @@ -10,6 +11,54 @@ class HtmlToPdfError(Exception): pass +def _parse_version_parts(raw_version: str | None) -> tuple[int, ...]: + """Parse a package version into comparable integer parts.""" + if not raw_version: + return () + + parts: list[int] = [] + for token in raw_version.replace("-", ".").split("."): + digits = "".join(ch for ch in token if ch.isdigit()) + if not digits: + break + parts.append(int(digits)) + return tuple(parts) + + +def _get_installed_version(package_name: str) -> str | None: + """Return installed package version, if available.""" + try: + return version(package_name) + except PackageNotFoundError: + return None + + +def _get_dependency_mismatch_error() -> str | None: + """ + Detect the known WeasyPrint/pydyf incompatibility before conversion starts. + + WeasyPrint 61.x instantiates pydyf.PDF with constructor arguments, while + pydyf 0.11+ moved these parameters to PDF.write(). That mismatch raises: + "PDF.__init__() takes 1 positional argument but 3 were given". + """ + weasyprint_version = _get_installed_version("weasyprint") + pydyf_version = _get_installed_version("pydyf") + if not weasyprint_version or not pydyf_version: + return None + + if ( + _parse_version_parts(weasyprint_version) < (62,) + and _parse_version_parts(pydyf_version) >= (0, 11) + ): + return ( + "Installed HTML-to-PDF dependencies are incompatible: " + f"WeasyPrint {weasyprint_version} with pydyf {pydyf_version}. " + "Reinstall backend dependencies after pinning pydyf<0.11." + ) + + return None + + def html_to_pdf( input_path: str, output_path: str, @@ -30,6 +79,10 @@ def html_to_pdf( os.makedirs(os.path.dirname(output_path), exist_ok=True) try: + dependency_error = _get_dependency_mismatch_error() + if dependency_error: + raise HtmlToPdfError(dependency_error) + from weasyprint import HTML HTML(filename=input_path).write_pdf(output_path) @@ -67,6 +120,10 @@ def html_string_to_pdf( os.makedirs(os.path.dirname(output_path), exist_ok=True) try: + dependency_error = _get_dependency_mismatch_error() + if dependency_error: + raise HtmlToPdfError(dependency_error) + from weasyprint import HTML HTML(string=html_content).write_pdf(output_path) diff --git a/backend/celerybeat-schedule b/backend/celerybeat-schedule index 0dfce7b..e03a67e 100644 Binary files a/backend/celerybeat-schedule and b/backend/celerybeat-schedule differ diff --git a/backend/config/__init__.py b/backend/config/__init__.py index cdc8cda..fa77255 100644 --- a/backend/config/__init__.py +++ b/backend/config/__init__.py @@ -37,6 +37,8 @@ class BaseConfig: "docx": [ "application/vnd.openxmlformats-officedocument.wordprocessingml.document" ], + "html": ["text/html", "application/xhtml+xml"], + "htm": ["text/html", "application/xhtml+xml"], "png": ["image/png"], "jpg": ["image/jpeg"], "jpeg": ["image/jpeg"], @@ -52,6 +54,8 @@ class BaseConfig: "pdf": 20 * 1024 * 1024, # 20MB "doc": 15 * 1024 * 1024, # 15MB "docx": 15 * 1024 * 1024, # 15MB + "html": 10 * 1024 * 1024, # 10MB + "htm": 10 * 1024 * 1024, # 10MB "png": 10 * 1024 * 1024, # 10MB "jpg": 10 * 1024 * 1024, # 10MB "jpeg": 10 * 1024 * 1024, # 10MB diff --git a/backend/requirements.txt b/backend/requirements.txt index e7a8d1e..f1c260b 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -29,6 +29,7 @@ openpyxl>=3.1,<4.0 qrcode[pil]>=7.4,<8.0 # HTML to PDF +pydyf>=0.8,<0.11 weasyprint>=60.0,<62.0 # OCR diff --git a/backend/tests/test_file_validator.py b/backend/tests/test_file_validator.py index 3e7efdc..c95d099 100644 --- a/backend/tests/test_file_validator.py +++ b/backend/tests/test_file_validator.py @@ -52,13 +52,36 @@ class TestFileValidator: mock_file.tell = content.tell mock_file.read = content.read - with patch('app.utils.file_validator.magic') as mock_magic: + with patch('app.utils.file_validator.HAS_MAGIC', True), patch( + 'app.utils.file_validator.magic', create=True + ) as mock_magic: mock_magic.from_buffer.return_value = 'application/pdf' filename, ext = validate_file(mock_file, allowed_types=["pdf"]) assert filename == 'document.pdf' assert ext == 'pdf' + def test_valid_html_passes(self, app): + """Should accept valid HTML file with correct MIME type.""" + with app.app_context(): + html_bytes = b'
Hello' + content = io.BytesIO(html_bytes) + + mock_file = MagicMock() + mock_file.filename = 'page.html' + mock_file.seek = content.seek + mock_file.tell = content.tell + mock_file.read = content.read + + with patch('app.utils.file_validator.HAS_MAGIC', True), patch( + 'app.utils.file_validator.magic', create=True + ) as mock_magic: + mock_magic.from_buffer.return_value = 'text/html' + filename, ext = validate_file(mock_file, allowed_types=["html", "htm"]) + + assert filename == 'page.html' + assert ext == 'html' + def test_mime_mismatch_raises(self, app): """Should raise when MIME type doesn't match extension.""" with app.app_context(): @@ -70,7 +93,9 @@ class TestFileValidator: mock_file.tell = content.tell mock_file.read = content.read - with patch('app.utils.file_validator.magic') as mock_magic: + with patch('app.utils.file_validator.HAS_MAGIC', True), patch( + 'app.utils.file_validator.magic', create=True + ) as mock_magic: mock_magic.from_buffer.return_value = 'text/plain' with pytest.raises(FileValidationError, match="does not match"): validate_file(mock_file, allowed_types=["pdf"]) @@ -102,7 +127,9 @@ class TestFileValidator: mock_file.tell = content.tell mock_file.read = content.read - with patch('app.utils.file_validator.magic') as mock_magic: + with patch('app.utils.file_validator.HAS_MAGIC', True), patch( + 'app.utils.file_validator.magic', create=True + ) as mock_magic: mock_magic.from_buffer.return_value = 'application/pdf' with pytest.raises(FileValidationError, match="unsafe"): - validate_file(mock_file, allowed_types=["pdf"]) \ No newline at end of file + validate_file(mock_file, allowed_types=["pdf"]) diff --git a/backend/tests/test_html_to_pdf.py b/backend/tests/test_html_to_pdf.py index 4d283a2..dbb6984 100644 --- a/backend/tests/test_html_to_pdf.py +++ b/backend/tests/test_html_to_pdf.py @@ -2,6 +2,8 @@ import io from unittest.mock import MagicMock +from app.services.html_to_pdf_service import _get_dependency_mismatch_error + class TestHtmlToPdf: def test_no_file(self, client): @@ -41,3 +43,33 @@ class TestHtmlToPdf: assert response.status_code == 202 json_data = response.get_json() assert 'task_id' in json_data + + def test_detects_weasyprint_pydyf_version_mismatch(self, monkeypatch): + """Should flag the known WeasyPrint/pydyf incompatibility.""" + versions = { + 'weasyprint': '61.2', + 'pydyf': '0.12.1', + } + monkeypatch.setattr( + 'app.services.html_to_pdf_service._get_installed_version', + lambda package_name: versions.get(package_name), + ) + + error = _get_dependency_mismatch_error() + + assert error is not None + assert 'WeasyPrint 61.2' in error + assert 'pydyf 0.12.1' in error + + def test_allows_compatible_weasyprint_pydyf_versions(self, monkeypatch): + """Should not flag compatible dependency versions.""" + versions = { + 'weasyprint': '61.2', + 'pydyf': '0.10.0', + } + monkeypatch.setattr( + 'app.services.html_to_pdf_service._get_installed_version', + lambda package_name: versions.get(package_name), + ) + + assert _get_dependency_mismatch_error() is None diff --git a/frontend/.env.example b/frontend/.env.example index 72a602f..471f900 100644 --- a/frontend/.env.example +++ b/frontend/.env.example @@ -1,3 +1,5 @@ +VITE_SITE_DOMAIN=https://saas-pdf.com +VITE_SENTRY_DSN= VITE_GA_MEASUREMENT_ID=G-XXXXXXXXXX VITE_PLAUSIBLE_DOMAIN= VITE_PLAUSIBLE_SRC=https://plausible.io/js/script.js diff --git a/frontend/public/robots.txt b/frontend/public/robots.txt index 14039b0..162e000 100644 --- a/frontend/public/robots.txt +++ b/frontend/public/robots.txt @@ -7,7 +7,7 @@ Disallow: /forgot-password Disallow: /reset-password # Sitemaps -Sitemap: https://yourdomain.com/sitemap.xml +Sitemap: https://saas-pdf.com/sitemap.xml # AI/LLM discoverability # See also: /llms.txt diff --git a/frontend/public/sitemap.xml b/frontend/public/sitemap.xml index cd9467f..54b698f 100644 --- a/frontend/public/sitemap.xml +++ b/frontend/public/sitemap.xml @@ -1,53 +1,245 @@