diff --git a/.env.example b/.env.example index b83aa94..2f84565 100644 --- a/.env.example +++ b/.env.example @@ -3,6 +3,9 @@ FLASK_ENV=development FLASK_DEBUG=1 SECRET_KEY=change-me-in-production +# Site Domain (used in sitemap, robots.txt, emails) +SITE_DOMAIN=https://saas-pdf.com + # Redis REDIS_URL=redis://redis:6379/0 @@ -31,6 +34,23 @@ DATABASE_PATH=/app/data/saas_pdf.db # CORS CORS_ORIGINS=http://localhost:5173,http://localhost:3000 +# Stripe Payments +STRIPE_SECRET_KEY=sk_test_XXXXXXXXXXXXXXXXXXXXXXXX +STRIPE_WEBHOOK_SECRET=whsec_XXXXXXXXXXXXXXXXXXXXXXXX +STRIPE_PRICE_ID_PRO_MONTHLY=price_XXXXXXXXXXXXXXXX +STRIPE_PRICE_ID_PRO_YEARLY=price_XXXXXXXXXXXXXXXX + +# Sentry Error Monitoring +SENTRY_DSN= +SENTRY_ENVIRONMENT=development + +# PostgreSQL (production) — leave empty to use SQLite +DATABASE_URL= + +# Frontend +VITE_SITE_DOMAIN=https://saas-pdf.com +VITE_SENTRY_DSN= + # Frontend Analytics / Ads (Vite) VITE_GA_MEASUREMENT_ID=G-XXXXXXXXXX VITE_PLAUSIBLE_DOMAIN= diff --git a/backend/app/services/html_to_pdf_service.py b/backend/app/services/html_to_pdf_service.py index e3913e4..8b4797b 100644 --- a/backend/app/services/html_to_pdf_service.py +++ b/backend/app/services/html_to_pdf_service.py @@ -1,6 +1,7 @@ """HTML to PDF conversion service.""" import os import logging +from importlib.metadata import PackageNotFoundError, version logger = logging.getLogger(__name__) @@ -10,6 +11,54 @@ class HtmlToPdfError(Exception): pass +def _parse_version_parts(raw_version: str | None) -> tuple[int, ...]: + """Parse a package version into comparable integer parts.""" + if not raw_version: + return () + + parts: list[int] = [] + for token in raw_version.replace("-", ".").split("."): + digits = "".join(ch for ch in token if ch.isdigit()) + if not digits: + break + parts.append(int(digits)) + return tuple(parts) + + +def _get_installed_version(package_name: str) -> str | None: + """Return installed package version, if available.""" + try: + return version(package_name) + except PackageNotFoundError: + return None + + +def _get_dependency_mismatch_error() -> str | None: + """ + Detect the known WeasyPrint/pydyf incompatibility before conversion starts. + + WeasyPrint 61.x instantiates pydyf.PDF with constructor arguments, while + pydyf 0.11+ moved these parameters to PDF.write(). That mismatch raises: + "PDF.__init__() takes 1 positional argument but 3 were given". + """ + weasyprint_version = _get_installed_version("weasyprint") + pydyf_version = _get_installed_version("pydyf") + if not weasyprint_version or not pydyf_version: + return None + + if ( + _parse_version_parts(weasyprint_version) < (62,) + and _parse_version_parts(pydyf_version) >= (0, 11) + ): + return ( + "Installed HTML-to-PDF dependencies are incompatible: " + f"WeasyPrint {weasyprint_version} with pydyf {pydyf_version}. " + "Reinstall backend dependencies after pinning pydyf<0.11." + ) + + return None + + def html_to_pdf( input_path: str, output_path: str, @@ -30,6 +79,10 @@ def html_to_pdf( os.makedirs(os.path.dirname(output_path), exist_ok=True) try: + dependency_error = _get_dependency_mismatch_error() + if dependency_error: + raise HtmlToPdfError(dependency_error) + from weasyprint import HTML HTML(filename=input_path).write_pdf(output_path) @@ -67,6 +120,10 @@ def html_string_to_pdf( os.makedirs(os.path.dirname(output_path), exist_ok=True) try: + dependency_error = _get_dependency_mismatch_error() + if dependency_error: + raise HtmlToPdfError(dependency_error) + from weasyprint import HTML HTML(string=html_content).write_pdf(output_path) diff --git a/backend/celerybeat-schedule b/backend/celerybeat-schedule index 0dfce7b..e03a67e 100644 Binary files a/backend/celerybeat-schedule and b/backend/celerybeat-schedule differ diff --git a/backend/config/__init__.py b/backend/config/__init__.py index cdc8cda..fa77255 100644 --- a/backend/config/__init__.py +++ b/backend/config/__init__.py @@ -37,6 +37,8 @@ class BaseConfig: "docx": [ "application/vnd.openxmlformats-officedocument.wordprocessingml.document" ], + "html": ["text/html", "application/xhtml+xml"], + "htm": ["text/html", "application/xhtml+xml"], "png": ["image/png"], "jpg": ["image/jpeg"], "jpeg": ["image/jpeg"], @@ -52,6 +54,8 @@ class BaseConfig: "pdf": 20 * 1024 * 1024, # 20MB "doc": 15 * 1024 * 1024, # 15MB "docx": 15 * 1024 * 1024, # 15MB + "html": 10 * 1024 * 1024, # 10MB + "htm": 10 * 1024 * 1024, # 10MB "png": 10 * 1024 * 1024, # 10MB "jpg": 10 * 1024 * 1024, # 10MB "jpeg": 10 * 1024 * 1024, # 10MB diff --git a/backend/requirements.txt b/backend/requirements.txt index e7a8d1e..f1c260b 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -29,6 +29,7 @@ openpyxl>=3.1,<4.0 qrcode[pil]>=7.4,<8.0 # HTML to PDF +pydyf>=0.8,<0.11 weasyprint>=60.0,<62.0 # OCR diff --git a/backend/tests/test_file_validator.py b/backend/tests/test_file_validator.py index 3e7efdc..c95d099 100644 --- a/backend/tests/test_file_validator.py +++ b/backend/tests/test_file_validator.py @@ -52,13 +52,36 @@ class TestFileValidator: mock_file.tell = content.tell mock_file.read = content.read - with patch('app.utils.file_validator.magic') as mock_magic: + with patch('app.utils.file_validator.HAS_MAGIC', True), patch( + 'app.utils.file_validator.magic', create=True + ) as mock_magic: mock_magic.from_buffer.return_value = 'application/pdf' filename, ext = validate_file(mock_file, allowed_types=["pdf"]) assert filename == 'document.pdf' assert ext == 'pdf' + def test_valid_html_passes(self, app): + """Should accept valid HTML file with correct MIME type.""" + with app.app_context(): + html_bytes = b'Hello' + content = io.BytesIO(html_bytes) + + mock_file = MagicMock() + mock_file.filename = 'page.html' + mock_file.seek = content.seek + mock_file.tell = content.tell + mock_file.read = content.read + + with patch('app.utils.file_validator.HAS_MAGIC', True), patch( + 'app.utils.file_validator.magic', create=True + ) as mock_magic: + mock_magic.from_buffer.return_value = 'text/html' + filename, ext = validate_file(mock_file, allowed_types=["html", "htm"]) + + assert filename == 'page.html' + assert ext == 'html' + def test_mime_mismatch_raises(self, app): """Should raise when MIME type doesn't match extension.""" with app.app_context(): @@ -70,7 +93,9 @@ class TestFileValidator: mock_file.tell = content.tell mock_file.read = content.read - with patch('app.utils.file_validator.magic') as mock_magic: + with patch('app.utils.file_validator.HAS_MAGIC', True), patch( + 'app.utils.file_validator.magic', create=True + ) as mock_magic: mock_magic.from_buffer.return_value = 'text/plain' with pytest.raises(FileValidationError, match="does not match"): validate_file(mock_file, allowed_types=["pdf"]) @@ -102,7 +127,9 @@ class TestFileValidator: mock_file.tell = content.tell mock_file.read = content.read - with patch('app.utils.file_validator.magic') as mock_magic: + with patch('app.utils.file_validator.HAS_MAGIC', True), patch( + 'app.utils.file_validator.magic', create=True + ) as mock_magic: mock_magic.from_buffer.return_value = 'application/pdf' with pytest.raises(FileValidationError, match="unsafe"): - validate_file(mock_file, allowed_types=["pdf"]) \ No newline at end of file + validate_file(mock_file, allowed_types=["pdf"]) diff --git a/backend/tests/test_html_to_pdf.py b/backend/tests/test_html_to_pdf.py index 4d283a2..dbb6984 100644 --- a/backend/tests/test_html_to_pdf.py +++ b/backend/tests/test_html_to_pdf.py @@ -2,6 +2,8 @@ import io from unittest.mock import MagicMock +from app.services.html_to_pdf_service import _get_dependency_mismatch_error + class TestHtmlToPdf: def test_no_file(self, client): @@ -41,3 +43,33 @@ class TestHtmlToPdf: assert response.status_code == 202 json_data = response.get_json() assert 'task_id' in json_data + + def test_detects_weasyprint_pydyf_version_mismatch(self, monkeypatch): + """Should flag the known WeasyPrint/pydyf incompatibility.""" + versions = { + 'weasyprint': '61.2', + 'pydyf': '0.12.1', + } + monkeypatch.setattr( + 'app.services.html_to_pdf_service._get_installed_version', + lambda package_name: versions.get(package_name), + ) + + error = _get_dependency_mismatch_error() + + assert error is not None + assert 'WeasyPrint 61.2' in error + assert 'pydyf 0.12.1' in error + + def test_allows_compatible_weasyprint_pydyf_versions(self, monkeypatch): + """Should not flag compatible dependency versions.""" + versions = { + 'weasyprint': '61.2', + 'pydyf': '0.10.0', + } + monkeypatch.setattr( + 'app.services.html_to_pdf_service._get_installed_version', + lambda package_name: versions.get(package_name), + ) + + assert _get_dependency_mismatch_error() is None diff --git a/frontend/.env.example b/frontend/.env.example index 72a602f..471f900 100644 --- a/frontend/.env.example +++ b/frontend/.env.example @@ -1,3 +1,5 @@ +VITE_SITE_DOMAIN=https://saas-pdf.com +VITE_SENTRY_DSN= VITE_GA_MEASUREMENT_ID=G-XXXXXXXXXX VITE_PLAUSIBLE_DOMAIN= VITE_PLAUSIBLE_SRC=https://plausible.io/js/script.js diff --git a/frontend/public/robots.txt b/frontend/public/robots.txt index 14039b0..162e000 100644 --- a/frontend/public/robots.txt +++ b/frontend/public/robots.txt @@ -7,7 +7,7 @@ Disallow: /forgot-password Disallow: /reset-password # Sitemaps -Sitemap: https://yourdomain.com/sitemap.xml +Sitemap: https://saas-pdf.com/sitemap.xml # AI/LLM discoverability # See also: /llms.txt diff --git a/frontend/public/sitemap.xml b/frontend/public/sitemap.xml index cd9467f..54b698f 100644 --- a/frontend/public/sitemap.xml +++ b/frontend/public/sitemap.xml @@ -1,53 +1,245 @@ - - https://yourdomain.com/daily1.0 - https://yourdomain.com/aboutmonthly0.4 - https://yourdomain.com/contactmonthly0.4 - https://yourdomain.com/privacyyearly0.3 - https://yourdomain.com/termsyearly0.3 - https://yourdomain.com/pricingmonthly0.7 - https://yourdomain.com/blogweekly0.6 + + https://saas-pdf.com/ + 2026-03-14 + daily + 1.0 + + + https://saas-pdf.com/about + 2026-03-14 + monthly + 0.4 + + + https://saas-pdf.com/contact + 2026-03-14 + monthly + 0.4 + + + https://saas-pdf.com/privacy + 2026-03-14 + yearly + 0.3 + + + https://saas-pdf.com/terms + 2026-03-14 + yearly + 0.3 + + + https://saas-pdf.com/pricing + 2026-03-14 + monthly + 0.7 + + + https://saas-pdf.com/blog + 2026-03-14 + weekly + 0.6 + - https://yourdomain.com/tools/pdf-to-wordweekly0.9 - https://yourdomain.com/tools/word-to-pdfweekly0.9 - https://yourdomain.com/tools/compress-pdfweekly0.9 - https://yourdomain.com/tools/merge-pdfweekly0.9 - https://yourdomain.com/tools/split-pdfweekly0.8 - https://yourdomain.com/tools/rotate-pdfweekly0.7 - https://yourdomain.com/tools/pdf-to-imagesweekly0.8 - https://yourdomain.com/tools/images-to-pdfweekly0.8 - https://yourdomain.com/tools/watermark-pdfweekly0.7 - https://yourdomain.com/tools/remove-watermark-pdfweekly0.7 - https://yourdomain.com/tools/protect-pdfweekly0.8 - https://yourdomain.com/tools/unlock-pdfweekly0.8 - https://yourdomain.com/tools/page-numbersweekly0.7 - https://yourdomain.com/tools/reorder-pdfweekly0.7 - https://yourdomain.com/tools/extract-pagesweekly0.7 - https://yourdomain.com/tools/pdf-editorweekly0.8 - https://yourdomain.com/tools/pdf-flowchartweekly0.7 + + https://saas-pdf.com/tools/pdf-to-word + 2026-03-14 + weekly + 0.9 + + + https://saas-pdf.com/tools/word-to-pdf + 2026-03-14 + weekly + 0.9 + + + https://saas-pdf.com/tools/compress-pdf + 2026-03-14 + weekly + 0.9 + + + https://saas-pdf.com/tools/merge-pdf + 2026-03-14 + weekly + 0.9 + + + https://saas-pdf.com/tools/split-pdf + 2026-03-14 + weekly + 0.8 + + + https://saas-pdf.com/tools/rotate-pdf + 2026-03-14 + weekly + 0.7 + + + https://saas-pdf.com/tools/pdf-to-images + 2026-03-14 + weekly + 0.8 + + + https://saas-pdf.com/tools/images-to-pdf + 2026-03-14 + weekly + 0.8 + + + https://saas-pdf.com/tools/watermark-pdf + 2026-03-14 + weekly + 0.7 + + + https://saas-pdf.com/tools/remove-watermark-pdf + 2026-03-14 + weekly + 0.7 + + + https://saas-pdf.com/tools/protect-pdf + 2026-03-14 + weekly + 0.8 + + + https://saas-pdf.com/tools/unlock-pdf + 2026-03-14 + weekly + 0.8 + + + https://saas-pdf.com/tools/page-numbers + 2026-03-14 + weekly + 0.7 + + + https://saas-pdf.com/tools/reorder-pdf + 2026-03-14 + weekly + 0.7 + + + https://saas-pdf.com/tools/extract-pages + 2026-03-14 + weekly + 0.7 + + + https://saas-pdf.com/tools/pdf-editor + 2026-03-14 + weekly + 0.8 + + + https://saas-pdf.com/tools/pdf-flowchart + 2026-03-14 + weekly + 0.7 + + + https://saas-pdf.com/tools/pdf-to-excel + 2026-03-14 + weekly + 0.8 + - https://yourdomain.com/tools/image-converterweekly0.8 - https://yourdomain.com/tools/image-resizeweekly0.8 - https://yourdomain.com/tools/compress-imageweekly0.8 - https://yourdomain.com/tools/remove-backgroundweekly0.8 + + https://saas-pdf.com/tools/image-converter + 2026-03-14 + weekly + 0.8 + + + https://saas-pdf.com/tools/image-resize + 2026-03-14 + weekly + 0.8 + + + https://saas-pdf.com/tools/compress-image + 2026-03-14 + weekly + 0.8 + + + https://saas-pdf.com/tools/remove-background + 2026-03-14 + weekly + 0.8 + - https://yourdomain.com/tools/ocrweekly0.8 - https://yourdomain.com/tools/chat-pdfweekly0.8 - https://yourdomain.com/tools/summarize-pdfweekly0.8 - https://yourdomain.com/tools/translate-pdfweekly0.8 - https://yourdomain.com/tools/extract-tablesweekly0.8 - - - https://yourdomain.com/tools/pdf-to-excelweekly0.8 - https://yourdomain.com/tools/html-to-pdfweekly0.7 + + https://saas-pdf.com/tools/ocr + 2026-03-14 + weekly + 0.8 + + + https://saas-pdf.com/tools/chat-pdf + 2026-03-14 + weekly + 0.8 + + + https://saas-pdf.com/tools/summarize-pdf + 2026-03-14 + weekly + 0.8 + + + https://saas-pdf.com/tools/translate-pdf + 2026-03-14 + weekly + 0.8 + + + https://saas-pdf.com/tools/extract-tables + 2026-03-14 + weekly + 0.8 + - https://yourdomain.com/tools/qr-codeweekly0.7 - https://yourdomain.com/tools/video-to-gifweekly0.7 - https://yourdomain.com/tools/word-counterweekly0.6 - https://yourdomain.com/tools/text-cleanerweekly0.6 - + + https://saas-pdf.com/tools/html-to-pdf + 2026-03-14 + weekly + 0.7 + + + https://saas-pdf.com/tools/qr-code + 2026-03-14 + weekly + 0.7 + + + https://saas-pdf.com/tools/video-to-gif + 2026-03-14 + weekly + 0.7 + + + https://saas-pdf.com/tools/word-counter + 2026-03-14 + weekly + 0.6 + + + https://saas-pdf.com/tools/text-cleaner + 2026-03-14 + weekly + 0.6 + + \ No newline at end of file diff --git a/scripts/generate_sitemap.py b/scripts/generate_sitemap.py index 5b11ae8..bb2db31 100644 --- a/scripts/generate_sitemap.py +++ b/scripts/generate_sitemap.py @@ -4,11 +4,13 @@ generate_sitemap.py Generates sitemap.xml for SEO from the full route inventory. Usage: - python scripts/generate_sitemap.py --domain https://yourdomain.com - python scripts/generate_sitemap.py --domain https://yourdomain.com --output frontend/public/sitemap.xml + python scripts/generate_sitemap.py --domain https://saas-pdf.com + python scripts/generate_sitemap.py --domain https://saas-pdf.com --output frontend/public/sitemap.xml + # Or set SITE_DOMAIN env var and omit --domain """ import argparse +import os from datetime import datetime # ─── Route definitions with priority and changefreq ────────────────────────── @@ -113,10 +115,14 @@ def generate_sitemap(domain: str) -> str: def main(): parser = argparse.ArgumentParser(description='Generate sitemap.xml') - parser.add_argument('--domain', type=str, required=True, help='Site domain (e.g. https://yourdomain.com)') + parser.add_argument('--domain', type=str, default=os.environ.get('SITE_DOMAIN', ''), + help='Site domain (e.g. https://saas-pdf.com). Falls back to SITE_DOMAIN env var.') parser.add_argument('--output', type=str, default='frontend/public/sitemap.xml', help='Output file path') args = parser.parse_args() + if not args.domain: + parser.error('--domain is required (or set SITE_DOMAIN env var)') + domain = args.domain.rstrip('/') sitemap = generate_sitemap(domain)