feat: enhance production readiness with environment configuration, dependency checks, and sitemap updates

This commit is contained in:
Your Name
2026-03-15 13:29:02 +02:00
parent 3217681108
commit c167091399
11 changed files with 392 additions and 51 deletions

View File

@@ -3,6 +3,9 @@ FLASK_ENV=development
FLASK_DEBUG=1
SECRET_KEY=change-me-in-production
# Site Domain (used in sitemap, robots.txt, emails)
SITE_DOMAIN=https://saas-pdf.com
# Redis
REDIS_URL=redis://redis:6379/0
@@ -31,6 +34,23 @@ DATABASE_PATH=/app/data/saas_pdf.db
# CORS
CORS_ORIGINS=http://localhost:5173,http://localhost:3000
# Stripe Payments
STRIPE_SECRET_KEY=sk_test_XXXXXXXXXXXXXXXXXXXXXXXX
STRIPE_WEBHOOK_SECRET=whsec_XXXXXXXXXXXXXXXXXXXXXXXX
STRIPE_PRICE_ID_PRO_MONTHLY=price_XXXXXXXXXXXXXXXX
STRIPE_PRICE_ID_PRO_YEARLY=price_XXXXXXXXXXXXXXXX
# Sentry Error Monitoring
SENTRY_DSN=
SENTRY_ENVIRONMENT=development
# PostgreSQL (production) — leave empty to use SQLite
DATABASE_URL=
# Frontend
VITE_SITE_DOMAIN=https://saas-pdf.com
VITE_SENTRY_DSN=
# Frontend Analytics / Ads (Vite)
VITE_GA_MEASUREMENT_ID=G-XXXXXXXXXX
VITE_PLAUSIBLE_DOMAIN=

View File

@@ -1,6 +1,7 @@
"""HTML to PDF conversion service."""
import os
import logging
from importlib.metadata import PackageNotFoundError, version
logger = logging.getLogger(__name__)
@@ -10,6 +11,54 @@ class HtmlToPdfError(Exception):
pass
def _parse_version_parts(raw_version: str | None) -> tuple[int, ...]:
"""Parse a package version into comparable integer parts."""
if not raw_version:
return ()
parts: list[int] = []
for token in raw_version.replace("-", ".").split("."):
digits = "".join(ch for ch in token if ch.isdigit())
if not digits:
break
parts.append(int(digits))
return tuple(parts)
def _get_installed_version(package_name: str) -> str | None:
"""Return installed package version, if available."""
try:
return version(package_name)
except PackageNotFoundError:
return None
def _get_dependency_mismatch_error() -> str | None:
    """
    Report the known-bad WeasyPrint/pydyf pairing before a conversion runs.

    WeasyPrint 61.x passes constructor arguments to pydyf.PDF, whereas
    pydyf 0.11+ expects those parameters on PDF.write(). Running the two
    together fails with:
    "PDF.__init__() takes 1 positional argument but 3 were given".

    Returns a human-readable message when that combination is installed, or
    None when either version is unavailable or the pair is compatible.
    """
    found_weasyprint = _get_installed_version("weasyprint")
    found_pydyf = _get_installed_version("pydyf")

    # Without both versions there is nothing to compare.
    if not (found_weasyprint and found_pydyf):
        return None

    # WeasyPrint 62 and later are not affected.
    if _parse_version_parts(found_weasyprint) >= (62,):
        return None

    # pydyf releases before 0.11 still accept the constructor arguments.
    if _parse_version_parts(found_pydyf) < (0, 11):
        return None

    return (
        "Installed HTML-to-PDF dependencies are incompatible: "
        f"WeasyPrint {found_weasyprint} with pydyf {found_pydyf}. "
        "Reinstall backend dependencies after pinning pydyf<0.11."
    )
def html_to_pdf(
input_path: str,
output_path: str,
@@ -30,6 +79,10 @@ def html_to_pdf(
os.makedirs(os.path.dirname(output_path), exist_ok=True)
try:
dependency_error = _get_dependency_mismatch_error()
if dependency_error:
raise HtmlToPdfError(dependency_error)
from weasyprint import HTML
HTML(filename=input_path).write_pdf(output_path)
@@ -67,6 +120,10 @@ def html_string_to_pdf(
os.makedirs(os.path.dirname(output_path), exist_ok=True)
try:
dependency_error = _get_dependency_mismatch_error()
if dependency_error:
raise HtmlToPdfError(dependency_error)
from weasyprint import HTML
HTML(string=html_content).write_pdf(output_path)

Binary file not shown.

View File

@@ -37,6 +37,8 @@ class BaseConfig:
"docx": [
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"
],
"html": ["text/html", "application/xhtml+xml"],
"htm": ["text/html", "application/xhtml+xml"],
"png": ["image/png"],
"jpg": ["image/jpeg"],
"jpeg": ["image/jpeg"],
@@ -52,6 +54,8 @@ class BaseConfig:
"pdf": 20 * 1024 * 1024, # 20MB
"doc": 15 * 1024 * 1024, # 15MB
"docx": 15 * 1024 * 1024, # 15MB
"html": 10 * 1024 * 1024, # 10MB
"htm": 10 * 1024 * 1024, # 10MB
"png": 10 * 1024 * 1024, # 10MB
"jpg": 10 * 1024 * 1024, # 10MB
"jpeg": 10 * 1024 * 1024, # 10MB

View File

@@ -29,6 +29,7 @@ openpyxl>=3.1,<4.0
qrcode[pil]>=7.4,<8.0
# HTML to PDF
pydyf>=0.8,<0.11
weasyprint>=60.0,<62.0
# OCR

View File

@@ -52,13 +52,36 @@ class TestFileValidator:
mock_file.tell = content.tell
mock_file.read = content.read
with patch('app.utils.file_validator.magic') as mock_magic:
with patch('app.utils.file_validator.HAS_MAGIC', True), patch(
'app.utils.file_validator.magic', create=True
) as mock_magic:
mock_magic.from_buffer.return_value = 'application/pdf'
filename, ext = validate_file(mock_file, allowed_types=["pdf"])
assert filename == 'document.pdf'
assert ext == 'pdf'
def test_valid_html_passes(self, app):
    """An .html upload is accepted when magic reports text/html for it."""
    with app.app_context():
        stream = io.BytesIO(b'<!doctype html><html><body>Hello</body></html>')

        # Stand-in for an uploaded file, backed by the in-memory stream.
        upload = MagicMock()
        upload.filename = 'page.html'
        upload.seek = stream.seek
        upload.tell = stream.tell
        upload.read = stream.read

        with patch('app.utils.file_validator.HAS_MAGIC', True):
            with patch('app.utils.file_validator.magic', create=True) as fake_magic:
                fake_magic.from_buffer.return_value = 'text/html'
                filename, ext = validate_file(upload, allowed_types=["html", "htm"])

        assert filename == 'page.html'
        assert ext == 'html'
def test_mime_mismatch_raises(self, app):
"""Should raise when MIME type doesn't match extension."""
with app.app_context():
@@ -70,7 +93,9 @@ class TestFileValidator:
mock_file.tell = content.tell
mock_file.read = content.read
with patch('app.utils.file_validator.magic') as mock_magic:
with patch('app.utils.file_validator.HAS_MAGIC', True), patch(
'app.utils.file_validator.magic', create=True
) as mock_magic:
mock_magic.from_buffer.return_value = 'text/plain'
with pytest.raises(FileValidationError, match="does not match"):
validate_file(mock_file, allowed_types=["pdf"])
@@ -102,7 +127,9 @@ class TestFileValidator:
mock_file.tell = content.tell
mock_file.read = content.read
with patch('app.utils.file_validator.magic') as mock_magic:
with patch('app.utils.file_validator.HAS_MAGIC', True), patch(
'app.utils.file_validator.magic', create=True
) as mock_magic:
mock_magic.from_buffer.return_value = 'application/pdf'
with pytest.raises(FileValidationError, match="unsafe"):
validate_file(mock_file, allowed_types=["pdf"])
validate_file(mock_file, allowed_types=["pdf"])

View File

@@ -2,6 +2,8 @@
import io
from unittest.mock import MagicMock
from app.services.html_to_pdf_service import _get_dependency_mismatch_error
class TestHtmlToPdf:
def test_no_file(self, client):
@@ -41,3 +43,33 @@ class TestHtmlToPdf:
assert response.status_code == 202
json_data = response.get_json()
assert 'task_id' in json_data
def test_detects_weasyprint_pydyf_version_mismatch(self, monkeypatch):
    """WeasyPrint 61.2 together with pydyf 0.12.1 must be reported."""
    fake_versions = {'weasyprint': '61.2', 'pydyf': '0.12.1'}

    def fake_installed_version(package_name):
        return fake_versions.get(package_name)

    # Replace the real metadata lookup with the canned versions above.
    monkeypatch.setattr(
        'app.services.html_to_pdf_service._get_installed_version',
        fake_installed_version,
    )

    message = _get_dependency_mismatch_error()

    assert message is not None
    assert 'WeasyPrint 61.2' in message
    assert 'pydyf 0.12.1' in message
def test_allows_compatible_weasyprint_pydyf_versions(self, monkeypatch):
    """WeasyPrint 61.2 together with pydyf 0.10.0 must not be reported."""
    fake_versions = {'weasyprint': '61.2', 'pydyf': '0.10.0'}

    def fake_installed_version(package_name):
        return fake_versions.get(package_name)

    # Replace the real metadata lookup with the canned versions above.
    monkeypatch.setattr(
        'app.services.html_to_pdf_service._get_installed_version',
        fake_installed_version,
    )

    outcome = _get_dependency_mismatch_error()
    assert outcome is None

View File

@@ -1,3 +1,5 @@
VITE_SITE_DOMAIN=https://saas-pdf.com
VITE_SENTRY_DSN=
VITE_GA_MEASUREMENT_ID=G-XXXXXXXXXX
VITE_PLAUSIBLE_DOMAIN=
VITE_PLAUSIBLE_SRC=https://plausible.io/js/script.js

View File

@@ -7,7 +7,7 @@ Disallow: /forgot-password
Disallow: /reset-password
# Sitemaps
Sitemap: https://yourdomain.com/sitemap.xml
Sitemap: https://saas-pdf.com/sitemap.xml
# AI/LLM discoverability
# See also: /llms.txt

View File

@@ -1,53 +1,245 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<!-- Pages -->
<url><loc>https://yourdomain.com/</loc><changefreq>daily</changefreq><priority>1.0</priority></url>
<url><loc>https://yourdomain.com/about</loc><changefreq>monthly</changefreq><priority>0.4</priority></url>
<url><loc>https://yourdomain.com/contact</loc><changefreq>monthly</changefreq><priority>0.4</priority></url>
<url><loc>https://yourdomain.com/privacy</loc><changefreq>yearly</changefreq><priority>0.3</priority></url>
<url><loc>https://yourdomain.com/terms</loc><changefreq>yearly</changefreq><priority>0.3</priority></url>
<url><loc>https://yourdomain.com/pricing</loc><changefreq>monthly</changefreq><priority>0.7</priority></url>
<url><loc>https://yourdomain.com/blog</loc><changefreq>weekly</changefreq><priority>0.6</priority></url>
<url>
<loc>https://saas-pdf.com/</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>daily</changefreq>
<priority>1.0</priority>
</url>
<url>
<loc>https://saas-pdf.com/about</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>monthly</changefreq>
<priority>0.4</priority>
</url>
<url>
<loc>https://saas-pdf.com/contact</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>monthly</changefreq>
<priority>0.4</priority>
</url>
<url>
<loc>https://saas-pdf.com/privacy</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>yearly</changefreq>
<priority>0.3</priority>
</url>
<url>
<loc>https://saas-pdf.com/terms</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>yearly</changefreq>
<priority>0.3</priority>
</url>
<url>
<loc>https://saas-pdf.com/pricing</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://saas-pdf.com/blog</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.6</priority>
</url>
<!-- PDF Tools -->
<url><loc>https://yourdomain.com/tools/pdf-to-word</loc><changefreq>weekly</changefreq><priority>0.9</priority></url>
<url><loc>https://yourdomain.com/tools/word-to-pdf</loc><changefreq>weekly</changefreq><priority>0.9</priority></url>
<url><loc>https://yourdomain.com/tools/compress-pdf</loc><changefreq>weekly</changefreq><priority>0.9</priority></url>
<url><loc>https://yourdomain.com/tools/merge-pdf</loc><changefreq>weekly</changefreq><priority>0.9</priority></url>
<url><loc>https://yourdomain.com/tools/split-pdf</loc><changefreq>weekly</changefreq><priority>0.8</priority></url>
<url><loc>https://yourdomain.com/tools/rotate-pdf</loc><changefreq>weekly</changefreq><priority>0.7</priority></url>
<url><loc>https://yourdomain.com/tools/pdf-to-images</loc><changefreq>weekly</changefreq><priority>0.8</priority></url>
<url><loc>https://yourdomain.com/tools/images-to-pdf</loc><changefreq>weekly</changefreq><priority>0.8</priority></url>
<url><loc>https://yourdomain.com/tools/watermark-pdf</loc><changefreq>weekly</changefreq><priority>0.7</priority></url>
<url><loc>https://yourdomain.com/tools/remove-watermark-pdf</loc><changefreq>weekly</changefreq><priority>0.7</priority></url>
<url><loc>https://yourdomain.com/tools/protect-pdf</loc><changefreq>weekly</changefreq><priority>0.8</priority></url>
<url><loc>https://yourdomain.com/tools/unlock-pdf</loc><changefreq>weekly</changefreq><priority>0.8</priority></url>
<url><loc>https://yourdomain.com/tools/page-numbers</loc><changefreq>weekly</changefreq><priority>0.7</priority></url>
<url><loc>https://yourdomain.com/tools/reorder-pdf</loc><changefreq>weekly</changefreq><priority>0.7</priority></url>
<url><loc>https://yourdomain.com/tools/extract-pages</loc><changefreq>weekly</changefreq><priority>0.7</priority></url>
<url><loc>https://yourdomain.com/tools/pdf-editor</loc><changefreq>weekly</changefreq><priority>0.8</priority></url>
<url><loc>https://yourdomain.com/tools/pdf-flowchart</loc><changefreq>weekly</changefreq><priority>0.7</priority></url>
<url>
<loc>https://saas-pdf.com/tools/pdf-to-word</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.9</priority>
</url>
<url>
<loc>https://saas-pdf.com/tools/word-to-pdf</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.9</priority>
</url>
<url>
<loc>https://saas-pdf.com/tools/compress-pdf</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.9</priority>
</url>
<url>
<loc>https://saas-pdf.com/tools/merge-pdf</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.9</priority>
</url>
<url>
<loc>https://saas-pdf.com/tools/split-pdf</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://saas-pdf.com/tools/rotate-pdf</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://saas-pdf.com/tools/pdf-to-images</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://saas-pdf.com/tools/images-to-pdf</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://saas-pdf.com/tools/watermark-pdf</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://saas-pdf.com/tools/remove-watermark-pdf</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://saas-pdf.com/tools/protect-pdf</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://saas-pdf.com/tools/unlock-pdf</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://saas-pdf.com/tools/page-numbers</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://saas-pdf.com/tools/reorder-pdf</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://saas-pdf.com/tools/extract-pages</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://saas-pdf.com/tools/pdf-editor</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://saas-pdf.com/tools/pdf-flowchart</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://saas-pdf.com/tools/pdf-to-excel</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<!-- Image Tools -->
<url><loc>https://yourdomain.com/tools/image-converter</loc><changefreq>weekly</changefreq><priority>0.8</priority></url>
<url><loc>https://yourdomain.com/tools/image-resize</loc><changefreq>weekly</changefreq><priority>0.8</priority></url>
<url><loc>https://yourdomain.com/tools/compress-image</loc><changefreq>weekly</changefreq><priority>0.8</priority></url>
<url><loc>https://yourdomain.com/tools/remove-background</loc><changefreq>weekly</changefreq><priority>0.8</priority></url>
<url>
<loc>https://saas-pdf.com/tools/image-converter</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://saas-pdf.com/tools/image-resize</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://saas-pdf.com/tools/compress-image</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://saas-pdf.com/tools/remove-background</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<!-- AI Tools -->
<url><loc>https://yourdomain.com/tools/ocr</loc><changefreq>weekly</changefreq><priority>0.8</priority></url>
<url><loc>https://yourdomain.com/tools/chat-pdf</loc><changefreq>weekly</changefreq><priority>0.8</priority></url>
<url><loc>https://yourdomain.com/tools/summarize-pdf</loc><changefreq>weekly</changefreq><priority>0.8</priority></url>
<url><loc>https://yourdomain.com/tools/translate-pdf</loc><changefreq>weekly</changefreq><priority>0.8</priority></url>
<url><loc>https://yourdomain.com/tools/extract-tables</loc><changefreq>weekly</changefreq><priority>0.8</priority></url>
<!-- Convert Tools -->
<url><loc>https://yourdomain.com/tools/pdf-to-excel</loc><changefreq>weekly</changefreq><priority>0.8</priority></url>
<url><loc>https://yourdomain.com/tools/html-to-pdf</loc><changefreq>weekly</changefreq><priority>0.7</priority></url>
<url>
<loc>https://saas-pdf.com/tools/ocr</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://saas-pdf.com/tools/chat-pdf</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://saas-pdf.com/tools/summarize-pdf</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://saas-pdf.com/tools/translate-pdf</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://saas-pdf.com/tools/extract-tables</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<!-- Utility Tools -->
<url><loc>https://yourdomain.com/tools/qr-code</loc><changefreq>weekly</changefreq><priority>0.7</priority></url>
<url><loc>https://yourdomain.com/tools/video-to-gif</loc><changefreq>weekly</changefreq><priority>0.7</priority></url>
<url><loc>https://yourdomain.com/tools/word-counter</loc><changefreq>weekly</changefreq><priority>0.6</priority></url>
<url><loc>https://yourdomain.com/tools/text-cleaner</loc><changefreq>weekly</changefreq><priority>0.6</priority></url>
</urlset>
<url>
<loc>https://saas-pdf.com/tools/html-to-pdf</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://saas-pdf.com/tools/qr-code</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://saas-pdf.com/tools/video-to-gif</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://saas-pdf.com/tools/word-counter</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://saas-pdf.com/tools/text-cleaner</loc>
<lastmod>2026-03-14</lastmod>
<changefreq>weekly</changefreq>
<priority>0.6</priority>
</url>
</urlset>

View File

@@ -4,11 +4,13 @@ generate_sitemap.py
Generates sitemap.xml for SEO from the full route inventory.
Usage:
python scripts/generate_sitemap.py --domain https://yourdomain.com
python scripts/generate_sitemap.py --domain https://yourdomain.com --output frontend/public/sitemap.xml
python scripts/generate_sitemap.py --domain https://saas-pdf.com
python scripts/generate_sitemap.py --domain https://saas-pdf.com --output frontend/public/sitemap.xml
# Or set SITE_DOMAIN env var and omit --domain
"""
import argparse
import os
from datetime import datetime
# ─── Route definitions with priority and changefreq ──────────────────────────
@@ -113,10 +115,14 @@ def generate_sitemap(domain: str) -> str:
def main():
parser = argparse.ArgumentParser(description='Generate sitemap.xml')
parser.add_argument('--domain', type=str, required=True, help='Site domain (e.g. https://yourdomain.com)')
parser.add_argument('--domain', type=str, default=os.environ.get('SITE_DOMAIN', ''),
help='Site domain (e.g. https://saas-pdf.com). Falls back to SITE_DOMAIN env var.')
parser.add_argument('--output', type=str, default='frontend/public/sitemap.xml', help='Output file path')
args = parser.parse_args()
if not args.domain:
parser.error('--domain is required (or set SITE_DOMAIN env var)')
domain = args.domain.rstrip('/')
sitemap = generate_sitemap(domain)