- Added a new route for comparison pages in routes.ts. - Introduced a TOOL_WORKFLOWS object in seoData.ts to define tool usage sequences. - Updated internal link generation to include workflow slugs. - Added Arabic, English, and French translations for comparison features and FAQs in respective i18n files. - Implemented the ComparisonPage component to display feature comparisons, advantages, verdicts, and related tools. - Enhanced sitemap generation script to include comparison pages.
240 lines
8.3 KiB
Python
240 lines
8.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
generate_sitemap.py
|
|
Generates sitemap.xml for SEO from the full route inventory.
|
|
|
|
Usage:
|
|
python scripts/generate_sitemap.py --domain https://dociva.io
|
|
python scripts/generate_sitemap.py --domain https://dociva.io --output frontend/public/sitemap.xml
|
|
# Or set SITE_DOMAIN env var and omit --domain
|
|
"""
|
|
|
|
import argparse
import json
import os
import re
from datetime import datetime
from pathlib import Path
from xml.sax.saxutils import escape
|
|
|
|
# ─── Route definitions with priority and changefreq ──────────────────────────
|
|
|
|
# Static pages as (path, changefreq, priority) rows, expanded into the dict
# shape the sitemap generator reads.
PAGES = [
    {'path': path, 'changefreq': changefreq, 'priority': priority}
    for path, changefreq, priority in (
        ('/', 'daily', '1.0'),
        ('/about', 'monthly', '0.4'),
        ('/contact', 'monthly', '0.4'),
        ('/privacy', 'yearly', '0.3'),
        ('/terms', 'yearly', '0.3'),
        ('/pricing', 'monthly', '0.7'),
        ('/blog', 'weekly', '0.6'),
    )
]
|
|
|
|
# PDF Tools
|
|
# Each row is (slug, priority); slugs are served under /tools/<slug>.
PDF_TOOLS = [
    {'slug': slug, 'priority': priority}
    for slug, priority in (
        ('pdf-to-word', '0.9'),
        ('word-to-pdf', '0.9'),
        ('compress-pdf', '0.9'),
        ('merge-pdf', '0.9'),
        ('split-pdf', '0.8'),
        ('rotate-pdf', '0.7'),
        ('pdf-to-images', '0.8'),
        ('images-to-pdf', '0.8'),
        ('watermark-pdf', '0.7'),
        ('remove-watermark-pdf', '0.7'),
        ('protect-pdf', '0.8'),
        ('unlock-pdf', '0.8'),
        ('page-numbers', '0.7'),
        ('reorder-pdf', '0.7'),
        ('extract-pages', '0.7'),
        ('pdf-editor', '0.8'),
        ('pdf-flowchart', '0.7'),
        ('pdf-to-excel', '0.8'),
        # Phase 2
        ('sign-pdf', '0.8'),
        ('crop-pdf', '0.7'),
        ('flatten-pdf', '0.7'),
        ('repair-pdf', '0.7'),
        ('pdf-metadata', '0.6'),
    )
]
|
|
|
|
# Image Tools
|
|
# Each row is (slug, priority); slugs are served under /tools/<slug>.
IMAGE_TOOLS = [
    {'slug': slug, 'priority': priority}
    for slug, priority in (
        ('image-converter', '0.8'),
        ('image-resize', '0.8'),
        ('compress-image', '0.8'),
        ('remove-background', '0.8'),
        # Phase 2
        ('image-crop', '0.7'),
        ('image-rotate-flip', '0.7'),
    )
]
|
|
|
|
# AI Tools
|
|
# Each row is (slug, priority); slugs are served under /tools/<slug>.
AI_TOOLS = [
    {'slug': slug, 'priority': priority}
    for slug, priority in (
        ('ocr', '0.8'),
        ('chat-pdf', '0.8'),
        ('summarize-pdf', '0.8'),
        ('translate-pdf', '0.8'),
        ('extract-tables', '0.8'),
    )
]
|
|
|
|
# Convert / Utility Tools
|
|
# Each row is (slug, priority); slugs are served under /tools/<slug>.
UTILITY_TOOLS = [
    {'slug': slug, 'priority': priority}
    for slug, priority in (
        ('html-to-pdf', '0.7'),
        ('qr-code', '0.7'),
        ('video-to-gif', '0.7'),
        ('word-counter', '0.6'),
        ('text-cleaner', '0.6'),
        # Phase 2
        ('pdf-to-pptx', '0.8'),
        ('excel-to-pdf', '0.8'),
        ('pptx-to-pdf', '0.8'),
        ('barcode-generator', '0.7'),
    )
]
|
|
|
|
# Ordered (label, routes) pairs; the label becomes the XML comment that
# introduces each group of tool URLs in the sitemap.
TOOL_GROUPS = list({
    'PDF Tools': PDF_TOOLS,
    'Image Tools': IMAGE_TOOLS,
    'AI Tools': AI_TOOLS,
    'Utility Tools': UTILITY_TOOLS,
}.items())
|
|
|
|
# Comparison Pages
|
|
# Each row is (slug, priority); slugs are served under /compare/<slug>.
COMPARISON_PAGES = [
    {'slug': slug, 'priority': priority}
    for slug, priority in (
        ('compress-pdf-vs-ilovepdf', '0.7'),
        ('merge-pdf-vs-smallpdf', '0.7'),
        ('pdf-to-word-vs-adobe-acrobat', '0.7'),
        ('compress-image-vs-tinypng', '0.7'),
        ('ocr-vs-adobe-scan', '0.7'),
    )
]
|
|
|
|
|
|
def get_seo_landing_paths() -> tuple[list[str], list[str]]:
    """Read the programmatic SEO landing-page slugs from ``seo-tools.json``.

    The config is looked up at ``frontend/src/config/seo-tools.json`` relative
    to the directory one level above this script's directory.

    Returns:
        A ``(tool_pages, collection_pages)`` pair of slug lists taken from the
        ``toolPages`` and ``collectionPages`` arrays. Slugs are stripped of
        surrounding whitespace; entries whose slug is missing, null, empty or
        whitespace-only are skipped. Both lists are empty when the config
        file is absent.
    """
    repo_root = Path(__file__).resolve().parents[1]
    seo_config_path = repo_root / 'frontend' / 'src' / 'config' / 'seo-tools.json'

    if not seo_config_path.is_file():
        return [], []

    raw = json.loads(seo_config_path.read_text(encoding='utf-8'))

    def clean_slugs(key: str) -> list[str]:
        # Strip BEFORE filtering: a whitespace-only slug would otherwise pass
        # the truthiness check and turn into '' (i.e. the URL "<domain>/").
        return [
            slug
            for entry in raw.get(key, [])
            if (slug := (entry.get('slug') or '').strip())
        ]

    return clean_slugs('toolPages'), clean_slugs('collectionPages')
|
|
|
|
|
|
def get_blog_slugs() -> list[str]:
    """Collect blog article slugs from ``blogArticles.ts``.

    The file is looked up at ``frontend/src/content/blogArticles.ts`` relative
    to the directory one level above this script's directory, and scanned for
    ``slug: '<value>'`` or ``slug: "<value>"`` properties.

    Returns:
        The slugs in order of first appearance with duplicates removed, or an
        empty list when the file is absent.
    """
    repo_root = Path(__file__).resolve().parents[1]
    blog_articles_path = repo_root / 'frontend' / 'src' / 'content' / 'blogArticles.ts'

    if not blog_articles_path.is_file():
        return []

    content = blog_articles_path.read_text(encoding='utf-8')

    # Accept both quote styles so a formatter switching the TypeScript source
    # to double quotes does not silently drop every blog URL.
    matches = re.finditer(r"""slug:\s*(?:'([^']+)'|"([^"]+)")""", content)
    slugs = (match.group(1) or match.group(2) for match in matches)

    # dict.fromkeys de-duplicates while keeping first-seen order.
    return list(dict.fromkeys(slugs))
|
|
|
|
|
|
def _url_entry(loc: str, lastmod: str, changefreq: str, priority: str) -> str:
    """Render one ``<url>`` element, entity-escaping the location."""
    # The sitemap protocol requires &, <, >, ' and " in URLs to be escaped.
    safe_loc = escape(loc, {'"': '&quot;', "'": '&apos;'})
    return (
        '  <url>\n'
        f'    <loc>{safe_loc}</loc>\n'
        f'    <lastmod>{lastmod}</lastmod>\n'
        f'    <changefreq>{changefreq}</changefreq>\n'
        f'    <priority>{priority}</priority>\n'
        '  </url>'
    )


def generate_sitemap(domain: str) -> str:
    """Build the sitemap XML document for ``domain``.

    URLs are emitted in this order: static pages, blog posts, tool pages per
    category, programmatic SEO tool pages, SEO collection pages and comparison
    pages. Every entry uses today's local date as ``<lastmod>``.

    Args:
        domain: Site origin that every path is appended to, e.g.
            ``https://dociva.io``. ``main`` passes it with any trailing slash
            already removed.

    Returns:
        The complete ``<urlset>`` document as a string (no trailing newline).
    """
    today = datetime.now().strftime('%Y-%m-%d')
    blog_slugs = get_blog_slugs()
    seo_tool_pages, seo_collection_pages = get_seo_landing_paths()

    # Static pages
    urls = [
        _url_entry(f'{domain}{page["path"]}', today, page['changefreq'], page['priority'])
        for page in PAGES
    ]

    if blog_slugs:
        urls.append('\n  <!-- Blog Posts -->')
        urls.extend(
            _url_entry(f'{domain}/blog/{slug}', today, 'monthly', '0.6')
            for slug in blog_slugs
        )

    # Tool pages by category (the section comment is emitted for every group)
    for label, routes in TOOL_GROUPS:
        urls.append(f'\n  <!-- {label} -->')
        urls.extend(
            _url_entry(f'{domain}/tools/{route["slug"]}', today, 'weekly', route['priority'])
            for route in routes
        )

    if seo_tool_pages:
        urls.append('\n  <!-- Programmatic SEO Tool Pages -->')
        urls.extend(
            _url_entry(f'{domain}/{slug}', today, 'weekly', '0.88')
            for slug in seo_tool_pages
        )

    if seo_collection_pages:
        urls.append('\n  <!-- SEO Collection Pages -->')
        urls.extend(
            _url_entry(f'{domain}/{slug}', today, 'weekly', '0.82')
            for slug in seo_collection_pages
        )

    # Comparison pages
    if COMPARISON_PAGES:
        urls.append('\n  <!-- Comparison Pages -->')
        urls.extend(
            _url_entry(f'{domain}/compare/{page["slug"]}', today, 'monthly', page['priority'])
            for page in COMPARISON_PAGES
        )

    body = '\n'.join(urls)
    return (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
        f'{body}\n'
        '</urlset>'
    )
|
|
|
|
|
|
def main():
    """Command-line entry point: generate the sitemap and write it to disk.

    Reads ``--domain`` (falling back to the ``SITE_DOMAIN`` environment
    variable) and ``--output``, builds the sitemap with ``generate_sitemap``,
    writes it as UTF-8 and prints the output path and the number of URLs.
    Exits through ``parser.error`` when no domain is available.
    """
    parser = argparse.ArgumentParser(description='Generate sitemap.xml')
    parser.add_argument(
        '--domain',
        type=str,
        default=os.environ.get('SITE_DOMAIN', ''),
        help='Site domain (e.g. https://dociva.io). Falls back to SITE_DOMAIN env var.',
    )
    parser.add_argument(
        '--output',
        type=str,
        default='frontend/public/sitemap.xml',
        help='Output file path',
    )
    args = parser.parse_args()

    if not args.domain:
        parser.error('--domain is required (or set SITE_DOMAIN env var)')

    # Paths are appended with a leading slash, so drop any trailing one here.
    domain = args.domain.rstrip('/')
    sitemap = generate_sitemap(domain)

    # Create missing parent directories so a fresh checkout or a custom
    # --output location does not fail with FileNotFoundError.
    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(sitemap)

    # Count what was actually written instead of re-reading the source files:
    # every URL entry contributes exactly one <loc> element.
    total = sitemap.count('<loc>')
    print(f"Sitemap generated: {args.output}")
    print(f"Total URLs: {total}")


if __name__ == '__main__':
    main()
|