- Introduced seoPages.ts to manage SEO-related configurations and types for programmatic tools and collection pages.
- Created SeoCollectionPage and SeoProgrammaticPage components to render SEO content dynamically based on the new configuration.
- Enhanced the API service to ensure CSRF token handling for secure requests.
- Added the generateHowTo utility function for structured data generation.
- Updated the sitemap generation script to include SEO tool and collection pages.
- Configured TypeScript to resolve JSON modules for easier integration of SEO data.

استراتيجية التنفيذ
- لم أغير أي core logic في أدوات التحويل أو الضغط أو التحرير
- استخدمت architecture إضافية فوق النظام الحالي بدل استبداله
- جعلت الـ SEO pages تعتمد على source of truth واحد حتى يسهل التوسع
- ربطت التوليد مع build حتى لا تبقى sitemap وrobots ثابتة أو منسية
- دعمت العربية والإنجليزية داخل نفس config الجديد
- عززت internal linking من:
  - صفحات SEO إلى tool pages
  - صفحات SEO إلى collection pages
  - footer إلى collection pages
  - Suggested tools داخل صفحات الأدوات

التحقق
219 lines
7.6 KiB
Python
219 lines
7.6 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
generate_sitemap.py
|
|
Generates sitemap.xml for SEO from the full route inventory.
|
|
|
|
Usage:
|
|
python scripts/generate_sitemap.py --domain https://dociva.io
|
|
python scripts/generate_sitemap.py --domain https://dociva.io --output frontend/public/sitemap.xml
|
|
# Or set SITE_DOMAIN env var and omit --domain
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import re
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
# ─── Static routes: (path, changefreq, priority) ─────────────────────────────

# Each row becomes one dict with the keys 'path', 'changefreq' and 'priority';
# priorities are kept as strings because they are written into the XML as-is.
PAGES = [
    {'path': path, 'changefreq': changefreq, 'priority': priority}
    for path, changefreq, priority in (
        ('/', 'daily', '1.0'),
        ('/about', 'monthly', '0.4'),
        ('/contact', 'monthly', '0.4'),
        ('/privacy', 'yearly', '0.3'),
        ('/terms', 'yearly', '0.3'),
        ('/pricing', 'monthly', '0.7'),
        ('/blog', 'weekly', '0.6'),
    )
]
|
|
|
|
# PDF tool routes as (slug, priority) rows; each row becomes
# {'slug': ..., 'priority': ...}. Row order is the order used in the sitemap.
PDF_TOOLS = [
    {'slug': slug, 'priority': priority}
    for slug, priority in (
        ('pdf-to-word', '0.9'),
        ('word-to-pdf', '0.9'),
        ('compress-pdf', '0.9'),
        ('merge-pdf', '0.9'),
        ('split-pdf', '0.8'),
        ('rotate-pdf', '0.7'),
        ('pdf-to-images', '0.8'),
        ('images-to-pdf', '0.8'),
        ('watermark-pdf', '0.7'),
        ('remove-watermark-pdf', '0.7'),
        ('protect-pdf', '0.8'),
        ('unlock-pdf', '0.8'),
        ('page-numbers', '0.7'),
        ('reorder-pdf', '0.7'),
        ('extract-pages', '0.7'),
        ('pdf-editor', '0.8'),
        ('pdf-flowchart', '0.7'),
        ('pdf-to-excel', '0.8'),
        # Phase 2
        ('sign-pdf', '0.8'),
        ('crop-pdf', '0.7'),
        ('flatten-pdf', '0.7'),
        ('repair-pdf', '0.7'),
        ('pdf-metadata', '0.6'),
    )
]
|
|
|
|
# Image tool routes as (slug, priority) rows; each row becomes
# {'slug': ..., 'priority': ...}.
IMAGE_TOOLS = [
    {'slug': slug, 'priority': priority}
    for slug, priority in (
        ('image-converter', '0.8'),
        ('image-resize', '0.8'),
        ('compress-image', '0.8'),
        ('remove-background', '0.8'),
        # Phase 2
        ('image-crop', '0.7'),
        ('image-rotate-flip', '0.7'),
    )
]
|
|
|
|
# AI tool routes; every entry currently shares the same sitemap priority.
AI_TOOLS = [
    {'slug': slug, 'priority': '0.8'}
    for slug in (
        'ocr',
        'chat-pdf',
        'summarize-pdf',
        'translate-pdf',
        'extract-tables',
    )
]
|
|
|
|
# Conversion / utility tool routes as (slug, priority) rows; each row becomes
# {'slug': ..., 'priority': ...}.
UTILITY_TOOLS = [
    {'slug': slug, 'priority': priority}
    for slug, priority in (
        ('html-to-pdf', '0.7'),
        ('qr-code', '0.7'),
        ('video-to-gif', '0.7'),
        ('word-counter', '0.6'),
        ('text-cleaner', '0.6'),
        # Phase 2
        ('pdf-to-pptx', '0.8'),
        ('excel-to-pdf', '0.8'),
        ('pptx-to-pdf', '0.8'),
        ('barcode-generator', '0.7'),
    )
]
|
|
|
|
# (section label, route table) pairs; the label is emitted as an XML comment
# heading in front of that table's URLs.
TOOL_GROUPS = list(zip(
    ('PDF Tools', 'Image Tools', 'AI Tools', 'Utility Tools'),
    (PDF_TOOLS, IMAGE_TOOLS, AI_TOOLS, UTILITY_TOOLS),
))
|
|
|
|
|
|
def _collect_slugs(entries) -> list[str]:
    """Return the unique, non-empty slugs found in *entries*, in first-seen order.

    Entries that are not dicts, or whose ``slug`` is not a string, are skipped.
    Slugs are stripped of surrounding whitespace and slashes so that joining
    them onto ``{domain}/`` can never yield an empty path or a double slash.
    """
    seen: dict[str, None] = {}
    for entry in entries or []:  # a JSON ``null`` list is treated as empty
        if not isinstance(entry, dict):
            continue
        slug = entry.get('slug')
        if not isinstance(slug, str):
            continue
        slug = slug.strip().strip('/')
        if slug:
            seen[slug] = None  # dict keys keep insertion order and de-duplicate
    return list(seen)


def get_seo_landing_paths(config_path: 'Path | str | None' = None) -> tuple[list[str], list[str]]:
    """Read the SEO landing-page slugs from the shared JSON configuration.

    Args:
        config_path: Location of the JSON config. Defaults to
            ``frontend/src/config/seo-tools.json`` under the directory two
            levels above this file.

    Returns:
        ``(tool_pages, collection_pages)``: the slugs listed under the
        ``toolPages`` and ``collectionPages`` keys. Both lists are empty when
        the config file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
        ValueError: If the top-level JSON value is not an object.
    """
    if config_path is None:
        repo_root = Path(__file__).resolve().parents[1]
        config_path = repo_root / 'frontend' / 'src' / 'config' / 'seo-tools.json'

    try:
        text = Path(config_path).read_text(encoding='utf-8')
    except FileNotFoundError:
        # The SEO config is optional; without it there are simply no extra pages.
        return [], []

    raw = json.loads(text)
    if not isinstance(raw, dict):
        raise ValueError(f'{config_path}: expected a JSON object at the top level')

    return _collect_slugs(raw.get('toolPages')), _collect_slugs(raw.get('collectionPages'))
|
|
|
|
|
|
# Matches a ``slug: <quoted value>`` property where the value is quoted with
# ', " or `. The leading \b stops keys that merely end in "slug" (for example
# ``tool_slug``) from being mistaken for an article slug; the back-reference
# requires the closing quote to be the same character as the opening one.
_BLOG_SLUG_RE = re.compile(r"""\bslug\s*:\s*(['"`])((?:(?!\1).)+)\1""")


def get_blog_slugs(articles_path: 'Path | str | None' = None) -> list[str]:
    """Extract the blog article slugs from the TypeScript content module.

    The file is scanned textually for ``slug: '...'`` properties; it is not
    parsed as TypeScript.

    Args:
        articles_path: Location of the articles module. Defaults to
            ``frontend/src/content/blogArticles.ts`` under the directory two
            levels above this file.

    Returns:
        The slugs in order of first appearance with duplicates removed, or an
        empty list when the file does not exist.
    """
    if articles_path is None:
        repo_root = Path(__file__).resolve().parents[1]
        articles_path = repo_root / 'frontend' / 'src' / 'content' / 'blogArticles.ts'

    try:
        content = Path(articles_path).read_text(encoding='utf-8')
    except FileNotFoundError:
        # No blog content module means there are no blog URLs to list.
        return []

    matches = (match.group(2) for match in _BLOG_SLUG_RE.finditer(content))
    # dict.fromkeys de-duplicates while keeping first-seen order.
    return list(dict.fromkeys(matches))
|
|
|
|
|
|
def _url_entry(loc: str, lastmod: str, changefreq: str, priority: str) -> str:
    """Render a single ``<url>`` element.

    ``loc`` is XML-escaped because it is assembled from slugs read out of
    project files and a raw ``&``, ``<`` or ``>`` would make the document
    malformed. The other three values are written as given.
    """
    from xml.sax.saxutils import escape  # local import: only this helper needs it

    return (
        '  <url>\n'
        f'    <loc>{escape(loc)}</loc>\n'
        f'    <lastmod>{lastmod}</lastmod>\n'
        f'    <changefreq>{changefreq}</changefreq>\n'
        f'    <priority>{priority}</priority>\n'
        '  </url>'
    )


def generate_sitemap(domain: str) -> str:
    """Build the complete sitemap.xml document for *domain*.

    URLs are emitted in this order: static pages, blog posts, the tool groups
    from ``TOOL_GROUPS``, programmatic SEO tool pages, SEO collection pages.
    Every section after the static pages is introduced by an XML comment. The
    blog and SEO sections are omitted entirely when they have no entries.

    Args:
        domain: Site origin including the scheme, e.g. ``https://dociva.io``.
            A trailing slash is ignored.

    Returns:
        The sitemap as a string, without a trailing newline. Every entry uses
        today's local date as ``<lastmod>``.
    """
    domain = domain.rstrip('/')
    today = datetime.now().strftime('%Y-%m-%d')
    blog_slugs = get_blog_slugs()
    seo_tool_pages, seo_collection_pages = get_seo_landing_paths()

    # Static pages come first and have no section comment.
    urls = [
        _url_entry(f'{domain}{page["path"]}', today, page['changefreq'], page['priority'])
        for page in PAGES
    ]

    # Remaining sections as (comment label, [(path, changefreq, priority), ...]).
    sections = []
    if blog_slugs:
        sections.append((
            'Blog Posts',
            [(f'/blog/{slug}', 'monthly', '0.6') for slug in blog_slugs],
        ))
    for label, routes in TOOL_GROUPS:
        sections.append((
            label,
            [(f'/tools/{route["slug"]}', 'weekly', route['priority']) for route in routes],
        ))
    if seo_tool_pages:
        sections.append((
            'Programmatic SEO Tool Pages',
            [(f'/{slug}', 'weekly', '0.88') for slug in seo_tool_pages],
        ))
    if seo_collection_pages:
        sections.append((
            'SEO Collection Pages',
            [(f'/{slug}', 'weekly', '0.82') for slug in seo_collection_pages],
        ))

    for label, entries in sections:
        urls.append(f'\n  <!-- {label} -->')
        urls.extend(
            _url_entry(f'{domain}{path}', today, changefreq, priority)
            for path, changefreq, priority in entries
        )

    body = '\n'.join(urls)
    return (
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'
        f'{body}\n'
        '</urlset>'
    )
|
|
|
|
|
|
def main():
    """Command-line entry point: generate the sitemap and write it to disk.

    Reads ``--domain`` (falling back to the ``SITE_DOMAIN`` environment
    variable) and ``--output``, writes the generated XML as UTF-8 and prints
    the output path and the number of URLs written. Exits through
    ``parser.error`` when no domain is available.
    """
    parser = argparse.ArgumentParser(description='Generate sitemap.xml')
    parser.add_argument(
        '--domain',
        type=str,
        default=os.environ.get('SITE_DOMAIN', ''),
        help='Site domain (e.g. https://dociva.io). Falls back to SITE_DOMAIN env var.',
    )
    parser.add_argument('--output', type=str, default='frontend/public/sitemap.xml', help='Output file path')
    args = parser.parse_args()

    # Strip first so a whitespace-only value is rejected like an empty one.
    domain = args.domain.strip().rstrip('/')
    if not domain:
        parser.error('--domain is required (or set SITE_DOMAIN env var)')

    sitemap = generate_sitemap(domain)

    output_path = Path(args.output)
    # Create missing parent directories so a fresh checkout does not fail here.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(sitemap)

    # Count from the generated document itself: every URL entry has exactly
    # one <loc>, so this always matches what was written and avoids re-reading
    # the blog and SEO source files.
    total = sitemap.count('<loc>')
    print(f"Sitemap generated: {args.output}")
    print(f"Total URLs: {total}")


if __name__ == '__main__':
    main()
|