From 85d98381df8a757aef3dc003ea8651a5c34aff17 Mon Sep 17 00:00:00 2001 From: Your Name <119736744+aborayan2022@users.noreply.github.com> Date: Sat, 28 Feb 2026 23:31:19 +0200 Subject: [PATCH] feat: Initialize frontend with React, Vite, and Tailwind CSS - Set up main entry point for React application. - Create About, Home, NotFound, Privacy, and Terms pages with SEO support. - Implement API service for file uploads and task management. - Add global styles using Tailwind CSS. - Create utility functions for SEO and text processing. - Configure Vite for development and production builds. - Set up Nginx configuration for serving frontend and backend. - Add scripts for cleanup of expired files and sitemap generation. - Implement deployment script for production environment. --- .env.example | 29 +++ .gitignore | 54 +++++ README.md | 66 ++++++ backend/Dockerfile | 41 ++++ backend/app/__init__.py | 73 +++++++ backend/app/extensions.py | 43 ++++ backend/app/middleware/__init__.py | 1 + backend/app/middleware/rate_limiter.py | 18 ++ backend/app/routes/__init__.py | 1 + backend/app/routes/compress.py | 47 +++++ backend/app/routes/convert.py | 73 +++++++ backend/app/routes/download.py | 35 ++++ backend/app/routes/health.py | 14 ++ backend/app/routes/image.py | 122 +++++++++++ backend/app/routes/tasks.py | 39 ++++ backend/app/routes/video.py | 70 +++++++ backend/app/services/__init__.py | 1 + backend/app/services/compress_service.py | 109 ++++++++++ backend/app/services/image_service.py | 169 +++++++++++++++ backend/app/services/pdf_service.py | 170 ++++++++++++++++ backend/app/services/storage_service.py | 154 ++++++++++++++ backend/app/services/video_service.py | 176 ++++++++++++++++ backend/app/tasks/__init__.py | 1 + backend/app/tasks/compress_tasks.py | 88 ++++++++ backend/app/tasks/convert_tasks.py | 128 ++++++++++++ backend/app/tasks/image_tasks.py | 160 +++++++++++++++ backend/app/tasks/video_tasks.py | 96 +++++++++ backend/app/utils/__init__.py | 1 + 
backend/app/utils/cleanup.py | 31 +++ backend/app/utils/file_validator.py | 111 ++++++++++ backend/app/utils/sanitizer.py | 77 +++++++ backend/celery_worker.py | 11 + backend/config/__init__.py | 93 +++++++++ backend/requirements.txt | 27 +++ backend/tests/__init__.py | 0 backend/tests/conftest.py | 26 +++ backend/tests/test_compress.py | 21 ++ backend/tests/test_convert.py | 42 ++++ backend/tests/test_health.py | 15 ++ backend/tests/test_image.py | 27 +++ backend/tests/test_utils.py | 19 ++ backend/wsgi.py | 7 + docker-compose.prod.yml | 107 ++++++++++ docker-compose.yml | 99 +++++++++ docs/Plan-1.md | 0 frontend/Dockerfile | 41 ++++ frontend/index.html | 17 ++ frontend/nginx-frontend.conf | 21 ++ frontend/package.json | 38 ++++ frontend/postcss.config.js | 6 + frontend/public/ads.txt | 1 + frontend/public/favicon.svg | 7 + frontend/public/robots.txt | 6 + frontend/src/App.tsx | 71 +++++++ frontend/src/components/layout/AdSlot.tsx | 53 +++++ frontend/src/components/layout/Footer.tsx | 45 ++++ frontend/src/components/layout/Header.tsx | 50 +++++ .../src/components/shared/DownloadButton.tsx | 88 ++++++++ .../src/components/shared/FileUploader.tsx | 132 ++++++++++++ .../src/components/shared/ProgressBar.tsx | 42 ++++ frontend/src/components/shared/ToolCard.tsx | 43 ++++ .../src/components/tools/ImageConverter.tsx | 176 ++++++++++++++++ .../src/components/tools/PdfCompressor.tsx | 148 ++++++++++++++ frontend/src/components/tools/PdfToWord.tsx | 128 ++++++++++++ frontend/src/components/tools/TextCleaner.tsx | 146 +++++++++++++ frontend/src/components/tools/VideoToGif.tsx | 192 ++++++++++++++++++ frontend/src/components/tools/WordCounter.tsx | 81 ++++++++ frontend/src/components/tools/WordToPdf.tsx | 121 +++++++++++ frontend/src/hooks/useDirection.ts | 20 ++ frontend/src/hooks/useFileUpload.ts | 110 ++++++++++ frontend/src/hooks/useTaskPolling.ts | 87 ++++++++ frontend/src/i18n/ar.json | 96 +++++++++ frontend/src/i18n/en.json | 96 +++++++++ frontend/src/i18n/index.ts | 
27 +++ frontend/src/main.tsx | 17 ++ frontend/src/pages/AboutPage.tsx | 49 +++++ frontend/src/pages/HomePage.tsx | 93 +++++++++ frontend/src/pages/NotFoundPage.tsx | 34 ++++ frontend/src/pages/PrivacyPage.tsx | 59 ++++++ frontend/src/pages/TermsPage.tsx | 66 ++++++ frontend/src/services/api.ts | 114 +++++++++++ frontend/src/styles/global.css | 90 ++++++++ frontend/src/utils/seo.ts | 69 +++++++ frontend/src/utils/textTools.ts | 124 +++++++++++ frontend/src/vite-env.d.ts | 1 + frontend/tailwind.config.js | 44 ++++ frontend/tsconfig.json | 23 +++ frontend/vite.config.ts | 34 ++++ nginx/nginx.conf | 48 +++++ nginx/nginx.prod.conf | 65 ++++++ scripts/cleanup_expired_files.py | 85 ++++++++ scripts/deploy.sh | 58 ++++++ scripts/generate_sitemap.py | 86 ++++++++ 93 files changed, 5940 insertions(+) create mode 100644 .env.example create mode 100644 .gitignore create mode 100644 README.md create mode 100644 backend/Dockerfile create mode 100644 backend/app/__init__.py create mode 100644 backend/app/extensions.py create mode 100644 backend/app/middleware/__init__.py create mode 100644 backend/app/middleware/rate_limiter.py create mode 100644 backend/app/routes/__init__.py create mode 100644 backend/app/routes/compress.py create mode 100644 backend/app/routes/convert.py create mode 100644 backend/app/routes/download.py create mode 100644 backend/app/routes/health.py create mode 100644 backend/app/routes/image.py create mode 100644 backend/app/routes/tasks.py create mode 100644 backend/app/routes/video.py create mode 100644 backend/app/services/__init__.py create mode 100644 backend/app/services/compress_service.py create mode 100644 backend/app/services/image_service.py create mode 100644 backend/app/services/pdf_service.py create mode 100644 backend/app/services/storage_service.py create mode 100644 backend/app/services/video_service.py create mode 100644 backend/app/tasks/__init__.py create mode 100644 backend/app/tasks/compress_tasks.py create mode 100644 
backend/app/tasks/convert_tasks.py create mode 100644 backend/app/tasks/image_tasks.py create mode 100644 backend/app/tasks/video_tasks.py create mode 100644 backend/app/utils/__init__.py create mode 100644 backend/app/utils/cleanup.py create mode 100644 backend/app/utils/file_validator.py create mode 100644 backend/app/utils/sanitizer.py create mode 100644 backend/celery_worker.py create mode 100644 backend/config/__init__.py create mode 100644 backend/requirements.txt create mode 100644 backend/tests/__init__.py create mode 100644 backend/tests/conftest.py create mode 100644 backend/tests/test_compress.py create mode 100644 backend/tests/test_convert.py create mode 100644 backend/tests/test_health.py create mode 100644 backend/tests/test_image.py create mode 100644 backend/tests/test_utils.py create mode 100644 backend/wsgi.py create mode 100644 docker-compose.prod.yml create mode 100644 docker-compose.yml create mode 100644 docs/Plan-1.md create mode 100644 frontend/Dockerfile create mode 100644 frontend/index.html create mode 100644 frontend/nginx-frontend.conf create mode 100644 frontend/package.json create mode 100644 frontend/postcss.config.js create mode 100644 frontend/public/ads.txt create mode 100644 frontend/public/favicon.svg create mode 100644 frontend/public/robots.txt create mode 100644 frontend/src/App.tsx create mode 100644 frontend/src/components/layout/AdSlot.tsx create mode 100644 frontend/src/components/layout/Footer.tsx create mode 100644 frontend/src/components/layout/Header.tsx create mode 100644 frontend/src/components/shared/DownloadButton.tsx create mode 100644 frontend/src/components/shared/FileUploader.tsx create mode 100644 frontend/src/components/shared/ProgressBar.tsx create mode 100644 frontend/src/components/shared/ToolCard.tsx create mode 100644 frontend/src/components/tools/ImageConverter.tsx create mode 100644 frontend/src/components/tools/PdfCompressor.tsx create mode 100644 frontend/src/components/tools/PdfToWord.tsx create 
mode 100644 frontend/src/components/tools/TextCleaner.tsx create mode 100644 frontend/src/components/tools/VideoToGif.tsx create mode 100644 frontend/src/components/tools/WordCounter.tsx create mode 100644 frontend/src/components/tools/WordToPdf.tsx create mode 100644 frontend/src/hooks/useDirection.ts create mode 100644 frontend/src/hooks/useFileUpload.ts create mode 100644 frontend/src/hooks/useTaskPolling.ts create mode 100644 frontend/src/i18n/ar.json create mode 100644 frontend/src/i18n/en.json create mode 100644 frontend/src/i18n/index.ts create mode 100644 frontend/src/main.tsx create mode 100644 frontend/src/pages/AboutPage.tsx create mode 100644 frontend/src/pages/HomePage.tsx create mode 100644 frontend/src/pages/NotFoundPage.tsx create mode 100644 frontend/src/pages/PrivacyPage.tsx create mode 100644 frontend/src/pages/TermsPage.tsx create mode 100644 frontend/src/services/api.ts create mode 100644 frontend/src/styles/global.css create mode 100644 frontend/src/utils/seo.ts create mode 100644 frontend/src/utils/textTools.ts create mode 100644 frontend/src/vite-env.d.ts create mode 100644 frontend/tailwind.config.js create mode 100644 frontend/tsconfig.json create mode 100644 frontend/vite.config.ts create mode 100644 nginx/nginx.conf create mode 100644 nginx/nginx.prod.conf create mode 100644 scripts/cleanup_expired_files.py create mode 100644 scripts/deploy.sh create mode 100644 scripts/generate_sitemap.py diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..d1c403f --- /dev/null +++ b/.env.example @@ -0,0 +1,29 @@ +# Flask +FLASK_ENV=development +FLASK_DEBUG=1 +SECRET_KEY=change-me-in-production + +# Redis +REDIS_URL=redis://redis:6379/0 + +# Celery +CELERY_BROKER_URL=redis://redis:6379/0 +CELERY_RESULT_BACKEND=redis://redis:6379/1 + +# AWS S3 +AWS_ACCESS_KEY_ID=your-access-key +AWS_SECRET_ACCESS_KEY=your-secret-key +AWS_S3_BUCKET=saas-pdf-temp-files +AWS_S3_REGION=eu-west-1 + +# File Processing +MAX_CONTENT_LENGTH_MB=50 
+UPLOAD_FOLDER=/tmp/uploads +OUTPUT_FOLDER=/tmp/outputs +FILE_EXPIRY_SECONDS=1800 + +# CORS +CORS_ORIGINS=http://localhost:5173,http://localhost:3000 + +# AdSense +ADSENSE_CLIENT_ID=ca-pub-XXXXXXXXXXXXXXXX diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ec3b6d8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,54 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +*.egg-info/ +dist/ +build/ +*.egg +.eggs/ +venv/ +.venv/ +env/ + +# Node +node_modules/ +frontend/dist/ +frontend/build/ +.npm +*.tsbuildinfo + +# Environment +.env +.env.local +.env.production + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db +desktop.ini + +# Docker +docker-compose.override.yml + +# Uploads & temp files +uploads/ +tmp/ +*.tmp + +# Logs +*.log +logs/ + +# Coverage +htmlcov/ +.coverage +coverage/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..f9c4044 --- /dev/null +++ b/README.md @@ -0,0 +1,66 @@ +# SaaS-PDF — Free Online Tools Platform + +A free SaaS platform offering PDF, image, video, and text processing tools. Built with **Python Flask** (backend) and **React + Vite** (frontend), powered by **Celery + Redis** for async processing, and deployed on **AWS**. + +## 🛠 Tools (MVP) + +1. **PDF to Word / Word to PDF** — Convert between PDF and Word documents +2. **PDF Compressor** — Reduce PDF file size with quality options +3. **Image Converter** — Convert between JPG, PNG, WebP formats +4. **Video to GIF** — Create animated GIFs from video clips +5. 
**Text Tools** — Word counter, text cleaner, case converter (client-side) + +## 🏗 Tech Stack + +| Layer | Technology | +|-------|-----------| +| Backend API | Python 3.12 + Flask 3.x | +| Task Queue | Celery 5.x + Redis | +| File Processing | LibreOffice, Ghostscript, Pillow, ffmpeg | +| Frontend | React 18 + Vite 5 + TypeScript | +| Styling | Tailwind CSS (RTL support) | +| i18n | react-i18next (Arabic + English) | +| Storage | AWS S3 (temp files with auto-cleanup) | +| CDN | AWS CloudFront | +| Server | AWS EC2 + Nginx | + +## 🚀 Quick Start (Development) + +```bash +# 1. Clone the repo +git clone https://github.com/aborayan2022/SaaS-PDF.git +cd SaaS-PDF + +# 2. Copy environment file +cp .env.example .env + +# 3. Start all services with Docker +docker-compose up --build + +# 4. Access the app +# Frontend: http://localhost:5173 +# Backend API: http://localhost:5000/api +# Celery Flower: http://localhost:5555 +``` + +## 📁 Project Structure + +``` +SaaS-PDF/ +├── backend/ # Flask API + Celery Workers +├── frontend/ # React + Vite + TypeScript +├── nginx/ # Reverse proxy configuration +├── scripts/ # Deployment & maintenance scripts +├── docs/ # Project documentation +├── docker-compose.yml +└── docker-compose.prod.yml +``` + +## 💰 Revenue Model + +- **Google AdSense** — Ads on result/download pages +- **Freemium** (planned) — Pro features: no ads, higher limits, API access + +## 📄 License + +MIT diff --git a/backend/Dockerfile b/backend/Dockerfile new file mode 100644 index 0000000..042c97b --- /dev/null +++ b/backend/Dockerfile @@ -0,0 +1,41 @@ +FROM python:3.12-slim-bookworm + +# Prevent interactive prompts during package installation +ENV DEBIAN_FRONTEND=noninteractive + +# Install system dependencies for file processing +RUN apt-get update && apt-get install -y --no-install-recommends \ + libreoffice-core \ + libreoffice-writer \ + libreoffice-calc \ + libreoffice-draw \ + ghostscript \ + ffmpeg \ + libmagic1 \ + imagemagick \ + curl \ + && apt-get clean \ + && 
rm -rf /var/lib/apt/lists/* + +# Set working directory +WORKDIR /app + +# Copy requirements first for Docker layer caching +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY . . + +# Create temp directories +RUN mkdir -p /tmp/uploads /tmp/outputs + +# Expose port +EXPOSE 5000 + +# Health check +HEALTHCHECK --interval=30s --timeout=10s --retries=3 \ + CMD curl -f http://localhost:5000/api/health || exit 1 + +# Run with Gunicorn +CMD ["gunicorn", "--bind", "0.0.0.0:5000", "--workers", "4", "--timeout", "120", "wsgi:app"] diff --git a/backend/app/__init__.py b/backend/app/__init__.py new file mode 100644 index 0000000..bac1dbe --- /dev/null +++ b/backend/app/__init__.py @@ -0,0 +1,73 @@ +"""Flask Application Factory.""" +import os + +from flask import Flask + +from config import config +from app.extensions import cors, limiter, talisman, init_celery + + +def create_app(config_name=None): + """Create and configure the Flask application.""" + if config_name is None: + config_name = os.getenv("FLASK_ENV", "development") + + app = Flask(__name__) + app.config.from_object(config[config_name]) + + # Create upload/output directories + os.makedirs(app.config["UPLOAD_FOLDER"], exist_ok=True) + os.makedirs(app.config["OUTPUT_FOLDER"], exist_ok=True) + + # Initialize extensions + cors.init_app(app, origins=app.config["CORS_ORIGINS"]) + + limiter.init_app(app) + + # Talisman security headers (relaxed CSP for AdSense) + csp = { + "default-src": "'self'", + "script-src": [ + "'self'", + "'unsafe-inline'", + "https://pagead2.googlesyndication.com", + "https://www.googletagmanager.com", + "https://www.google-analytics.com", + ], + "style-src": ["'self'", "'unsafe-inline'", "https://fonts.googleapis.com"], + "font-src": ["'self'", "https://fonts.gstatic.com"], + "img-src": ["'self'", "data:", "https://pagead2.googlesyndication.com"], + "frame-src": ["https://googleads.g.doubleclick.net"], + "connect-src": [ + "'self'", + 
"https://www.google-analytics.com", + "https://*.amazonaws.com", + ], + } + talisman.init_app( + app, + content_security_policy=csp, + force_https=config_name == "production", + ) + + # Initialize Celery + init_celery(app) + + # Register blueprints + from app.routes.health import health_bp + from app.routes.convert import convert_bp + from app.routes.compress import compress_bp + from app.routes.image import image_bp + from app.routes.video import video_bp + from app.routes.tasks import tasks_bp + from app.routes.download import download_bp + + app.register_blueprint(health_bp, url_prefix="/api") + app.register_blueprint(convert_bp, url_prefix="/api/convert") + app.register_blueprint(compress_bp, url_prefix="/api/compress") + app.register_blueprint(image_bp, url_prefix="/api/image") + app.register_blueprint(video_bp, url_prefix="/api/video") + app.register_blueprint(tasks_bp, url_prefix="/api/tasks") + app.register_blueprint(download_bp, url_prefix="/api/download") + + return app diff --git a/backend/app/extensions.py b/backend/app/extensions.py new file mode 100644 index 0000000..6d8cf76 --- /dev/null +++ b/backend/app/extensions.py @@ -0,0 +1,43 @@ +"""Flask extensions initialization.""" +from celery import Celery +from flask_cors import CORS +from flask_limiter import Limiter +from flask_limiter.util import get_remote_address +from flask_talisman import Talisman + +# Initialize extensions (will be bound to app in create_app) +cors = CORS() +limiter = Limiter(key_func=get_remote_address) +talisman = Talisman() +celery = Celery() + + +def init_celery(app): + """Initialize Celery with Flask app context.""" + celery.conf.broker_url = app.config["CELERY_BROKER_URL"] + celery.conf.result_backend = app.config["CELERY_RESULT_BACKEND"] + celery.conf.result_expires = app.config.get("FILE_EXPIRY_SECONDS", 1800) + celery.conf.task_serializer = "json" + celery.conf.result_serializer = "json" + celery.conf.accept_content = ["json"] + celery.conf.timezone = "UTC" + 
celery.conf.task_track_started = True + + # Set task routes + celery.conf.task_routes = { + "app.tasks.convert_tasks.*": {"queue": "convert"}, + "app.tasks.compress_tasks.*": {"queue": "compress"}, + "app.tasks.image_tasks.*": {"queue": "image"}, + "app.tasks.video_tasks.*": {"queue": "video"}, + } + + class ContextTask(celery.Task): + """Make Celery tasks work with Flask app context.""" + abstract = True + + def __call__(self, *args, **kwargs): + with app.app_context(): + return self.run(*args, **kwargs) + + celery.Task = ContextTask + return celery diff --git a/backend/app/middleware/__init__.py b/backend/app/middleware/__init__.py new file mode 100644 index 0000000..f0f5f7b --- /dev/null +++ b/backend/app/middleware/__init__.py @@ -0,0 +1 @@ +"""Backend application middleware.""" diff --git a/backend/app/middleware/rate_limiter.py b/backend/app/middleware/rate_limiter.py new file mode 100644 index 0000000..e9b10b6 --- /dev/null +++ b/backend/app/middleware/rate_limiter.py @@ -0,0 +1,18 @@ +"""Rate limiting middleware configuration.""" +from app.extensions import limiter + + +# Custom rate limits for specific operations +UPLOAD_LIMIT = "10/minute" +DOWNLOAD_LIMIT = "30/minute" +API_LIMIT = "100/hour" + + +def get_upload_limit(): + """Get the rate limit for file upload endpoints.""" + return UPLOAD_LIMIT + + +def get_download_limit(): + """Get the rate limit for file download endpoints.""" + return DOWNLOAD_LIMIT diff --git a/backend/app/routes/__init__.py b/backend/app/routes/__init__.py new file mode 100644 index 0000000..e80061a --- /dev/null +++ b/backend/app/routes/__init__.py @@ -0,0 +1 @@ +"""Backend application routes.""" diff --git a/backend/app/routes/compress.py b/backend/app/routes/compress.py new file mode 100644 index 0000000..0ac3785 --- /dev/null +++ b/backend/app/routes/compress.py @@ -0,0 +1,47 @@ +"""PDF compression routes.""" +from flask import Blueprint, request, jsonify + +from app.extensions import limiter +from app.utils.file_validator 
compress_bp = Blueprint("compress", __name__)


@compress_bp.route("/pdf", methods=["POST"])
@limiter.limit("10/minute")
def compress_pdf_route():
    """
    Compress an uploaded PDF.

    Expects multipart/form-data with a 'file' field (PDF) and an
    optional 'quality' field ("low" | "medium" | "high", default
    "medium"). Responds 202 with a task_id the client polls via
    /api/tasks/<task_id>/status.
    """
    uploaded = request.files.get("file")
    if uploaded is None:
        return jsonify({"error": "No file provided."}), 400

    # Unknown quality values silently fall back to the default
    quality = request.form.get("quality", "medium")
    if quality not in {"low", "medium", "high"}:
        quality = "medium"

    try:
        original_filename, ext = validate_file(uploaded, allowed_types=["pdf"])
    except FileValidationError as exc:
        return jsonify({"error": exc.message}), exc.code

    # Persist the upload under a unique, sanitized path, then hand off
    # to the async worker
    task_id, input_path = generate_safe_path(ext, folder_type="upload")
    uploaded.save(input_path)

    job = compress_pdf_task.delay(input_path, task_id, original_filename, quality)

    return jsonify({
        "task_id": job.id,
        "message": "Compression started. Poll /api/tasks/{task_id}/status for progress.",
    }), 202
"""PDF conversion routes (PDF↔Word)."""
from flask import Blueprint, request, jsonify

from app.extensions import limiter
from app.utils.file_validator import validate_file, FileValidationError
from app.utils.sanitizer import generate_safe_path
from app.tasks.convert_tasks import convert_pdf_to_word, convert_word_to_pdf

convert_bp = Blueprint("convert", __name__)


@convert_bp.route("/pdf-to-word", methods=["POST"])
@limiter.limit("10/minute")
def pdf_to_word_route():
    """
    Convert an uploaded PDF to Word (DOCX).

    Expects multipart/form-data with a 'file' field containing a PDF.
    Responds 202 with a task_id the client polls for progress.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file provided."}), 400

    upload = request.files["file"]

    try:
        original_filename, ext = validate_file(upload, allowed_types=["pdf"])
    except FileValidationError as exc:
        return jsonify({"error": exc.message}), exc.code

    # Persist the upload, then dispatch the async conversion
    task_id, input_path = generate_safe_path(ext, folder_type="upload")
    upload.save(input_path)
    job = convert_pdf_to_word.delay(input_path, task_id, original_filename)

    return jsonify({
        "task_id": job.id,
        "message": "Conversion started. Poll /api/tasks/{task_id}/status for progress.",
    }), 202


@convert_bp.route("/word-to-pdf", methods=["POST"])
@limiter.limit("10/minute")
def word_to_pdf_route():
    """
    Convert an uploaded Word document (DOC/DOCX) to PDF.

    Expects multipart/form-data with a 'file' field containing the
    document. Responds 202 with a task_id the client polls for progress.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file provided."}), 400

    upload = request.files["file"]

    try:
        original_filename, ext = validate_file(upload, allowed_types=["doc", "docx"])
    except FileValidationError as exc:
        return jsonify({"error": exc.message}), exc.code

    task_id, input_path = generate_safe_path(ext, folder_type="upload")
    upload.save(input_path)
    job = convert_word_to_pdf.delay(input_path, task_id, original_filename)

    return jsonify({
        "task_id": job.id,
        "message": "Conversion started. Poll /api/tasks/{task_id}/status for progress.",
    }), 202
"""Local file download route — used when S3 is not configured."""
import os

from flask import Blueprint, send_file, abort, request, current_app

download_bp = Blueprint("download", __name__)


@download_bp.route("/<task_id>/<filename>", methods=["GET"])
def download_file(task_id: str, filename: str):
    """
    Serve a processed file from the local filesystem.

    Only active in development (when S3 is not configured).

    BUG FIX: the route pattern had lost its URL variables ("//");
    it must declare <task_id> and <filename> so Flask binds the
    function's arguments.
    """
    # Security: reject path-traversal attempts in either component
    if ".." in task_id or "/" in task_id or "\\" in task_id:
        abort(400, "Invalid task ID.")
    if ".." in filename or "/" in filename or "\\" in filename:
        abort(400, "Invalid filename.")

    output_dir = current_app.config["OUTPUT_FOLDER"]
    file_path = os.path.join(output_dir, task_id, filename)

    # Defense in depth: the resolved path must stay inside OUTPUT_FOLDER
    if not os.path.realpath(file_path).startswith(
        os.path.realpath(output_dir) + os.sep
    ):
        abort(400, "Invalid path.")

    if not os.path.isfile(file_path):
        abort(404, "File not found or expired.")

    # Optional friendly name for the browser's save dialog
    download_name = request.args.get("name", filename)

    return send_file(
        file_path,
        as_attachment=True,
        download_name=download_name,
    )
in filename or "/" in filename or "\\" in filename: + abort(400, "Invalid filename.") + + output_dir = current_app.config["OUTPUT_FOLDER"] + file_path = os.path.join(output_dir, task_id, filename) + + if not os.path.isfile(file_path): + abort(404, "File not found or expired.") + + download_name = request.args.get("name", filename) + + return send_file( + file_path, + as_attachment=True, + download_name=download_name, + ) diff --git a/backend/app/routes/health.py b/backend/app/routes/health.py new file mode 100644 index 0000000..e6849ff --- /dev/null +++ b/backend/app/routes/health.py @@ -0,0 +1,14 @@ +"""Health check endpoint.""" +from flask import Blueprint, jsonify + +health_bp = Blueprint("health", __name__) + + +@health_bp.route("/health", methods=["GET"]) +def health_check(): + """Simple health check — returns 200 if the service is running.""" + return jsonify({ + "status": "healthy", + "service": "SaaS-PDF API", + "version": "1.0.0", + }) diff --git a/backend/app/routes/image.py b/backend/app/routes/image.py new file mode 100644 index 0000000..7608cfc --- /dev/null +++ b/backend/app/routes/image.py @@ -0,0 +1,122 @@ +"""Image processing routes.""" +from flask import Blueprint, request, jsonify + +from app.extensions import limiter +from app.utils.file_validator import validate_file, FileValidationError +from app.utils.sanitizer import generate_safe_path +from app.tasks.image_tasks import convert_image_task, resize_image_task + +image_bp = Blueprint("image", __name__) + +ALLOWED_IMAGE_TYPES = ["png", "jpg", "jpeg", "webp"] +ALLOWED_OUTPUT_FORMATS = ["jpg", "png", "webp"] + + +@image_bp.route("/convert", methods=["POST"]) +@limiter.limit("10/minute") +def convert_image_route(): + """ + Convert an image to a different format. 
@image_bp.route("/convert", methods=["POST"])
@limiter.limit("10/minute")
def convert_image_route():
    """
    Convert an uploaded image to another format.

    Multipart form fields:
      - file:    image (PNG, JPG, JPEG, WebP)
      - format:  target format ("jpg", "png", "webp")
      - quality: optional, 1-100 (default 85)
    Responds 202 with a task_id for polling.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file provided."}), 400

    upload = request.files["file"]
    target = request.form.get("format", "").lower()

    if target not in ALLOWED_OUTPUT_FORMATS:
        supported = ", ".join(ALLOWED_OUTPUT_FORMATS)
        return jsonify({"error": f"Invalid format. Supported: {supported}"}), 400

    # Clamp quality into [1, 100]; non-numeric input falls back to 85
    try:
        quality = min(100, max(1, int(request.form.get("quality", "85"))))
    except ValueError:
        quality = 85

    try:
        original_filename, ext = validate_file(upload, allowed_types=ALLOWED_IMAGE_TYPES)
    except FileValidationError as exc:
        return jsonify({"error": exc.message}), exc.code

    # Persist the upload, then dispatch the async conversion
    task_id, input_path = generate_safe_path(ext, folder_type="upload")
    upload.save(input_path)

    job = convert_image_task.delay(
        input_path, task_id, original_filename, target, quality
    )

    return jsonify({
        "task_id": job.id,
        "message": "Image conversion started. Poll /api/tasks/{task_id}/status for progress.",
    }), 202
@image_bp.route("/resize", methods=["POST"])
@limiter.limit("10/minute")
def resize_image_route():
    """
    Resize an image.

    Multipart form fields:
      - file:    image (PNG, JPG, JPEG, WebP)
      - width:   optional target width (1-10000)
      - height:  optional target height (1-10000)
      - quality: optional, 1-100 (default 85)
    At least one of width/height is required.
    Responds 202 with a task_id for polling.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file provided."}), 400

    file = request.files["file"]
    width = request.form.get("width")
    height = request.form.get("height")
    quality = request.form.get("quality", "85")

    # Parse dimensions; absent fields become None
    try:
        width = int(width) if width else None
        height = int(height) if height else None
    except ValueError:
        return jsonify({"error": "Width and height must be integers."}), 400

    if width is None and height is None:
        return jsonify({"error": "At least one of width or height is required."}), 400

    # BUG FIX: use explicit None checks for the range validation. The old
    # truthiness test ("if width and ...") let width=0 / height=0 slip
    # through to the worker, where the resize then fails downstream.
    if width is not None and not (1 <= width <= 10000):
        return jsonify({"error": "Width must be between 1 and 10000."}), 400
    if height is not None and not (1 <= height <= 10000):
        return jsonify({"error": "Height must be between 1 and 10000."}), 400

    # Clamp quality into [1, 100]; non-numeric input falls back to 85
    try:
        quality = max(1, min(100, int(quality)))
    except ValueError:
        quality = 85

    try:
        original_filename, ext = validate_file(file, allowed_types=ALLOWED_IMAGE_TYPES)
    except FileValidationError as e:
        return jsonify({"error": e.message}), e.code

    task_id, input_path = generate_safe_path(ext, folder_type="upload")
    file.save(input_path)

    task = resize_image_task.delay(
        input_path, task_id, original_filename, width, height, quality
    )

    return jsonify({
        "task_id": task.id,
        "message": "Image resize started. Poll /api/tasks/{task_id}/status for progress.",
    }), 202
"""Task status polling endpoint."""
from flask import Blueprint, jsonify
from celery.result import AsyncResult

from app.extensions import celery

tasks_bp = Blueprint("tasks", __name__)


@tasks_bp.route("/<task_id>/status", methods=["GET"])
def get_task_status(task_id: str):
    """
    Get the status of an async task.

    BUG FIX: the route pattern had lost its URL variable ("//status");
    it must declare <task_id> so Flask binds the function's argument.

    Returns:
        JSON with the Celery task state, a progress message while
        pending/processing, the task result on SUCCESS, or the error
        string on FAILURE.
    """
    result = AsyncResult(task_id, app=celery)

    response = {
        "task_id": task_id,
        "state": result.state,
    }

    if result.state == "PENDING":
        response["progress"] = "Task is waiting in queue..."
    elif result.state == "PROCESSING":
        # Custom state set by workers; meta carries the current step
        meta = result.info or {}
        response["progress"] = meta.get("step", "Processing...")
    elif result.state == "SUCCESS":
        response["result"] = result.result or {}
    elif result.state == "FAILURE":
        response["error"] = str(result.info) if result.info else "Task failed."

    return jsonify(response)
@video_bp.route("/to-gif", methods=["POST"])
@limiter.limit("5/minute")
def video_to_gif_route():
    """
    Convert a video clip to an animated GIF.

    Multipart form fields:
      - file:       video (MP4, WebM, max 50MB)
      - start_time: optional start in seconds (default 0)
      - duration:   optional seconds, 0.5-15 (default 5)
      - fps:        optional frames per second, 1-20 (default 10)
      - width:      optional output width, 100-640 (default 480)
    Responds 202 with a task_id for polling.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file provided."}), 400

    file = request.files["file"]

    # Parse and validate numeric parameters
    try:
        start_time = float(request.form.get("start_time", 0))
        duration = float(request.form.get("duration", 5))
        fps = int(request.form.get("fps", 10))
        width = int(request.form.get("width", 480))
    except (ValueError, TypeError):
        return jsonify({"error": "Invalid parameters. Must be numeric."}), 400

    # Enforce limits
    if start_time < 0:
        return jsonify({"error": "Start time cannot be negative."}), 400
    # BUG FIX: the check allowed any duration > 0 while the error message
    # (and the service's clamp) promise a 0.5s minimum — enforce it here
    # so clients get a clear 400 instead of a silently clamped result.
    if duration < 0.5 or duration > 15:
        return jsonify({"error": "Duration must be between 0.5 and 15 seconds."}), 400
    if fps < 1 or fps > 20:
        return jsonify({"error": "FPS must be between 1 and 20."}), 400
    if width < 100 or width > 640:
        return jsonify({"error": "Width must be between 100 and 640 pixels."}), 400

    try:
        original_filename, ext = validate_file(file, allowed_types=ALLOWED_VIDEO_TYPES)
    except FileValidationError as e:
        return jsonify({"error": e.message}), e.code

    # Persist the upload, then dispatch the async GIF creation
    task_id, input_path = generate_safe_path(ext, folder_type="upload")
    file.save(input_path)

    task = create_gif_task.delay(
        input_path, task_id, original_filename,
        start_time, duration, fps, width,
    )

    return jsonify({
        "task_id": task.id,
        "message": "GIF creation started. Poll /api/tasks/{task_id}/status for progress.",
    }), 202
"""PDF compression service using Ghostscript."""
import os
import shutil
import subprocess
import logging

logger = logging.getLogger(__name__)


class PDFCompressionError(Exception):
    """Raised when Ghostscript compression fails or cannot run."""
    pass


# Ghostscript -dPDFSETTINGS quality presets
QUALITY_PRESETS = {
    "low": "/screen",    # 72 dpi — smallest file, lowest quality
    "medium": "/ebook",  # 150 dpi — good balance (default)
    "high": "/printer",  # 300 dpi — high quality, moderate compression
}


def compress_pdf(
    input_path: str, output_path: str, quality: str = "medium"
) -> dict:
    """
    Compress a PDF file using Ghostscript.

    Args:
        input_path: Path to the input PDF file.
        output_path: Path for the compressed output file.
        quality: Compression quality — "low", "medium", or "high"
            (unknown values fall back to "medium").

    Returns:
        dict with original_size, compressed_size, reduction_percent.

    Raises:
        PDFCompressionError: If the input is missing, Ghostscript is
            unavailable, or compression fails / times out.
    """
    if quality not in QUALITY_PRESETS:
        quality = "medium"

    # BUG FIX: validate the input up front. Previously a missing input
    # file raised FileNotFoundError inside the try-block below and was
    # misreported as "Ghostscript is not installed".
    if not os.path.isfile(input_path):
        raise PDFCompressionError("Input file not found.")
    original_size = os.path.getsize(input_path)

    # Ensure the output directory exists (guard against bare filenames,
    # where dirname is "" and makedirs would fail)
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    cmd = [
        "gs",
        "-sDEVICE=pdfwrite",
        "-dCompatibilityLevel=1.4",
        f"-dPDFSETTINGS={QUALITY_PRESETS[quality]}",
        "-dNOPAUSE",
        "-dQUIET",
        "-dBATCH",
        "-dColorImageResolution=150",
        "-dGrayImageResolution=150",
        "-dMonoImageResolution=150",
        f"-sOutputFile={output_path}",
        input_path,
    ]

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=120,  # hard cap so a pathological PDF can't wedge a worker
        )

        if result.returncode != 0:
            logger.error("Ghostscript compression failed: %s", result.stderr)
            raise PDFCompressionError(
                f"Compression failed: {result.stderr or 'Unknown error'}"
            )

        if not os.path.exists(output_path):
            raise PDFCompressionError("Compressed file was not created.")

        compressed_size = os.path.getsize(output_path)

        # If compression made the file bigger, keep the original bytes
        if compressed_size >= original_size:
            shutil.copy2(input_path, output_path)
            compressed_size = original_size

        reduction = (
            ((original_size - compressed_size) / original_size) * 100
            if original_size > 0
            else 0
        )

        logger.info(
            "PDF compression: %s → %s (%.1f%% reduction)",
            original_size, compressed_size, reduction,
        )

        return {
            "original_size": original_size,
            "compressed_size": compressed_size,
            "reduction_percent": round(reduction, 1),
        }

    except subprocess.TimeoutExpired:
        raise PDFCompressionError("Compression timed out. File may be too large.")
    except FileNotFoundError:
        raise PDFCompressionError("Ghostscript is not installed on the server.")
File may be too large.") + except FileNotFoundError: + raise PDFCompressionError("Ghostscript is not installed on the server.") diff --git a/backend/app/services/image_service.py b/backend/app/services/image_service.py new file mode 100644 index 0000000..e335244 --- /dev/null +++ b/backend/app/services/image_service.py @@ -0,0 +1,169 @@ +"""Image processing service using Pillow.""" +import os +import logging + +from PIL import Image + +logger = logging.getLogger(__name__) + + +class ImageProcessingError(Exception): + """Custom exception for image processing failures.""" + pass + + +# Supported format mappings +FORMAT_MAP = { + "jpg": "JPEG", + "jpeg": "JPEG", + "png": "PNG", + "webp": "WEBP", +} + + +def convert_image( + input_path: str, + output_path: str, + output_format: str, + quality: int = 85, +) -> dict: + """ + Convert an image to a different format. + + Args: + input_path: Path to the input image + output_path: Path for the output image + output_format: Target format ("jpg", "png", "webp") + quality: Output quality 1-100 (for lossy formats) + + Returns: + dict with original_size, converted_size, dimensions + + Raises: + ImageProcessingError: If conversion fails + """ + output_format = output_format.lower() + if output_format not in FORMAT_MAP: + raise ImageProcessingError( + f"Unsupported output format: {output_format}. 
" + f"Supported: {', '.join(FORMAT_MAP.keys())}" + ) + + pil_format = FORMAT_MAP[output_format] + os.makedirs(os.path.dirname(output_path), exist_ok=True) + + try: + original_size = os.path.getsize(input_path) + + # Open and re-encode (strips any malicious payloads) + with Image.open(input_path) as img: + # Convert RGBA to RGB for JPEG (JPEG doesn't support alpha) + if pil_format == "JPEG" and img.mode in ("RGBA", "P", "LA"): + background = Image.new("RGB", img.size, (255, 255, 255)) + if img.mode == "P": + img = img.convert("RGBA") + background.paste(img, mask=img.split()[-1] if "A" in img.mode else None) + img = background + + width, height = img.size + + # Save with quality setting + save_kwargs = {} + if pil_format in ("JPEG", "WEBP"): + save_kwargs["quality"] = max(1, min(100, quality)) + save_kwargs["optimize"] = True + elif pil_format == "PNG": + save_kwargs["optimize"] = True + + img.save(output_path, format=pil_format, **save_kwargs) + + converted_size = os.path.getsize(output_path) + + logger.info( + f"Image conversion: {input_path} → {output_format} " + f"({original_size} → {converted_size})" + ) + + return { + "original_size": original_size, + "converted_size": converted_size, + "width": width, + "height": height, + "format": output_format, + } + + except (IOError, OSError, Image.DecompressionBombError) as e: + raise ImageProcessingError(f"Image processing failed: {str(e)}") + + +def resize_image( + input_path: str, + output_path: str, + width: int | None = None, + height: int | None = None, + quality: int = 85, +) -> dict: + """ + Resize an image while maintaining aspect ratio. 
+ + Args: + input_path: Path to the input image + output_path: Path for the resized image + width: Target width (None to auto-calculate from height) + height: Target height (None to auto-calculate from width) + quality: Output quality 1-100 + + Returns: + dict with original and new dimensions + + Raises: + ImageProcessingError: If resize fails + """ + if width is None and height is None: + raise ImageProcessingError("At least one of width or height must be specified.") + + os.makedirs(os.path.dirname(output_path), exist_ok=True) + + try: + with Image.open(input_path) as img: + orig_width, orig_height = img.size + + # Calculate missing dimension to maintain aspect ratio + if width and not height: + ratio = width / orig_width + height = int(orig_height * ratio) + elif height and not width: + ratio = height / orig_height + width = int(orig_width * ratio) + + # Resize using high-quality resampling + resized = img.resize((width, height), Image.Resampling.LANCZOS) + + # Detect format from output extension + ext = os.path.splitext(output_path)[1].lower().strip(".") + pil_format = FORMAT_MAP.get(ext, "PNG") + + save_kwargs = {"optimize": True} + if pil_format in ("JPEG", "WEBP"): + save_kwargs["quality"] = quality + # Handle RGBA for JPEG + if resized.mode in ("RGBA", "P", "LA"): + background = Image.new("RGB", resized.size, (255, 255, 255)) + if resized.mode == "P": + resized = resized.convert("RGBA") + background.paste( + resized, mask=resized.split()[-1] if "A" in resized.mode else None + ) + resized = background + + resized.save(output_path, format=pil_format, **save_kwargs) + + return { + "original_width": orig_width, + "original_height": orig_height, + "new_width": width, + "new_height": height, + } + + except (IOError, OSError, Image.DecompressionBombError) as e: + raise ImageProcessingError(f"Image resize failed: {str(e)}") diff --git a/backend/app/services/pdf_service.py b/backend/app/services/pdf_service.py new file mode 100644 index 0000000..235f0d0 --- 
"""PDF conversion service using LibreOffice headless."""
import os
import shutil
import subprocess
import logging
import tempfile

logger = logging.getLogger(__name__)

# LibreOffice emits this even on successful conversions when Java is absent
_HARMLESS_WARNING_PREFIX = "Warning: failed to launch javaldx"


class PDFConversionError(Exception):
    """Raised when a LibreOffice document conversion fails."""
    pass


def _run_soffice(
    input_path: str, output_dir: str, target_ext: str, extra_args: tuple = ()
) -> str:
    """Run LibreOffice headless to convert input_path into output_dir.

    Shared engine for pdf_to_word / word_to_pdf (the two were previously
    near-duplicate code paths).

    Args:
        input_path: Source document path.
        output_dir: Directory for the converted file.
        target_ext: Output extension passed to --convert-to ("docx"/"pdf").
        extra_args: Additional soffice flags (e.g. an import filter).

    Returns:
        Path to the converted file.

    Raises:
        PDFConversionError: On failure, timeout, or missing LibreOffice.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Unique user profile per invocation — avoids profile lock conflicts
    # when several conversions run concurrently.
    user_install_dir = tempfile.mkdtemp(prefix=f"lo_{target_ext}_")

    cmd = [
        "soffice",
        "--headless",
        "--norestore",
        f"-env:UserInstallation=file://{user_install_dir}",
        *extra_args,
        "--convert-to", target_ext,
        "--outdir", output_dir,
        input_path,
    ]

    try:
        logger.info("Running LibreOffice: %s", " ".join(cmd))
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=120,  # 2 minute cap — huge files must not wedge a worker
            env={**os.environ, "HOME": user_install_dir},
        )
        logger.info(
            "LibreOffice rc=%s stdout=%s stderr=%s",
            result.returncode, result.stdout, result.stderr,
        )

        # LibreOffice names output after the input file
        input_basename = os.path.splitext(os.path.basename(input_path))[0]
        output_path = os.path.join(output_dir, f"{input_basename}.{target_ext}")

        # Check the output file FIRST — LibreOffice can exit non-zero for
        # harmless warnings (e.g. javaldx) even when conversion succeeded.
        if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
            logger.info("Conversion successful: %s", output_path)
            return output_path

        # No output file — now treat a non-zero exit as a real error
        if result.returncode != 0:
            stderr = result.stderr or ""
            real_errors = [
                line for line in stderr.strip().splitlines()
                if not line.startswith(_HARMLESS_WARNING_PREFIX)
            ]
            error_msg = "\n".join(real_errors) if real_errors else stderr
            logger.error("LibreOffice conversion failed: %s", error_msg)
            raise PDFConversionError(
                f"Conversion failed: {error_msg or 'Unknown error'}"
            )

        # Return code 0 but no output file
        files_in_dir = os.listdir(output_dir) if os.path.exists(output_dir) else []
        logger.error(
            "Expected output not found at %s. Files in output dir: %s",
            output_path, files_in_dir,
        )
        raise PDFConversionError("Output file was not created.")

    except subprocess.TimeoutExpired:
        raise PDFConversionError("Conversion timed out. File may be too large.")
    except FileNotFoundError:
        raise PDFConversionError("LibreOffice is not installed on the server.")
    finally:
        # Cleanup the temporary user profile
        shutil.rmtree(user_install_dir, ignore_errors=True)


def pdf_to_word(input_path: str, output_dir: str) -> str:
    """
    Convert a PDF file to Word (DOCX) via LibreOffice headless.

    Uses the writer_pdf_import filter. Returns the path to the .docx;
    raises PDFConversionError on failure.
    """
    return _run_soffice(
        input_path, output_dir, "docx",
        extra_args=("--infilter=writer_pdf_import",),
    )


def word_to_pdf(input_path: str, output_dir: str) -> str:
    """
    Convert a Word (DOC/DOCX) file to PDF via LibreOffice headless.

    Returns the path to the .pdf; raises PDFConversionError on failure.
    """
    return _run_soffice(input_path, output_dir, "pdf")
"""Storage service — S3 in production, local files in development."""
import os
import shutil
import logging

from flask import current_app

logger = logging.getLogger(__name__)


def _is_s3_configured() -> bool:
    """Check if AWS S3 credentials are provided (non-empty key and secret)."""
    key = current_app.config.get("AWS_ACCESS_KEY_ID")
    secret = current_app.config.get("AWS_SECRET_ACCESS_KEY")
    return bool(key and secret and key.strip() and secret.strip())


class StorageService:
    """Handle file storage — uses S3 when configured, local filesystem otherwise.

    Storage keys always have the shape "<folder>/<task_id>/<filename>";
    the local download route and file_exists() rely on that layout.
    """

    def __init__(self):
        # boto3 client is created lazily on first S3 use
        self._client = None

    @property
    def use_s3(self) -> bool:
        return _is_s3_configured()

    @property
    def client(self):
        """Lazy-initialize the boto3 S3 client (only when S3 is configured)."""
        if self._client is None:
            import boto3
            self._client = boto3.client(
                "s3",
                region_name=current_app.config["AWS_S3_REGION"],
                aws_access_key_id=current_app.config["AWS_ACCESS_KEY_ID"],
                aws_secret_access_key=current_app.config["AWS_SECRET_ACCESS_KEY"],
            )
        return self._client

    @property
    def bucket(self):
        return current_app.config["AWS_S3_BUCKET"]

    @staticmethod
    def _split_key(s3_key: str) -> tuple:
        """Extract (task_id, filename) from a storage key.

        Keys are normally "outputs/<task_id>/<filename>"; shorter keys
        are handled defensively.
        """
        parts = s3_key.strip("/").split("/")
        if len(parts) >= 3:
            return parts[1], parts[2]
        return parts[0], parts[-1]

    def upload_file(self, local_path: str, task_id: str, folder: str = "outputs") -> str:
        """
        Upload / store a file.

        In S3 mode: uploads to the S3 bucket.
        In local mode: copies the file into the outputs directory.

        Returns:
            The storage key "<folder>/<task_id>/<filename>".
        """
        filename = os.path.basename(local_path)
        # BUG FIX: the key must embed the actual filename (it contained a
        # corrupted placeholder) — every consumer parses the key as
        # folder/task_id/filename.
        key = f"{folder}/{task_id}/{filename}"

        if self.use_s3:
            from botocore.exceptions import ClientError
            try:
                self.client.upload_file(local_path, self.bucket, key)
                return key
            except ClientError as e:
                raise RuntimeError(f"Failed to upload file to S3: {e}")

        # Local mode — keep the file in the outputs directory
        output_dir = current_app.config["OUTPUT_FOLDER"]
        dest_dir = os.path.join(output_dir, task_id)
        os.makedirs(dest_dir, exist_ok=True)
        dest_path = os.path.join(dest_dir, filename)

        if os.path.abspath(local_path) != os.path.abspath(dest_path):
            shutil.copy2(local_path, dest_path)

        logger.info("[Local] Stored file: %s", dest_path)
        return key

    def generate_presigned_url(
        self, s3_key: str, expiry: int | None = None, original_filename: str | None = None
    ) -> str:
        """
        Generate a download URL.

        S3 mode: time-limited presigned URL (optionally forcing a
        Content-Disposition filename).
        Local mode: "/api/download/<task_id>/<filename>?name=<download_name>".
        """
        if self.use_s3:
            from botocore.exceptions import ClientError
            if expiry is None:
                expiry = current_app.config.get("FILE_EXPIRY_SECONDS", 1800)

            params = {"Bucket": self.bucket, "Key": s3_key}
            if original_filename:
                params["ResponseContentDisposition"] = (
                    f'attachment; filename="{original_filename}"'
                )
            try:
                return self.client.generate_presigned_url(
                    "get_object",
                    Params=params,
                    ExpiresIn=expiry,
                )
            except ClientError as e:
                raise RuntimeError(f"Failed to generate presigned URL: {e}")

        # Local mode — point at the Flask download route
        task_id, filename = self._split_key(s3_key)
        download_name = original_filename or filename
        # BUG FIX: the URL must contain the stored filename (it contained
        # a corrupted placeholder), matching the /api/download route.
        return f"/api/download/{task_id}/{filename}?name={download_name}"

    def delete_file(self, s3_key: str):
        """Delete a file from S3 (no-op in local mode — cleanup handles it)."""
        if self.use_s3:
            from botocore.exceptions import ClientError
            try:
                self.client.delete_object(Bucket=self.bucket, Key=s3_key)
            except ClientError:
                # Best-effort delete; expired/missing objects are fine
                pass

    def file_exists(self, s3_key: str) -> bool:
        """Check whether a stored file still exists."""
        if self.use_s3:
            from botocore.exceptions import ClientError
            try:
                self.client.head_object(Bucket=self.bucket, Key=s3_key)
                return True
            except ClientError:
                return False

        task_id, filename = self._split_key(s3_key)
        output_dir = current_app.config["OUTPUT_FOLDER"]
        return os.path.isfile(os.path.join(output_dir, task_id, filename))


# Singleton instance
storage = StorageService()
"""Video to GIF conversion service using ffmpeg."""
import os
import re
import subprocess
import logging

logger = logging.getLogger(__name__)


class VideoProcessingError(Exception):
    """Custom exception for video processing failures."""
    pass


# Safety constraints
MAX_DURATION = 15  # seconds
MAX_WIDTH = 640    # pixels
MAX_FPS = 20
DEFAULT_FPS = 10
DEFAULT_WIDTH = 480


def video_to_gif(
    input_path: str,
    output_path: str,
    start_time: float = 0,
    duration: float = 5,
    fps: int = DEFAULT_FPS,
    width: int = DEFAULT_WIDTH,
) -> dict:
    """
    Convert a video clip to an animated GIF with ffmpeg.

    Uses the standard two-pass palette technique: pass 1 derives an
    optimized 256-color palette from the clip, pass 2 encodes the GIF
    against it for much better quality than a direct encode.

    Args:
        input_path: Path to the input video (MP4/WebM).
        output_path: Path for the output GIF.
        start_time: Start offset in seconds.
        duration: Clip length in seconds (clamped to 0.5-15).
        fps: Frames per second (clamped to 1-20).
        width: Output width in pixels (clamped to 100-640).

    Returns:
        dict with output_size, duration, fps, width, height.

    Raises:
        VideoProcessingError: If ffmpeg fails, times out, or is missing.
    """
    # Clamp numeric parameters into safe ranges — everything forwarded to
    # ffmpeg is a validated number, so no argument injection is possible.
    start_time = max(0, float(start_time))
    duration = max(0.5, min(MAX_DURATION, float(duration)))
    fps = max(1, min(MAX_FPS, int(fps)))
    width = max(100, min(MAX_WIDTH, int(width)))

    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    palette_path = output_path + ".palette.png"
    clip_args = ["-ss", str(start_time), "-t", str(duration)]
    scale_filter = f"fps={fps},scale={width}:-1:flags=lanczos"

    try:
        # Pass 1: generate the optimized palette
        palette_cmd = [
            "ffmpeg", "-y",
            *clip_args,
            "-i", input_path,
            "-vf", f"{scale_filter},palettegen=stats_mode=diff",
            palette_path,
        ]
        pass1 = subprocess.run(palette_cmd, capture_output=True, text=True, timeout=60)
        if pass1.returncode != 0:
            logger.error("ffmpeg palette generation failed: %s", pass1.stderr)
            raise VideoProcessingError("Failed to process video for GIF creation.")

        # Pass 2: encode the GIF using the palette
        gif_cmd = [
            "ffmpeg", "-y",
            *clip_args,
            "-i", input_path,
            "-i", palette_path,
            "-lavfi", f"{scale_filter} [x]; [x][1:v] paletteuse=dither=bayer:bayer_scale=5",
            output_path,
        ]
        pass2 = subprocess.run(gif_cmd, capture_output=True, text=True, timeout=120)
        if pass2.returncode != 0:
            logger.error("ffmpeg GIF creation failed: %s", pass2.stderr)
            raise VideoProcessingError("Failed to create GIF from video.")

        if not os.path.exists(output_path):
            raise VideoProcessingError("GIF file was not created.")

        output_size = os.path.getsize(output_path)

        # Report the dimensions ffmpeg actually produced
        actual_width, actual_height = _get_gif_dimensions(output_path)

        logger.info(
            "Video→GIF: %s → %s (%s bytes, %ss, %sfps, %sx%s)",
            input_path, output_path, output_size, duration, fps,
            actual_width, actual_height,
        )

        return {
            "output_size": output_size,
            "duration": duration,
            "fps": fps,
            "width": actual_width,
            "height": actual_height,
        }

    except subprocess.TimeoutExpired:
        raise VideoProcessingError("GIF creation timed out. Video may be too large.")
    except FileNotFoundError:
        raise VideoProcessingError("ffmpeg is not installed on the server.")
    finally:
        # Always clean up the intermediate palette file
        if os.path.exists(palette_path):
            os.remove(palette_path)
Get actual output dimensions + actual_width, actual_height = _get_gif_dimensions(output_path) + + logger.info( + f"Video→GIF: {input_path} → {output_path} " + f"({output_size} bytes, {duration}s, {fps}fps, {actual_width}x{actual_height})" + ) + + return { + "output_size": output_size, + "duration": duration, + "fps": fps, + "width": actual_width, + "height": actual_height, + } + + except subprocess.TimeoutExpired: + raise VideoProcessingError("GIF creation timed out. Video may be too large.") + except FileNotFoundError: + raise VideoProcessingError("ffmpeg is not installed on the server.") + finally: + # Cleanup palette file + if os.path.exists(palette_path): + os.remove(palette_path) + + +def get_video_duration(input_path: str) -> float: + """Get the duration of a video file in seconds.""" + cmd = [ + "ffprobe", + "-v", "error", + "-show_entries", "format=duration", + "-of", "default=noprint_wrappers=1:nokey=1", + input_path, + ] + + try: + result = subprocess.run( + cmd, capture_output=True, text=True, timeout=10 + ) + return float(result.stdout.strip()) + except (subprocess.TimeoutExpired, ValueError): + return 0.0 + + +def _get_gif_dimensions(gif_path: str) -> tuple[int, int]: + """Get GIF dimensions using ffprobe.""" + cmd = [ + "ffprobe", + "-v", "error", + "-select_streams", "v:0", + "-show_entries", "stream=width,height", + "-of", "csv=p=0", + gif_path, + ] + + try: + result = subprocess.run( + cmd, capture_output=True, text=True, timeout=10 + ) + parts = result.stdout.strip().split(",") + if len(parts) == 2: + return int(parts[0]), int(parts[1]) + except (subprocess.TimeoutExpired, ValueError): + pass + + return 0, 0 diff --git a/backend/app/tasks/__init__.py b/backend/app/tasks/__init__.py new file mode 100644 index 0000000..0350669 --- /dev/null +++ b/backend/app/tasks/__init__.py @@ -0,0 +1 @@ +"""Celery tasks for async file processing.""" diff --git a/backend/app/tasks/compress_tasks.py b/backend/app/tasks/compress_tasks.py new file mode 100644 index 
0000000..20cc9f5 --- /dev/null +++ b/backend/app/tasks/compress_tasks.py @@ -0,0 +1,88 @@ +"""Celery tasks for PDF compression.""" +import os +import logging + +from app.extensions import celery +from app.services.compress_service import compress_pdf, PDFCompressionError +from app.services.storage_service import storage +from app.utils.sanitizer import cleanup_task_files + + +def _cleanup(task_id: str): + cleanup_task_files(task_id, keep_outputs=not storage.use_s3) + +logger = logging.getLogger(__name__) + + +@celery.task(bind=True, name="app.tasks.compress_tasks.compress_pdf_task") +def compress_pdf_task( + self, + input_path: str, + task_id: str, + original_filename: str, + quality: str = "medium", +): + """ + Async task: Compress a PDF file. + + Args: + input_path: Path to the uploaded PDF file + task_id: Unique task identifier + original_filename: Original filename for download + quality: Compression quality ("low", "medium", "high") + + Returns: + dict with download_url, compression stats, and file info + """ + output_dir = os.path.join("/tmp/outputs", task_id) + os.makedirs(output_dir, exist_ok=True) + output_path = os.path.join(output_dir, f"{task_id}.pdf") + + try: + self.update_state( + state="PROCESSING", + meta={"step": f"Compressing PDF ({quality} quality)..."}, + ) + + # Compress using Ghostscript + stats = compress_pdf(input_path, output_path, quality) + + self.update_state(state="PROCESSING", meta={"step": "Uploading result..."}) + + # Upload to S3 + s3_key = storage.upload_file(output_path, task_id, folder="outputs") + + # Generate download filename + name_without_ext = os.path.splitext(original_filename)[0] + download_name = f"{name_without_ext}_compressed.pdf" + + download_url = storage.generate_presigned_url( + s3_key, original_filename=download_name + ) + + result = { + "status": "completed", + "download_url": download_url, + "filename": download_name, + "original_size": stats["original_size"], + "compressed_size": stats["compressed_size"], + 
"reduction_percent": stats["reduction_percent"], + } + + _cleanup(task_id) + + logger.info( + f"Task {task_id}: PDF compression completed — " + f"{stats['reduction_percent']}% reduction" + ) + return result + + except PDFCompressionError as e: + logger.error(f"Task {task_id}: Compression error — {e}") + _cleanup(task_id) + return {"status": "failed", "error": str(e)} + + except Exception as e: + logger.error(f"Task {task_id}: Unexpected error — {e}") + _cleanup(task_id) + return {"status": "failed", "error": "An unexpected error occurred."} diff --git a/backend/app/tasks/convert_tasks.py b/backend/app/tasks/convert_tasks.py new file mode 100644 index 0000000..caf17d1 --- /dev/null +++ b/backend/app/tasks/convert_tasks.py @@ -0,0 +1,128 @@ +"""Celery tasks for PDF conversion (PDF↔Word).""" +import os +import logging + +from app.extensions import celery +from app.services.pdf_service import pdf_to_word, word_to_pdf, PDFConversionError +from app.services.storage_service import storage +from app.utils.sanitizer import cleanup_task_files + + +def _cleanup(task_id: str): + """Cleanup with local-aware flag.""" + cleanup_task_files(task_id, keep_outputs=not storage.use_s3) + +logger = logging.getLogger(__name__) + + +@celery.task(bind=True, name="app.tasks.convert_tasks.convert_pdf_to_word") +def convert_pdf_to_word(self, input_path: str, task_id: str, original_filename: str): + """ + Async task: Convert PDF to Word document. 
+ + Args: + input_path: Path to the uploaded PDF file + task_id: Unique task identifier + original_filename: Original filename for download + + Returns: + dict with download_url and file info + """ + output_dir = os.path.join("/tmp/outputs", task_id) + + try: + self.update_state(state="PROCESSING", meta={"step": "Converting PDF to Word..."}) + + # Convert using LibreOffice + output_path = pdf_to_word(input_path, output_dir) + + self.update_state(state="PROCESSING", meta={"step": "Uploading result..."}) + + # Upload to S3 + s3_key = storage.upload_file(output_path, task_id, folder="outputs") + + # Generate download filename + name_without_ext = os.path.splitext(original_filename)[0] + download_name = f"{name_without_ext}.docx" + + # Generate presigned URL + download_url = storage.generate_presigned_url( + s3_key, original_filename=download_name + ) + + result = { + "status": "completed", + "download_url": download_url, + "filename": download_name, + "output_size": os.path.getsize(output_path), + } + + # Cleanup local files + _cleanup(task_id) + + logger.info(f"Task {task_id}: PDF→Word conversion completed") + return result + + except PDFConversionError as e: + logger.error(f"Task {task_id}: Conversion error — {e}") + _cleanup(task_id) + return {"status": "failed", "error": str(e)} + + except Exception as e: + logger.error(f"Task {task_id}: Unexpected error — {e}") + _cleanup(task_id) + return {"status": "failed", "error": "An unexpected error occurred."} + + +@celery.task(bind=True, name="app.tasks.convert_tasks.convert_word_to_pdf") +def convert_word_to_pdf(self, input_path: str, task_id: str, original_filename: str): + """ + Async task: Convert Word document to PDF. 
+ + Args: + input_path: Path to the uploaded Word file + task_id: Unique task identifier + original_filename: Original filename for download + + Returns: + dict with download_url and file info + """ + output_dir = os.path.join("/tmp/outputs", task_id) + + try: + self.update_state(state="PROCESSING", meta={"step": "Converting Word to PDF..."}) + + output_path = word_to_pdf(input_path, output_dir) + + self.update_state(state="PROCESSING", meta={"step": "Uploading result..."}) + + s3_key = storage.upload_file(output_path, task_id, folder="outputs") + + name_without_ext = os.path.splitext(original_filename)[0] + download_name = f"{name_without_ext}.pdf" + + download_url = storage.generate_presigned_url( + s3_key, original_filename=download_name + ) + + result = { + "status": "completed", + "download_url": download_url, + "filename": download_name, + "output_size": os.path.getsize(output_path), + } + + _cleanup(task_id) + + logger.info(f"Task {task_id}: Word→PDF conversion completed") + return result + + except PDFConversionError as e: + logger.error(f"Task {task_id}: Conversion error — {e}") + _cleanup(task_id) + return {"status": "failed", "error": str(e)} + + except Exception as e: + logger.error(f"Task {task_id}: Unexpected error — {e}") + _cleanup(task_id) + return {"status": "failed", "error": "An unexpected error occurred."} diff --git a/backend/app/tasks/image_tasks.py b/backend/app/tasks/image_tasks.py new file mode 100644 index 0000000..09a0e58 --- /dev/null +++ b/backend/app/tasks/image_tasks.py @@ -0,0 +1,160 @@ +"""Celery tasks for image processing.""" +import os +import logging + +from app.extensions import celery +from app.services.image_service import convert_image, resize_image, ImageProcessingError +from app.services.storage_service import storage +from app.utils.sanitizer import cleanup_task_files + + +def _cleanup(task_id: str): + cleanup_task_files(task_id, keep_outputs=not storage.use_s3) + +logger = logging.getLogger(__name__) + + 
+@celery.task(bind=True, name="app.tasks.image_tasks.convert_image_task") +def convert_image_task( + self, + input_path: str, + task_id: str, + original_filename: str, + output_format: str, + quality: int = 85, +): + """ + Async task: Convert an image to a different format. + + Args: + input_path: Path to the uploaded image + task_id: Unique task identifier + original_filename: Original filename for download + output_format: Target format ("jpg", "png", "webp") + quality: Output quality 1-100 + + Returns: + dict with download_url and conversion stats + """ + output_dir = os.path.join("/tmp/outputs", task_id) + os.makedirs(output_dir, exist_ok=True) + output_path = os.path.join(output_dir, f"{task_id}.{output_format}") + + try: + self.update_state( + state="PROCESSING", + meta={"step": f"Converting image to {output_format.upper()}..."}, + ) + + stats = convert_image(input_path, output_path, output_format, quality) + + self.update_state(state="PROCESSING", meta={"step": "Uploading result..."}) + + s3_key = storage.upload_file(output_path, task_id, folder="outputs") + + name_without_ext = os.path.splitext(original_filename)[0] + download_name = f"{name_without_ext}.{output_format}" + + download_url = storage.generate_presigned_url( + s3_key, original_filename=download_name + ) + + result = { + "status": "completed", + "download_url": download_url, + "filename": download_name, + "original_size": stats["original_size"], + "converted_size": stats["converted_size"], + "width": stats["width"], + "height": stats["height"], + "format": stats["format"], + } + + _cleanup(task_id) + + logger.info(f"Task {task_id}: Image conversion to {output_format} completed") + return result + + except ImageProcessingError as e: + logger.error(f"Task {task_id}: Image error — {e}") + _cleanup(task_id) + return {"status": "failed", "error": str(e)} + + except Exception as e: + logger.error(f"Task {task_id}: Unexpected error — {e}") + _cleanup(task_id) + return {"status": "failed", "error": "An 
unexpected error occurred."} + + +@celery.task(bind=True, name="app.tasks.image_tasks.resize_image_task") +def resize_image_task( + self, + input_path: str, + task_id: str, + original_filename: str, + width: int | None = None, + height: int | None = None, + quality: int = 85, +): + """ + Async task: Resize an image. + + Args: + input_path: Path to the uploaded image + task_id: Unique task identifier + original_filename: Original filename for download + width: Target width + height: Target height + quality: Output quality 1-100 + + Returns: + dict with download_url and resize info + """ + ext = os.path.splitext(original_filename)[1].lstrip(".") + output_dir = os.path.join("/tmp/outputs", task_id) + os.makedirs(output_dir, exist_ok=True) + output_path = os.path.join(output_dir, f"{task_id}.{ext}") + + try: + self.update_state( + state="PROCESSING", + meta={"step": "Resizing image..."}, + ) + + stats = resize_image(input_path, output_path, width, height, quality) + + self.update_state(state="PROCESSING", meta={"step": "Uploading result..."}) + + s3_key = storage.upload_file(output_path, task_id, folder="outputs") + + name_without_ext = os.path.splitext(original_filename)[0] + download_name = f"{name_without_ext}_resized.{ext}" + + download_url = storage.generate_presigned_url( + s3_key, original_filename=download_name + ) + + result = { + "status": "completed", + "download_url": download_url, + "filename": download_name, + "original_width": stats["original_width"], + "original_height": stats["original_height"], + "new_width": stats["new_width"], + "new_height": stats["new_height"], + } + + _cleanup(task_id) + + logger.info(f"Task {task_id}: Image resize completed") + return result + + except ImageProcessingError as e: + logger.error(f"Task {task_id}: Image error — {e}") + _cleanup(task_id) + return {"status": "failed", "error": str(e)} + + except Exception as e: + logger.error(f"Task {task_id}: Unexpected error — {e}") + _cleanup(task_id) + return {"status": "failed", 
"error": "An unexpected error occurred."} diff --git a/backend/app/tasks/video_tasks.py b/backend/app/tasks/video_tasks.py new file mode 100644 index 0000000..61fc47c --- /dev/null +++ b/backend/app/tasks/video_tasks.py @@ -0,0 +1,96 @@ +"""Celery tasks for video processing.""" +import os +import logging + +from app.extensions import celery +from app.services.video_service import video_to_gif, VideoProcessingError +from app.services.storage_service import storage +from app.utils.sanitizer import cleanup_task_files + + +def _cleanup(task_id: str): + cleanup_task_files(task_id, keep_outputs=not storage.use_s3) + +logger = logging.getLogger(__name__) + + +@celery.task(bind=True, name="app.tasks.video_tasks.create_gif_task") +def create_gif_task( + self, + input_path: str, + task_id: str, + original_filename: str, + start_time: float = 0, + duration: float = 5, + fps: int = 10, + width: int = 480, +): + """ + Async task: Convert video clip to animated GIF. + + Args: + input_path: Path to the uploaded video + task_id: Unique task identifier + original_filename: Original filename for download + start_time: Start time in seconds + duration: Duration in seconds + fps: Frames per second + width: Output width in pixels + + Returns: + dict with download_url and GIF info + """ + output_dir = os.path.join("/tmp/outputs", task_id) + os.makedirs(output_dir, exist_ok=True) + output_path = os.path.join(output_dir, f"{task_id}.gif") + + try: + self.update_state( + state="PROCESSING", + meta={"step": "Creating GIF from video..."}, + ) + + stats = video_to_gif( + input_path, output_path, + start_time=start_time, + duration=duration, + fps=fps, + width=width, + ) + + self.update_state(state="PROCESSING", meta={"step": "Uploading result..."}) + + s3_key = storage.upload_file(output_path, task_id, folder="outputs") + + name_without_ext = os.path.splitext(original_filename)[0] + download_name = f"{name_without_ext}.gif" + + download_url = storage.generate_presigned_url( + s3_key, 
original_filename=download_name + ) + + result = { + "status": "completed", + "download_url": download_url, + "filename": download_name, + "output_size": stats["output_size"], + "duration": stats["duration"], + "fps": stats["fps"], + "width": stats["width"], + "height": stats["height"], + } + + _cleanup(task_id) + + logger.info(f"Task {task_id}: Video→GIF creation completed") + return result + + except VideoProcessingError as e: + logger.error(f"Task {task_id}: Video error — {e}") + _cleanup(task_id) + return {"status": "failed", "error": str(e)} + + except Exception as e: + logger.error(f"Task {task_id}: Unexpected error — {e}") + _cleanup(task_id) + return {"status": "failed", "error": "An unexpected error occurred."} diff --git a/backend/app/utils/__init__.py b/backend/app/utils/__init__.py new file mode 100644 index 0000000..09e9b51 --- /dev/null +++ b/backend/app/utils/__init__.py @@ -0,0 +1 @@ +"""Backend application utilities.""" diff --git a/backend/app/utils/cleanup.py b/backend/app/utils/cleanup.py new file mode 100644 index 0000000..f3112e6 --- /dev/null +++ b/backend/app/utils/cleanup.py @@ -0,0 +1,31 @@ +"""Scheduled cleanup of expired temporary files.""" +import os +import shutil +import time + +from flask import current_app + + +def cleanup_expired_files(): + """Remove files older than FILE_EXPIRY_SECONDS from upload/output dirs.""" + expiry = current_app.config.get("FILE_EXPIRY_SECONDS", 1800) + now = time.time() + removed_count = 0 + + for folder_key in ["UPLOAD_FOLDER", "OUTPUT_FOLDER"]: + folder = current_app.config.get(folder_key) + if not folder or not os.path.exists(folder): + continue + + for task_dir_name in os.listdir(folder): + task_dir = os.path.join(folder, task_dir_name) + if not os.path.isdir(task_dir): + continue + + # Check directory age based on modification time + dir_mtime = os.path.getmtime(task_dir) + if now - dir_mtime > expiry: + shutil.rmtree(task_dir, ignore_errors=True) + removed_count += 1 + + return removed_count diff 
--git a/backend/app/utils/file_validator.py b/backend/app/utils/file_validator.py new file mode 100644 index 0000000..00b7d11 --- /dev/null +++ b/backend/app/utils/file_validator.py @@ -0,0 +1,111 @@ +"""File validation utilities — multi-layer security checks.""" +import os + +import magic +from flask import current_app +from werkzeug.utils import secure_filename + + +class FileValidationError(Exception): + """Custom exception for file validation failures.""" + + def __init__(self, message: str, code: int = 400): + self.message = message + self.code = code + super().__init__(self.message) + + +def validate_file(file_storage, allowed_types: list[str] | None = None): + """ + Validate an uploaded file through multiple security layers. + + Args: + file_storage: Flask FileStorage object from request.files + allowed_types: List of allowed extensions (e.g., ["pdf", "docx"]). + If None, uses all allowed extensions from config. + + Returns: + tuple: (sanitized_filename, detected_extension) + + Raises: + FileValidationError: If validation fails at any layer. + """ + config = current_app.config + + # Layer 1: Check if file exists and has a filename + if not file_storage or file_storage.filename == "": + raise FileValidationError("No file provided.") + + filename = secure_filename(file_storage.filename) + if not filename: + raise FileValidationError("Invalid filename.") + + # Layer 2: Check file extension against whitelist + ext = _get_extension(filename) + allowed_extensions = config.get("ALLOWED_EXTENSIONS", {}) + + if allowed_types: + valid_extensions = {k: v for k, v in allowed_extensions.items() if k in allowed_types} + else: + valid_extensions = allowed_extensions + + if ext not in valid_extensions: + raise FileValidationError( + f"File type '.{ext}' is not allowed. 
" + f"Allowed types: {', '.join(valid_extensions.keys())}" + ) + + # Layer 3: Check file size against type-specific limits + file_storage.seek(0, os.SEEK_END) + file_size = file_storage.tell() + file_storage.seek(0) + + size_limits = config.get("FILE_SIZE_LIMITS", {}) + max_size = size_limits.get(ext, 20 * 1024 * 1024) # Default 20MB + + if file_size > max_size: + max_mb = max_size / (1024 * 1024) + raise FileValidationError( + f"File too large. Maximum size for .{ext} files is {max_mb:.0f}MB." + ) + + if file_size == 0: + raise FileValidationError("File is empty.") + + # Layer 4: Check MIME type using magic bytes + file_header = file_storage.read(8192) + file_storage.seek(0) + + detected_mime = magic.from_buffer(file_header, mime=True) + expected_mimes = valid_extensions.get(ext, []) + + if detected_mime not in expected_mimes: + raise FileValidationError( + f"File content does not match extension '.{ext}'. " + f"Detected type: {detected_mime}" + ) + + # Layer 5: Additional content checks for specific types + if ext == "pdf": + _check_pdf_safety(file_header) + + return filename, ext + + +def _get_extension(filename: str) -> str: + """Extract and normalize file extension.""" + if "." not in filename: + return "" + return filename.rsplit(".", 1)[1].lower() + + +def _check_pdf_safety(file_header: bytes): + """Check PDF for potentially dangerous embedded content.""" + dangerous_patterns = [b"/JS", b"/JavaScript", b"/Launch", b"/EmbeddedFile"] + header_str = file_header + + for pattern in dangerous_patterns: + if pattern in header_str: + raise FileValidationError( + "PDF contains potentially unsafe content (embedded scripts)." 
+ ) diff --git a/backend/app/utils/sanitizer.py b/backend/app/utils/sanitizer.py new file mode 100644 index 0000000..740335b --- /dev/null +++ b/backend/app/utils/sanitizer.py @@ -0,0 +1,77 @@ +"""Filename sanitization and temporary file management.""" +import os +import uuid + +from flask import current_app + + +def generate_safe_path(extension: str, folder_type: str = "upload") -> tuple[str, str]: + """ + Generate a safe file path using UUID. + + Args: + extension: File extension (without dot) + folder_type: "upload" for input files, "output" for processed files + + Returns: + tuple: (task_id, full_file_path) + """ + task_id = str(uuid.uuid4()) + + if folder_type == "upload": + base_dir = current_app.config["UPLOAD_FOLDER"] + else: + base_dir = current_app.config["OUTPUT_FOLDER"] + + # Create task-specific directory + task_dir = os.path.join(base_dir, task_id) + os.makedirs(task_dir, exist_ok=True) + + filename = f"{task_id}.{extension}" + file_path = os.path.join(task_dir, filename) + + return task_id, file_path + + +def get_output_path(task_id: str, extension: str) -> str: + """ + Get the output file path for a processed file. + + Args: + task_id: The task UUID + extension: Output file extension + + Returns: + Full output file path + """ + output_dir = current_app.config["OUTPUT_FOLDER"] + task_dir = os.path.join(output_dir, task_id) + os.makedirs(task_dir, exist_ok=True) + + filename = f"{task_id}.{extension}" + return os.path.join(task_dir, filename) + + +def cleanup_task_files(task_id: str, keep_outputs: bool = False): + """ + Remove temporary files for a given task. 
+ + Args: + task_id: The task UUID + keep_outputs: If True, only clean uploads (used in local storage mode) + """ + import shutil + + upload_dir = current_app.config.get("UPLOAD_FOLDER", "/tmp/uploads") + output_dir = current_app.config.get("OUTPUT_FOLDER", "/tmp/outputs") + + # Always clean uploads + upload_task_dir = os.path.join(upload_dir, task_id) + if os.path.exists(upload_task_dir): + shutil.rmtree(upload_task_dir, ignore_errors=True) + + # Only clean outputs when using S3 (files already uploaded to S3) + if not keep_outputs: + output_task_dir = os.path.join(output_dir, task_id) + if os.path.exists(output_task_dir): + shutil.rmtree(output_task_dir, ignore_errors=True) diff --git a/backend/celery_worker.py b/backend/celery_worker.py new file mode 100644 index 0000000..7af6ccf --- /dev/null +++ b/backend/celery_worker.py @@ -0,0 +1,11 @@ +"""Celery worker entry point.""" +from app import create_app +from app.extensions import celery + +app = create_app() + +# Import all tasks so Celery discovers them +import app.tasks.convert_tasks # noqa: F401 +import app.tasks.compress_tasks # noqa: F401 +import app.tasks.image_tasks # noqa: F401 +import app.tasks.video_tasks # noqa: F401 diff --git a/backend/config/__init__.py b/backend/config/__init__.py new file mode 100644 index 0000000..1129968 --- /dev/null +++ b/backend/config/__init__.py @@ -0,0 +1,93 @@ +import os +from dotenv import load_dotenv + +load_dotenv() + + +class BaseConfig: + """Base configuration.""" + SECRET_KEY = os.getenv("SECRET_KEY", "change-me-in-production") + + # File upload settings + MAX_CONTENT_LENGTH = int(os.getenv("MAX_CONTENT_LENGTH_MB", 50)) * 1024 * 1024 + UPLOAD_FOLDER = os.getenv("UPLOAD_FOLDER", "/tmp/uploads") + OUTPUT_FOLDER = os.getenv("OUTPUT_FOLDER", "/tmp/outputs") + FILE_EXPIRY_SECONDS = int(os.getenv("FILE_EXPIRY_SECONDS", 1800)) + + # Allowed file extensions and MIME types + ALLOWED_EXTENSIONS = { + "pdf": ["application/pdf"], + "doc": ["application/msword"], + "docx": [ + 
"application/vnd.openxmlformats-officedocument.wordprocessingml.document" + ], + "png": ["image/png"], + "jpg": ["image/jpeg"], + "jpeg": ["image/jpeg"], + "webp": ["image/webp"], + "mp4": ["video/mp4"], + "webm": ["video/webm"], + } + + # File size limits per type (bytes) + FILE_SIZE_LIMITS = { + "pdf": 20 * 1024 * 1024, # 20MB + "doc": 15 * 1024 * 1024, # 15MB + "docx": 15 * 1024 * 1024, # 15MB + "png": 10 * 1024 * 1024, # 10MB + "jpg": 10 * 1024 * 1024, # 10MB + "jpeg": 10 * 1024 * 1024, # 10MB + "webp": 10 * 1024 * 1024, # 10MB + "mp4": 50 * 1024 * 1024, # 50MB + "webm": 50 * 1024 * 1024, # 50MB + } + + # Redis + REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0") + + # Celery + CELERY_BROKER_URL = os.getenv("CELERY_BROKER_URL", "redis://redis:6379/0") + CELERY_RESULT_BACKEND = os.getenv("CELERY_RESULT_BACKEND", "redis://redis:6379/1") + + # AWS S3 + AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID") + AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY") + AWS_S3_BUCKET = os.getenv("AWS_S3_BUCKET", "saas-pdf-temp-files") + AWS_S3_REGION = os.getenv("AWS_S3_REGION", "eu-west-1") + + # CORS + CORS_ORIGINS = os.getenv("CORS_ORIGINS", "http://localhost:5173").split(",") + + # Rate Limiting + RATELIMIT_STORAGE_URI = os.getenv("REDIS_URL", "redis://redis:6379/0") + RATELIMIT_DEFAULT = "100/hour" + + +class DevelopmentConfig(BaseConfig): + """Development configuration.""" + DEBUG = True + TESTING = False + + +class ProductionConfig(BaseConfig): + """Production configuration.""" + DEBUG = False + TESTING = False + # Stricter rate limits in production + RATELIMIT_DEFAULT = "60/hour" + + +class TestingConfig(BaseConfig): + """Testing configuration.""" + DEBUG = True + TESTING = True + UPLOAD_FOLDER = "/tmp/test_uploads" + OUTPUT_FOLDER = "/tmp/test_outputs" + + +config = { + "development": DevelopmentConfig, + "production": ProductionConfig, + "testing": TestingConfig, + "default": DevelopmentConfig, +} diff --git a/backend/requirements.txt 
b/backend/requirements.txt new file mode 100644 index 0000000..bf87b44 --- /dev/null +++ b/backend/requirements.txt @@ -0,0 +1,27 @@ +# Core Framework +flask>=3.0,<4.0 +flask-cors>=4.0,<5.0 +flask-limiter[redis]>=3.5,<4.0 +flask-talisman>=1.1,<2.0 +gunicorn>=22.0,<23.0 +python-dotenv>=1.0,<2.0 + +# Task Queue +celery[redis]>=5.3,<6.0 +redis>=5.0,<6.0 +flower>=2.0,<3.0 + +# File Processing +Pillow>=10.0,<11.0 +python-magic>=0.4.27,<1.0 +ffmpeg-python>=0.2,<1.0 + +# AWS +boto3>=1.34,<2.0 + +# Security +werkzeug>=3.0,<4.0 + +# Testing +pytest>=8.0,<9.0 +pytest-flask>=1.3,<2.0 diff --git a/backend/tests/__init__.py b/backend/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py new file mode 100644 index 0000000..7356692 --- /dev/null +++ b/backend/tests/conftest.py @@ -0,0 +1,26 @@ +import os +import pytest +from app import create_app + + +@pytest.fixture +def app(): + """Create application for testing.""" + os.environ['FLASK_ENV'] = 'testing' + app = create_app() + app.config.update({ + 'TESTING': True, + }) + yield app + + +@pytest.fixture +def client(app): + """Flask test client.""" + return app.test_client() + + +@pytest.fixture +def runner(app): + """Flask test CLI runner.""" + return app.test_cli_runner() diff --git a/backend/tests/test_compress.py b/backend/tests/test_compress.py new file mode 100644 index 0000000..df6e543 --- /dev/null +++ b/backend/tests/test_compress.py @@ -0,0 +1,21 @@ +"""Tests for PDF compression endpoint.""" +import io + + +def test_compress_pdf_no_file(client): + """POST /api/compress/pdf without file should return 400.""" + response = client.post('/api/compress/pdf') + assert response.status_code == 400 + + +def test_compress_pdf_wrong_extension(client): + """POST /api/compress/pdf with non-PDF should return 400.""" + data = { + 'file': (io.BytesIO(b'hello'), 'test.docx'), + } + response = client.post( + '/api/compress/pdf', + data=data, + 
content_type='multipart/form-data', + ) + assert response.status_code == 400 diff --git a/backend/tests/test_convert.py b/backend/tests/test_convert.py new file mode 100644 index 0000000..713dc4e --- /dev/null +++ b/backend/tests/test_convert.py @@ -0,0 +1,42 @@ +"""Tests for file conversion endpoints.""" +import io + + +def test_pdf_to_word_no_file(client): + """POST /api/convert/pdf-to-word without file should return 400.""" + response = client.post('/api/convert/pdf-to-word') + assert response.status_code == 400 + data = response.get_json() + assert 'error' in data + + +def test_pdf_to_word_wrong_extension(client): + """POST /api/convert/pdf-to-word with non-PDF should return 400.""" + data = { + 'file': (io.BytesIO(b'hello world'), 'test.txt'), + } + response = client.post( + '/api/convert/pdf-to-word', + data=data, + content_type='multipart/form-data', + ) + assert response.status_code == 400 + + +def test_word_to_pdf_no_file(client): + """POST /api/convert/word-to-pdf without file should return 400.""" + response = client.post('/api/convert/word-to-pdf') + assert response.status_code == 400 + + +def test_word_to_pdf_wrong_extension(client): + """POST /api/convert/word-to-pdf with non-Word file should return 400.""" + data = { + 'file': (io.BytesIO(b'hello world'), 'test.pdf'), + } + response = client.post( + '/api/convert/word-to-pdf', + data=data, + content_type='multipart/form-data', + ) + assert response.status_code == 400 diff --git a/backend/tests/test_health.py b/backend/tests/test_health.py new file mode 100644 index 0000000..92c78ca --- /dev/null +++ b/backend/tests/test_health.py @@ -0,0 +1,15 @@ +"""Tests for health check and app creation.""" + + +def test_health_endpoint(client): + """GET /api/health should return 200.""" + response = client.get('/api/health') + assert response.status_code == 200 + data = response.get_json() + assert data['status'] == 'healthy' + + +def test_app_creates(app): + """App should create without errors.""" + assert app 
is not None
+    assert app.config['TESTING'] is True
diff --git a/backend/tests/test_image.py b/backend/tests/test_image.py
new file mode 100644
index 0000000..53f7292
--- /dev/null
+++ b/backend/tests/test_image.py
@@ -0,0 +1,27 @@
+"""Tests for image conversion & resize endpoints."""
+import io
+
+
+def test_image_convert_no_file(client):
+    """POST /api/image/convert without file should return 400."""
+    response = client.post('/api/image/convert')
+    assert response.status_code == 400
+
+
+def test_image_resize_no_file(client):
+    """POST /api/image/resize without file should return 400."""
+    response = client.post('/api/image/resize')
+    assert response.status_code == 400
+
+
+def test_image_convert_wrong_type(client):
+    """POST /api/image/convert with non-image should return 400."""
+    data = {
+        'file': (io.BytesIO(b'not an image'), 'test.pdf'),
+    }
+    response = client.post(
+        '/api/image/convert',
+        data=data,
+        content_type='multipart/form-data',
+    )
+    assert response.status_code == 400
diff --git a/backend/tests/test_utils.py b/backend/tests/test_utils.py
new file mode 100644
index 0000000..29b4383
--- /dev/null
+++ b/backend/tests/test_utils.py
@@ -0,0 +1,19 @@
+"""Tests for text utility functions."""
+import sys
+import os
+
+# Add backend to path so we can import utils directly
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+
+from app.utils.file_validator import validate_file
+from app.utils.sanitizer import generate_safe_path
+
+
+def test_generate_safe_path(app):
+    """generate_safe_path should return a (task_id, path) tuple."""
+    with app.app_context():
+        task_id, path = generate_safe_path('pdf', 'upload')
+    assert path.endswith(f'{task_id}.pdf')
+    # task_id should appear as a directory component in the path
+    parts = path.replace('\\', '/').split('/')
+    assert task_id in parts
diff --git a/backend/wsgi.py b/backend/wsgi.py
new file mode 100644
index 0000000..66c7e81
--- /dev/null
+++ b/backend/wsgi.py
@@ -0,0 +1,7 @@
+"""WSGI entry point for
Gunicorn.""" +from app import create_app + +app = create_app() + +if __name__ == "__main__": + app.run(host="0.0.0.0", port=5000) diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml new file mode 100644 index 0000000..4efd9b1 --- /dev/null +++ b/docker-compose.prod.yml @@ -0,0 +1,107 @@ +services: + # --- Redis --- + redis: + image: redis:7-alpine + volumes: + - redis_data:/data + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 10s + timeout: 3s + retries: 5 + restart: always + + # --- Flask Backend --- + backend: + build: + context: ./backend + dockerfile: Dockerfile + env_file: + - .env + environment: + - FLASK_ENV=production + - REDIS_URL=redis://redis:6379/0 + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/1 + volumes: + - upload_data:/tmp/uploads + - output_data:/tmp/outputs + depends_on: + redis: + condition: service_healthy + restart: always + + # --- Celery Worker --- + celery_worker: + build: + context: ./backend + dockerfile: Dockerfile + command: > + celery -A celery_worker.celery worker + --loglevel=warning + --concurrency=4 + -Q default,convert,compress,image,video + env_file: + - .env + environment: + - FLASK_ENV=production + - REDIS_URL=redis://redis:6379/0 + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/1 + volumes: + - upload_data:/tmp/uploads + - output_data:/tmp/outputs + depends_on: + redis: + condition: service_healthy + restart: always + + # --- Celery Beat (Scheduled Tasks) --- + celery_beat: + build: + context: ./backend + dockerfile: Dockerfile + command: > + celery -A celery_worker.celery beat + --loglevel=warning + env_file: + - .env + environment: + - FLASK_ENV=production + - REDIS_URL=redis://redis:6379/0 + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/1 + depends_on: + redis: + condition: service_healthy + restart: always + + # --- Nginx (serves built frontend + reverse proxy) --- + nginx: + 
image: nginx:alpine + ports: + - "80:80" + - "443:443" + volumes: + - ./nginx/nginx.prod.conf:/etc/nginx/conf.d/default.conf:ro + - frontend_build:/usr/share/nginx/html:ro + - ./nginx/ssl:/etc/nginx/ssl:ro + depends_on: + - backend + - frontend_build_step + restart: always + + # --- Frontend Build (one-shot) --- + frontend_build_step: + build: + context: ./frontend + dockerfile: Dockerfile + target: build + volumes: + - frontend_build:/app/dist + +volumes: + redis_data: + upload_data: + output_data: + frontend_build: diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..98c56ca --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,99 @@ +services: + # --- Redis --- + redis: + image: redis:7-alpine + ports: + - "6379:6379" + volumes: + - redis_data:/data + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 10s + timeout: 3s + retries: 5 + + # --- Flask Backend --- + backend: + build: + context: ./backend + dockerfile: Dockerfile + ports: + - "5000:5000" + env_file: + - .env + environment: + - FLASK_ENV=development + - REDIS_URL=redis://redis:6379/0 + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/1 + volumes: + - ./backend:/app + - upload_data:/tmp/uploads + - output_data:/tmp/outputs + depends_on: + redis: + condition: service_healthy + restart: unless-stopped + + # --- Celery Worker --- + celery_worker: + build: + context: ./backend + dockerfile: Dockerfile + command: > + celery -A celery_worker.celery worker + --loglevel=info + --concurrency=2 + -Q default,convert,compress,image,video + env_file: + - .env + environment: + - FLASK_ENV=development + - REDIS_URL=redis://redis:6379/0 + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/1 + volumes: + - ./backend:/app + - upload_data:/tmp/uploads + - output_data:/tmp/outputs + depends_on: + redis: + condition: service_healthy + healthcheck: + test: ["CMD", "celery", "-A", "celery_worker.celery", 
"inspect", "ping"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 30s + restart: unless-stopped + + # --- React Frontend (Vite Dev) --- + frontend: + build: + context: ./frontend + dockerfile: Dockerfile + target: development + ports: + - "5173:5173" + volumes: + - ./frontend:/app + - /app/node_modules + environment: + - NODE_ENV=development + + # --- Nginx Reverse Proxy --- + nginx: + image: nginx:alpine + ports: + - "80:80" + volumes: + - ./nginx/nginx.conf:/etc/nginx/conf.d/default.conf:ro + depends_on: + - backend + - frontend + restart: unless-stopped + +volumes: + redis_data: + upload_data: + output_data: diff --git a/docs/Plan-1.md b/docs/Plan-1.md new file mode 100644 index 0000000..e69de29 diff --git a/frontend/Dockerfile b/frontend/Dockerfile new file mode 100644 index 0000000..5bd129a --- /dev/null +++ b/frontend/Dockerfile @@ -0,0 +1,41 @@ +# ---- Build Stage ---- +FROM node:20-alpine AS build + +WORKDIR /app + +# Install dependencies +COPY package.json ./ +RUN npm install + +# Copy source code +COPY . . + +# Build for production +RUN npm run build + +# ---- Production Stage ---- +FROM nginx:alpine AS production + +# Copy built assets +COPY --from=build /app/dist /usr/share/nginx/html + +# Copy nginx config for SPA routing +COPY nginx-frontend.conf /etc/nginx/conf.d/default.conf + +EXPOSE 80 + +CMD ["nginx", "-g", "daemon off;"] + +# ---- Development Stage ---- +FROM node:20-alpine AS development + +WORKDIR /app + +COPY package.json ./ +RUN npm install + +COPY . . + +EXPOSE 5173 + +CMD ["npm", "run", "dev", "--", "--host", "0.0.0.0"] diff --git a/frontend/index.html b/frontend/index.html new file mode 100644 index 0000000..650c883 --- /dev/null +++ b/frontend/index.html @@ -0,0 +1,17 @@ + + + + + + + + + + + SaaS-PDF — Free Online File Tools + + +
+ + + diff --git a/frontend/nginx-frontend.conf b/frontend/nginx-frontend.conf new file mode 100644 index 0000000..2a75a2a --- /dev/null +++ b/frontend/nginx-frontend.conf @@ -0,0 +1,21 @@ +server { + listen 80; + root /usr/share/nginx/html; + index index.html; + + # SPA fallback + location / { + try_files $uri $uri/ /index.html; + } + + # Cache static assets + location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg|woff2?)$ { + expires 1y; + add_header Cache-Control "public, immutable"; + } + + # Security headers + add_header X-Frame-Options "SAMEORIGIN" always; + add_header X-Content-Type-Options "nosniff" always; + add_header X-XSS-Protection "1; mode=block" always; +} diff --git a/frontend/package.json b/frontend/package.json new file mode 100644 index 0000000..d45231f --- /dev/null +++ b/frontend/package.json @@ -0,0 +1,38 @@ +{ + "name": "saas-pdf-frontend", + "private": true, + "version": "1.0.0", + "type": "module", + "scripts": { + "dev": "vite", + "build": "tsc --noEmit && vite build", + "preview": "vite preview", + "lint": "eslint ." 
+ }, + "dependencies": { + "axios": "^1.7.0", + "i18next": "^23.11.0", + "i18next-browser-languagedetector": "^8.0.0", + "lucide-react": "^0.400.0", + "react": "^18.3.0", + "react-dom": "^18.3.0", + "react-dropzone": "^14.2.0", + "react-ga4": "^2.1.0", + "react-helmet-async": "^2.0.0", + "react-i18next": "^14.1.0", + "react-router-dom": "^6.23.0", + "sonner": "^1.5.0", + "zustand": "^4.5.0" + }, + "devDependencies": { + "@types/node": "^20.14.0", + "@types/react": "^18.3.0", + "@types/react-dom": "^18.3.0", + "@vitejs/plugin-react": "^4.3.0", + "autoprefixer": "^10.4.0", + "postcss": "^8.4.0", + "tailwindcss": "^3.4.0", + "typescript": "^5.5.0", + "vite": "^5.4.0" + } +} diff --git a/frontend/postcss.config.js b/frontend/postcss.config.js new file mode 100644 index 0000000..2aa7205 --- /dev/null +++ b/frontend/postcss.config.js @@ -0,0 +1,6 @@ +export default { + plugins: { + tailwindcss: {}, + autoprefixer: {}, + }, +}; diff --git a/frontend/public/ads.txt b/frontend/public/ads.txt new file mode 100644 index 0000000..4729ed9 --- /dev/null +++ b/frontend/public/ads.txt @@ -0,0 +1 @@ +google.com, pub-XXXXXXXXXXXXXXXX, DIRECT, f08c47fec0942fa0 diff --git a/frontend/public/favicon.svg b/frontend/public/favicon.svg new file mode 100644 index 0000000..1142db6 --- /dev/null +++ b/frontend/public/favicon.svg @@ -0,0 +1,7 @@ + + + + + + + diff --git a/frontend/public/robots.txt b/frontend/public/robots.txt new file mode 100644 index 0000000..ac021d3 --- /dev/null +++ b/frontend/public/robots.txt @@ -0,0 +1,6 @@ +# robots.txt — SaaS-PDF +User-agent: * +Allow: / +Disallow: /api/ + +Sitemap: https://yourdomain.com/sitemap.xml diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx new file mode 100644 index 0000000..47c8fab --- /dev/null +++ b/frontend/src/App.tsx @@ -0,0 +1,71 @@ +import { lazy, Suspense } from 'react'; +import { Routes, Route } from 'react-router-dom'; +import Header from '@/components/layout/Header'; +import Footer from '@/components/layout/Footer'; 
+import { useDirection } from '@/hooks/useDirection'; + +// Pages +const HomePage = lazy(() => import('@/pages/HomePage')); +const AboutPage = lazy(() => import('@/pages/AboutPage')); +const PrivacyPage = lazy(() => import('@/pages/PrivacyPage')); +const NotFoundPage = lazy(() => import('@/pages/NotFoundPage')); +const TermsPage = lazy(() => import('@/pages/TermsPage')); + +// Tool Pages +const PdfToWord = lazy(() => import('@/components/tools/PdfToWord')); +const WordToPdf = lazy(() => import('@/components/tools/WordToPdf')); +const PdfCompressor = lazy(() => import('@/components/tools/PdfCompressor')); +const ImageConverter = lazy(() => import('@/components/tools/ImageConverter')); +const VideoToGif = lazy(() => import('@/components/tools/VideoToGif')); +const WordCounter = lazy(() => import('@/components/tools/WordCounter')); +const TextCleaner = lazy(() => import('@/components/tools/TextCleaner')); + +function LoadingFallback() { + return ( +
+
+
+ ); +} + +export default function App() { + useDirection(); + + return ( +
+
+ +
+ }> + + {/* Pages */} + } /> + } /> + } /> + } /> + + {/* PDF Tools */} + } /> + } /> + } /> + + {/* Image Tools */} + } /> + + {/* Video Tools */} + } /> + + {/* Text Tools */} + } /> + } /> + + {/* 404 */} + } /> + + +
+ +
+
+ ); +} diff --git a/frontend/src/components/layout/AdSlot.tsx b/frontend/src/components/layout/AdSlot.tsx new file mode 100644 index 0000000..0aaf4b8 --- /dev/null +++ b/frontend/src/components/layout/AdSlot.tsx @@ -0,0 +1,53 @@ +import { useEffect, useRef } from 'react'; + +interface AdSlotProps { + /** AdSense ad slot ID */ + slot: string; + /** Ad format: 'auto', 'rectangle', 'horizontal', 'vertical' */ + format?: string; + /** Responsive mode */ + responsive?: boolean; + /** Additional CSS class */ + className?: string; +} + +/** + * Google AdSense ad slot component. + * Loads the ad unit once and handles cleanup. + */ +export default function AdSlot({ + slot, + format = 'auto', + responsive = true, + className = '', +}: AdSlotProps) { + const adRef = useRef(null); + const isLoaded = useRef(false); + + useEffect(() => { + if (isLoaded.current) return; + + try { + // Push ad to AdSense queue + const adsbygoogle = (window as any).adsbygoogle || []; + adsbygoogle.push({}); + isLoaded.current = true; + } catch { + // AdSense not loaded (e.g., ad blocker) + } + }, []); + + return ( +
+ +
+ ); +} diff --git a/frontend/src/components/layout/Footer.tsx b/frontend/src/components/layout/Footer.tsx new file mode 100644 index 0000000..5e5df3f --- /dev/null +++ b/frontend/src/components/layout/Footer.tsx @@ -0,0 +1,45 @@ +import { Link } from 'react-router-dom'; +import { useTranslation } from 'react-i18next'; +import { FileText } from 'lucide-react'; + +export default function Footer() { + const { t } = useTranslation(); + + return ( +
+
+
+ {/* Brand */} +
+ + + © {new Date().getFullYear()} {t('common.appName')} + +
+ + {/* Links */} +
+ + {t('common.privacy')} + + + {t('common.terms')} + + + {t('common.about')} + +
+
+
+
+ ); +} diff --git a/frontend/src/components/layout/Header.tsx b/frontend/src/components/layout/Header.tsx new file mode 100644 index 0000000..92bc57a --- /dev/null +++ b/frontend/src/components/layout/Header.tsx @@ -0,0 +1,50 @@ +import { Link } from 'react-router-dom'; +import { useTranslation } from 'react-i18next'; +import { FileText, Globe } from 'lucide-react'; + +export default function Header() { + const { t, i18n } = useTranslation(); + + const toggleLanguage = () => { + const newLang = i18n.language === 'ar' ? 'en' : 'ar'; + i18n.changeLanguage(newLang); + }; + + return ( +
+
+ {/* Logo */} + + + {t('common.appName')} + + + {/* Navigation */} + + + {/* Language Toggle */} + +
+
+ ); +} diff --git a/frontend/src/components/shared/DownloadButton.tsx b/frontend/src/components/shared/DownloadButton.tsx new file mode 100644 index 0000000..edd8533 --- /dev/null +++ b/frontend/src/components/shared/DownloadButton.tsx @@ -0,0 +1,88 @@ +import { useTranslation } from 'react-i18next'; +import { Download, RotateCcw, Clock } from 'lucide-react'; +import type { TaskResult } from '@/services/api'; +import { formatFileSize } from '@/utils/textTools'; + +interface DownloadButtonProps { + /** Task result containing download URL */ + result: TaskResult; + /** Called when user wants to start over */ + onStartOver: () => void; +} + +export default function DownloadButton({ result, onStartOver }: DownloadButtonProps) { + const { t } = useTranslation(); + + if (!result.download_url) return null; + + return ( +
+ {/* Success header */} +
+

+ {t('result.conversionComplete')} +

+

+ {t('result.downloadReady')} +

+
+ + {/* File stats */} + {(result.original_size || result.compressed_size) && ( +
+ {result.original_size && ( +
+

{t('result.originalSize')}

+

+ {formatFileSize(result.original_size)} +

+
+ )} + {result.compressed_size && ( +
+

{t('result.newSize')}

+

+ {formatFileSize(result.compressed_size)} +

+
+ )} + {result.reduction_percent !== undefined && ( +
+

{t('result.reduction')}

+

+ {result.reduction_percent}% +

+
+ )} +
+ )} + + {/* Download button */} + + + {t('common.download')} — {result.filename} + + + {/* Expiry notice */} +
+ + {t('result.linkExpiry')} +
+ + {/* Start over */} + +
+ ); +} diff --git a/frontend/src/components/shared/FileUploader.tsx b/frontend/src/components/shared/FileUploader.tsx new file mode 100644 index 0000000..f775897 --- /dev/null +++ b/frontend/src/components/shared/FileUploader.tsx @@ -0,0 +1,132 @@ +import { useCallback } from 'react'; +import { useDropzone, type Accept } from 'react-dropzone'; +import { useTranslation } from 'react-i18next'; +import { Upload, File, X } from 'lucide-react'; +import { formatFileSize } from '@/utils/textTools'; + +interface FileUploaderProps { + /** Called when a file is selected/dropped */ + onFileSelect: (file: File) => void; + /** Currently selected file */ + file: File | null; + /** Accepted MIME types */ + accept?: Accept; + /** Maximum file size in MB */ + maxSizeMB?: number; + /** Whether upload is in progress */ + isUploading?: boolean; + /** Upload progress percentage */ + uploadProgress?: number; + /** Error message */ + error?: string | null; + /** Reset handler */ + onReset?: () => void; + /** Descriptive text for accepted file types */ + acceptLabel?: string; +} + +export default function FileUploader({ + onFileSelect, + file, + accept, + maxSizeMB = 20, + isUploading = false, + uploadProgress = 0, + error, + onReset, + acceptLabel, +}: FileUploaderProps) { + const { t } = useTranslation(); + + const onDrop = useCallback( + (acceptedFiles: File[]) => { + if (acceptedFiles.length > 0) { + onFileSelect(acceptedFiles[0]); + } + }, + [onFileSelect] + ); + + const { getRootProps, getInputProps, isDragActive } = useDropzone({ + onDrop, + accept, + maxFiles: 1, + maxSize: maxSizeMB * 1024 * 1024, + disabled: isUploading, + }); + + return ( +
+ {/* Drop Zone */} + {!file && ( +
+ + +

+ {t('common.dragDrop')} +

+ {acceptLabel && ( +

{acceptLabel}

+ )} +

+ {t('common.maxSize', { size: maxSizeMB })} +

+
+ )} + + {/* Selected File */} + {file && !isUploading && ( +
+ +
+

+ {file.name} +

+

{formatFileSize(file.size)}

+
+ {onReset && ( + + )} +
+ )} + + {/* Upload Progress */} + {isUploading && ( +
+
+ + {t('common.upload')}... + + {uploadProgress}% +
+
+
+
+
+ )} + + {/* Error */} + {error && ( +
+

{error}

+
+ )} +
+ ); +} diff --git a/frontend/src/components/shared/ProgressBar.tsx b/frontend/src/components/shared/ProgressBar.tsx new file mode 100644 index 0000000..6cf1002 --- /dev/null +++ b/frontend/src/components/shared/ProgressBar.tsx @@ -0,0 +1,42 @@ +import { useTranslation } from 'react-i18next'; +import { Loader2, CheckCircle2 } from 'lucide-react'; + +interface ProgressBarProps { + /** Current task state */ + state: 'PENDING' | 'PROCESSING' | 'SUCCESS' | 'FAILURE' | string; + /** Progress message */ + message?: string; +} + +export default function ProgressBar({ state, message }: ProgressBarProps) { + const { t } = useTranslation(); + + const isActive = state === 'PENDING' || state === 'PROCESSING'; + const isComplete = state === 'SUCCESS'; + + return ( +
+
+ {isActive && ( + + )} + {isComplete && ( + + )} + +
+

+ {message || t('common.processing')} +

+
+
+ + {/* Animated progress bar for active states */} + {isActive && ( +
+
+
+ )} +
+ ); +} diff --git a/frontend/src/components/shared/ToolCard.tsx b/frontend/src/components/shared/ToolCard.tsx new file mode 100644 index 0000000..b541434 --- /dev/null +++ b/frontend/src/components/shared/ToolCard.tsx @@ -0,0 +1,43 @@ +import { Link } from 'react-router-dom'; +import type { ReactNode } from 'react'; + +interface ToolCardProps { + /** Tool route path */ + to: string; + /** Tool title */ + title: string; + /** Short description */ + description: string; + /** Pre-rendered icon element */ + icon: ReactNode; + /** Icon background color class */ + bgColor: string; +} + +export default function ToolCard({ + to, + title, + description, + icon, + bgColor, +}: ToolCardProps) { + return ( + +
+
+ {icon} +
+
+

+ {title} +

+

+ {description} +

+
+
+ + ); +} diff --git a/frontend/src/components/tools/ImageConverter.tsx b/frontend/src/components/tools/ImageConverter.tsx new file mode 100644 index 0000000..6efbd4f --- /dev/null +++ b/frontend/src/components/tools/ImageConverter.tsx @@ -0,0 +1,176 @@ +import { useState } from 'react'; +import { useTranslation } from 'react-i18next'; +import { Helmet } from 'react-helmet-async'; +import { ImageIcon } from 'lucide-react'; +import FileUploader from '@/components/shared/FileUploader'; +import ProgressBar from '@/components/shared/ProgressBar'; +import DownloadButton from '@/components/shared/DownloadButton'; +import AdSlot from '@/components/layout/AdSlot'; +import { useFileUpload } from '@/hooks/useFileUpload'; +import { useTaskPolling } from '@/hooks/useTaskPolling'; +import { generateToolSchema } from '@/utils/seo'; + +type OutputFormat = 'jpg' | 'png' | 'webp'; + +export default function ImageConverter() { + const { t } = useTranslation(); + const [phase, setPhase] = useState<'upload' | 'processing' | 'done'>('upload'); + const [format, setFormat] = useState('jpg'); + const [quality, setQuality] = useState(85); + + const { + file, + uploadProgress, + isUploading, + taskId, + error: uploadError, + selectFile, + startUpload, + reset, + } = useFileUpload({ + endpoint: '/image/convert', + maxSizeMB: 10, + acceptedTypes: ['png', 'jpg', 'jpeg', 'webp'], + extraData: { format, quality: quality.toString() }, + }); + + const { status, result, error: taskError } = useTaskPolling({ + taskId, + onComplete: () => setPhase('done'), + onError: () => setPhase('done'), + }); + + const handleUpload = async () => { + const id = await startUpload(); + if (id) setPhase('processing'); + }; + + const handleReset = () => { + reset(); + setPhase('upload'); + }; + + const formats: { value: OutputFormat; label: string }[] = [ + { value: 'jpg', label: 'JPG' }, + { value: 'png', label: 'PNG' }, + { value: 'webp', label: 'WebP' }, + ]; + + const schema = generateToolSchema({ + name: 
t('tools.imageConvert.title'), + description: t('tools.imageConvert.description'), + url: `${window.location.origin}/tools/image-converter`, + }); + + return ( + <> + + {t('tools.imageConvert.title')} — {t('common.appName')} + + + + + +
+
+
+ +
+

{t('tools.imageConvert.title')}

+

{t('tools.imageConvert.description')}

+
+ + + + {phase === 'upload' && ( +
+ + + {file && !isUploading && ( + <> + {/* Format Selector */} +
+ +
+ {formats.map((f) => ( + + ))} +
+
+ + {/* Quality Slider (for lossy formats) */} + {format !== 'png' && ( +
+ + setQuality(Number(e.target.value))} + className="w-full accent-primary-600" + /> +
+ )} + + + + )} +
+ )} + + {phase === 'processing' && !result && ( + + )} + + {phase === 'done' && result && result.status === 'completed' && ( + + )} + + {phase === 'done' && taskError && ( +
+
+

{taskError}

+
+ +
+ )} + + +
+ + ); +} diff --git a/frontend/src/components/tools/PdfCompressor.tsx b/frontend/src/components/tools/PdfCompressor.tsx new file mode 100644 index 0000000..5d5f421 --- /dev/null +++ b/frontend/src/components/tools/PdfCompressor.tsx @@ -0,0 +1,148 @@ +import { useState } from 'react'; +import { useTranslation } from 'react-i18next'; +import { Helmet } from 'react-helmet-async'; +import { Minimize2 } from 'lucide-react'; +import FileUploader from '@/components/shared/FileUploader'; +import ProgressBar from '@/components/shared/ProgressBar'; +import DownloadButton from '@/components/shared/DownloadButton'; +import AdSlot from '@/components/layout/AdSlot'; +import { useFileUpload } from '@/hooks/useFileUpload'; +import { useTaskPolling } from '@/hooks/useTaskPolling'; +import { generateToolSchema } from '@/utils/seo'; + +type Quality = 'low' | 'medium' | 'high'; + +export default function PdfCompressor() { + const { t } = useTranslation(); + const [phase, setPhase] = useState<'upload' | 'processing' | 'done'>('upload'); + const [quality, setQuality] = useState('medium'); + + const { + file, + uploadProgress, + isUploading, + taskId, + error: uploadError, + selectFile, + startUpload, + reset, + } = useFileUpload({ + endpoint: '/compress/pdf', + maxSizeMB: 20, + acceptedTypes: ['pdf'], + extraData: { quality }, + }); + + const { status, result, error: taskError } = useTaskPolling({ + taskId, + onComplete: () => setPhase('done'), + onError: () => setPhase('done'), + }); + + const handleUpload = async () => { + const id = await startUpload(); + if (id) setPhase('processing'); + }; + + const handleReset = () => { + reset(); + setPhase('upload'); + }; + + const qualityOptions: { value: Quality; label: string; desc: string }[] = [ + { value: 'low', label: t('tools.compressPdf.qualityLow'), desc: '72 DPI' }, + { value: 'medium', label: t('tools.compressPdf.qualityMedium'), desc: '150 DPI' }, + { value: 'high', label: t('tools.compressPdf.qualityHigh'), desc: '300 DPI' }, + ]; 
+ + const schema = generateToolSchema({ + name: t('tools.compressPdf.title'), + description: t('tools.compressPdf.description'), + url: `${window.location.origin}/tools/compress-pdf`, + }); + + return ( + <> + + {t('tools.compressPdf.title')} — {t('common.appName')} + + + + + +
+
+
+ +
+

{t('tools.compressPdf.title')}

+

{t('tools.compressPdf.description')}

+
+ + + + {phase === 'upload' && ( +
+ + + {/* Quality Selector */} + {file && !isUploading && ( + <> +
+ {qualityOptions.map((opt) => ( + + ))} +
+ + + )} +
+ )} + + {phase === 'processing' && !result && ( + + )} + + {phase === 'done' && result && result.status === 'completed' && ( + + )} + + {phase === 'done' && taskError && ( +
+
+

{taskError}

+
+ +
+ )} + + +
+ + ); +} diff --git a/frontend/src/components/tools/PdfToWord.tsx b/frontend/src/components/tools/PdfToWord.tsx new file mode 100644 index 0000000..d8c4f43 --- /dev/null +++ b/frontend/src/components/tools/PdfToWord.tsx @@ -0,0 +1,128 @@ +import { useState } from 'react'; +import { useTranslation } from 'react-i18next'; +import { Helmet } from 'react-helmet-async'; +import { FileText } from 'lucide-react'; +import FileUploader from '@/components/shared/FileUploader'; +import ProgressBar from '@/components/shared/ProgressBar'; +import DownloadButton from '@/components/shared/DownloadButton'; +import AdSlot from '@/components/layout/AdSlot'; +import { useFileUpload } from '@/hooks/useFileUpload'; +import { useTaskPolling } from '@/hooks/useTaskPolling'; +import { generateToolSchema } from '@/utils/seo'; + +export default function PdfToWord() { + const { t } = useTranslation(); + const [phase, setPhase] = useState<'upload' | 'processing' | 'done'>('upload'); + + const { + file, + uploadProgress, + isUploading, + taskId, + error: uploadError, + selectFile, + startUpload, + reset, + } = useFileUpload({ + endpoint: '/convert/pdf-to-word', + maxSizeMB: 20, + acceptedTypes: ['pdf'], + }); + + const { status, result, error: taskError } = useTaskPolling({ + taskId, + onComplete: () => setPhase('done'), + onError: () => setPhase('done'), + }); + + const handleUpload = async () => { + const id = await startUpload(); + if (id) setPhase('processing'); + }; + + const handleReset = () => { + reset(); + setPhase('upload'); + }; + + const schema = generateToolSchema({ + name: t('tools.pdfToWord.title'), + description: t('tools.pdfToWord.description'), + url: `${window.location.origin}/tools/pdf-to-word`, + }); + + return ( + <> + + {t('tools.pdfToWord.title')} — {t('common.appName')} + + + + + +
+ {/* Tool Header */} +
+
+ +
+

{t('tools.pdfToWord.title')}

+

{t('tools.pdfToWord.description')}

+
+ + {/* Ad Slot - Top */} + + + {/* Upload Phase */} + {phase === 'upload' && ( +
+ + {file && !isUploading && ( + + )} +
+ )} + + {/* Processing Phase */} + {phase === 'processing' && !result && ( + + )} + + {/* Done Phase */} + {phase === 'done' && result && result.status === 'completed' && ( + + )} + + {/* Error */} + {(phase === 'done' && taskError) && ( +
+
+

{taskError}

+
+ +
+ )} + + {/* Ad Slot - Bottom */} + +
+ + ); +} diff --git a/frontend/src/components/tools/TextCleaner.tsx b/frontend/src/components/tools/TextCleaner.tsx new file mode 100644 index 0000000..20d3fd7 --- /dev/null +++ b/frontend/src/components/tools/TextCleaner.tsx @@ -0,0 +1,146 @@ +import { useState } from 'react'; +import { useTranslation } from 'react-i18next'; +import { Helmet } from 'react-helmet-async'; +import { Eraser, Copy, Check } from 'lucide-react'; +import AdSlot from '@/components/layout/AdSlot'; +import { removeExtraSpaces, convertCase, removeDiacritics } from '@/utils/textTools'; +import { generateToolSchema } from '@/utils/seo'; + +export default function TextCleaner() { + const { t } = useTranslation(); + const [input, setInput] = useState(''); + const [output, setOutput] = useState(''); + const [copied, setCopied] = useState(false); + + const applyTransform = (type: string) => { + let result = input; + switch (type) { + case 'removeSpaces': + result = removeExtraSpaces(input); + break; + case 'upper': + result = convertCase(input, 'upper'); + break; + case 'lower': + result = convertCase(input, 'lower'); + break; + case 'title': + result = convertCase(input, 'title'); + break; + case 'sentence': + result = convertCase(input, 'sentence'); + break; + case 'removeDiacritics': + result = removeDiacritics(input); + break; + default: + break; + } + setOutput(result); + setCopied(false); + }; + + const copyToClipboard = async () => { + try { + await navigator.clipboard.writeText(output || input); + setCopied(true); + setTimeout(() => setCopied(false), 2000); + } catch { + // Clipboard API not available + } + }; + + const buttons = [ + { key: 'removeSpaces', label: t('tools.textCleaner.removeSpaces'), color: 'bg-blue-600 hover:bg-blue-700' }, + { key: 'upper', label: t('tools.textCleaner.toUpperCase'), color: 'bg-purple-600 hover:bg-purple-700' }, + { key: 'lower', label: t('tools.textCleaner.toLowerCase'), color: 'bg-emerald-600 hover:bg-emerald-700' }, + { key: 'title', label: 
t('tools.textCleaner.toTitleCase'), color: 'bg-orange-600 hover:bg-orange-700' }, + { key: 'sentence', label: t('tools.textCleaner.toSentenceCase'), color: 'bg-rose-600 hover:bg-rose-700' }, + { key: 'removeDiacritics', label: t('tools.textCleaner.removeDiacritics'), color: 'bg-amber-600 hover:bg-amber-700' }, + ]; + + const schema = generateToolSchema({ + name: t('tools.textCleaner.title'), + description: t('tools.textCleaner.description'), + url: `${window.location.origin}/tools/text-cleaner`, + }); + + return ( + <> + + {t('tools.textCleaner.title')} — {t('common.appName')} + + + + + +
+
+
+ +
+

{t('tools.textCleaner.title')}

+

{t('tools.textCleaner.description')}

+
+ + + + {/* Input */} +