feat: Initialize frontend with React, Vite, and Tailwind CSS
- Set up main entry point for React application. - Create About, Home, NotFound, Privacy, and Terms pages with SEO support. - Implement API service for file uploads and task management. - Add global styles using Tailwind CSS. - Create utility functions for SEO and text processing. - Configure Vite for development and production builds. - Set up Nginx configuration for serving frontend and backend. - Add scripts for cleanup of expired files and sitemap generation. - Implement deployment script for production environment.
This commit is contained in:
41
backend/Dockerfile
Normal file
41
backend/Dockerfile
Normal file
@@ -0,0 +1,41 @@
|
||||
FROM python:3.12-slim-bookworm

# Prevent interactive prompts during package installation
ENV DEBIAN_FRONTEND=noninteractive

# Install system dependencies for file processing.
# libreoffice-* : document conversion backends; ghostscript : PDF compression;
# ffmpeg : video->GIF; libmagic1 : MIME sniffing for upload validation;
# imagemagick : image tooling; curl : used by the HEALTHCHECK below.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libreoffice-core \
    libreoffice-writer \
    libreoffice-calc \
    libreoffice-draw \
    ghostscript \
    ffmpeg \
    libmagic1 \
    imagemagick \
    curl \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Copy requirements first for Docker layer caching
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Create temp directories (upload/output staging used by the app)
RUN mkdir -p /tmp/uploads /tmp/outputs

# Expose port
EXPOSE 5000

# Health check — hits the Flask /api/health endpoint
HEALTHCHECK --interval=30s --timeout=10s --retries=3 \
    CMD curl -f http://localhost:5000/api/health || exit 1

# Run with Gunicorn
CMD ["gunicorn", "--bind", "0.0.0.0:5000", "--workers", "4", "--timeout", "120", "wsgi:app"]
|
||||
73
backend/app/__init__.py
Normal file
73
backend/app/__init__.py
Normal file
@@ -0,0 +1,73 @@
|
||||
"""Flask Application Factory."""
|
||||
import os
|
||||
|
||||
from flask import Flask
|
||||
|
||||
from config import config
|
||||
from app.extensions import cors, limiter, talisman, init_celery
|
||||
|
||||
|
||||
def create_app(config_name=None):
    """Create and configure the Flask application.

    Application-factory pattern: builds a Flask app from the named config,
    prepares working directories, wires extensions (CORS, rate limiting,
    Talisman security headers, Celery) and registers all API blueprints.

    Args:
        config_name: Key into the ``config`` mapping ("development",
            "production", ...). Defaults to the ``FLASK_ENV`` environment
            variable, falling back to "development".

    Returns:
        The fully configured :class:`flask.Flask` instance.
    """
    if config_name is None:
        config_name = os.getenv("FLASK_ENV", "development")

    app = Flask(__name__)
    app.config.from_object(config[config_name])

    # Create upload/output directories (idempotent; required before any
    # request handler tries to save files)
    os.makedirs(app.config["UPLOAD_FOLDER"], exist_ok=True)
    os.makedirs(app.config["OUTPUT_FOLDER"], exist_ok=True)

    # Initialize extensions
    cors.init_app(app, origins=app.config["CORS_ORIGINS"])

    limiter.init_app(app)

    # Talisman security headers (relaxed CSP for AdSense)
    # NOTE: 'unsafe-inline' in script-src weakens CSP; kept here for the
    # inline ad/analytics snippets the frontend injects.
    csp = {
        "default-src": "'self'",
        "script-src": [
            "'self'",
            "'unsafe-inline'",
            "https://pagead2.googlesyndication.com",
            "https://www.googletagmanager.com",
            "https://www.google-analytics.com",
        ],
        "style-src": ["'self'", "'unsafe-inline'", "https://fonts.googleapis.com"],
        "font-src": ["'self'", "https://fonts.gstatic.com"],
        "img-src": ["'self'", "data:", "https://pagead2.googlesyndication.com"],
        "frame-src": ["https://googleads.g.doubleclick.net"],
        "connect-src": [
            "'self'",
            "https://www.google-analytics.com",
            "https://*.amazonaws.com",
        ],
    }
    talisman.init_app(
        app,
        content_security_policy=csp,
        # Only force HTTPS redirects in production; local dev stays on HTTP.
        force_https=config_name == "production",
    )

    # Initialize Celery (binds broker/result config and app-context tasks)
    init_celery(app)

    # Register blueprints — imported here (not at module top) to avoid
    # circular imports, since route modules import from app.extensions.
    from app.routes.health import health_bp
    from app.routes.convert import convert_bp
    from app.routes.compress import compress_bp
    from app.routes.image import image_bp
    from app.routes.video import video_bp
    from app.routes.tasks import tasks_bp
    from app.routes.download import download_bp

    app.register_blueprint(health_bp, url_prefix="/api")
    app.register_blueprint(convert_bp, url_prefix="/api/convert")
    app.register_blueprint(compress_bp, url_prefix="/api/compress")
    app.register_blueprint(image_bp, url_prefix="/api/image")
    app.register_blueprint(video_bp, url_prefix="/api/video")
    app.register_blueprint(tasks_bp, url_prefix="/api/tasks")
    app.register_blueprint(download_bp, url_prefix="/api/download")

    return app
|
||||
43
backend/app/extensions.py
Normal file
43
backend/app/extensions.py
Normal file
@@ -0,0 +1,43 @@
|
||||
"""Flask extensions initialization."""
|
||||
from celery import Celery
|
||||
from flask_cors import CORS
|
||||
from flask_limiter import Limiter
|
||||
from flask_limiter.util import get_remote_address
|
||||
from flask_talisman import Talisman
|
||||
|
||||
# Initialize extensions (will be bound to app in create_app)
cors = CORS()  # cross-origin support; allowed origins supplied at init_app time
limiter = Limiter(key_func=get_remote_address)  # rate limiting keyed by client IP
talisman = Talisman()  # security headers / CSP
celery = Celery()  # shared task-queue instance; configured in init_celery below
|
||||
|
||||
|
||||
def init_celery(app) -> Celery:
    """Initialize Celery with Flask app context.

    Copies broker/result settings from the Flask config onto the shared
    module-level ``celery`` instance, routes each task module to its own
    queue, and makes every task run inside the Flask application context.

    Args:
        app: The configured Flask application.

    Returns:
        The configured module-level Celery instance.
    """
    celery.conf.broker_url = app.config["CELERY_BROKER_URL"]
    celery.conf.result_backend = app.config["CELERY_RESULT_BACKEND"]
    # Task results expire alongside the processed files (default 30 min).
    celery.conf.result_expires = app.config.get("FILE_EXPIRY_SECONDS", 1800)
    celery.conf.task_serializer = "json"
    celery.conf.result_serializer = "json"
    celery.conf.accept_content = ["json"]
    celery.conf.timezone = "UTC"
    celery.conf.task_track_started = True

    # Set task routes — one queue per workload so slow jobs (e.g. video)
    # don't starve the quick ones.
    celery.conf.task_routes = {
        "app.tasks.convert_tasks.*": {"queue": "convert"},
        "app.tasks.compress_tasks.*": {"queue": "compress"},
        "app.tasks.image_tasks.*": {"queue": "image"},
        "app.tasks.video_tasks.*": {"queue": "video"},
    }

    class ContextTask(celery.Task):
        """Make Celery tasks work with Flask app context."""
        abstract = True

        def __call__(self, *args, **kwargs):
            # `app` is captured by closure, so task bodies can use
            # current_app, extensions and config transparently.
            with app.app_context():
                return self.run(*args, **kwargs)

    celery.Task = ContextTask
    return celery
|
||||
1
backend/app/middleware/__init__.py
Normal file
1
backend/app/middleware/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Backend application middleware."""
|
||||
18
backend/app/middleware/rate_limiter.py
Normal file
18
backend/app/middleware/rate_limiter.py
Normal file
@@ -0,0 +1,18 @@
|
||||
"""Rate limiting middleware configuration."""
|
||||
from app.extensions import limiter
|
||||
|
||||
|
||||
# Custom rate limits for specific operations
UPLOAD_LIMIT = "10/minute"
DOWNLOAD_LIMIT = "30/minute"
API_LIMIT = "100/hour"


def get_upload_limit():
    """Return the rate-limit string applied to file upload endpoints."""
    return UPLOAD_LIMIT


def get_download_limit():
    """Return the rate-limit string applied to file download endpoints."""
    return DOWNLOAD_LIMIT
|
||||
1
backend/app/routes/__init__.py
Normal file
1
backend/app/routes/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Backend application routes."""
|
||||
47
backend/app/routes/compress.py
Normal file
47
backend/app/routes/compress.py
Normal file
@@ -0,0 +1,47 @@
|
||||
"""PDF compression routes."""
|
||||
from flask import Blueprint, request, jsonify
|
||||
|
||||
from app.extensions import limiter
|
||||
from app.utils.file_validator import validate_file, FileValidationError
|
||||
from app.utils.sanitizer import generate_safe_path
|
||||
from app.tasks.compress_tasks import compress_pdf_task
|
||||
|
||||
compress_bp = Blueprint("compress", __name__)


@compress_bp.route("/pdf", methods=["POST"])
@limiter.limit("10/minute")
def compress_pdf_route():
    """Kick off asynchronous compression of an uploaded PDF.

    Accepts: multipart/form-data with 'file' field (PDF)
    Optional form field 'quality': "low", "medium", "high" (default: "medium")
    Returns: JSON with task_id for polling
    """
    if "file" not in request.files:
        return jsonify({"error": "No file provided."}), 400

    upload = request.files["file"]

    # Unknown quality values silently fall back to the default preset.
    quality = request.form.get("quality", "medium")
    if quality not in ("low", "medium", "high"):
        quality = "medium"

    try:
        original_filename, ext = validate_file(upload, allowed_types=["pdf"])
    except FileValidationError as exc:
        return jsonify({"error": exc.message}), exc.code

    # Persist under a server-generated, traversal-safe path.
    task_id, input_path = generate_safe_path(ext, folder_type="upload")
    upload.save(input_path)

    # Queue the heavy work and return immediately.
    task = compress_pdf_task.delay(input_path, task_id, original_filename, quality)

    payload = {
        "task_id": task.id,
        "message": "Compression started. Poll /api/tasks/{task_id}/status for progress.",
    }
    return jsonify(payload), 202
|
||||
73
backend/app/routes/convert.py
Normal file
73
backend/app/routes/convert.py
Normal file
@@ -0,0 +1,73 @@
|
||||
"""PDF conversion routes (PDF↔Word)."""
|
||||
from flask import Blueprint, request, jsonify
|
||||
|
||||
from app.extensions import limiter
|
||||
from app.utils.file_validator import validate_file, FileValidationError
|
||||
from app.utils.sanitizer import generate_safe_path
|
||||
from app.tasks.convert_tasks import convert_pdf_to_word, convert_word_to_pdf
|
||||
|
||||
convert_bp = Blueprint("convert", __name__)


@convert_bp.route("/pdf-to-word", methods=["POST"])
@limiter.limit("10/minute")
def pdf_to_word_route():
    """Start an asynchronous PDF → Word (DOCX) conversion.

    Accepts: multipart/form-data with 'file' field (PDF)
    Returns: JSON with task_id for polling
    """
    if "file" not in request.files:
        return jsonify({"error": "No file provided."}), 400

    upload = request.files["file"]

    try:
        original_filename, ext = validate_file(upload, allowed_types=["pdf"])
    except FileValidationError as exc:
        return jsonify({"error": exc.message}), exc.code

    # Never trust the client filename — store under a server-chosen path.
    task_id, input_path = generate_safe_path(ext, folder_type="upload")
    upload.save(input_path)

    task = convert_pdf_to_word.delay(input_path, task_id, original_filename)

    payload = {
        "task_id": task.id,
        "message": "Conversion started. Poll /api/tasks/{task_id}/status for progress.",
    }
    return jsonify(payload), 202
|
||||
|
||||
|
||||
@convert_bp.route("/word-to-pdf", methods=["POST"])
@limiter.limit("10/minute")
def word_to_pdf_route():
    """Start an asynchronous Word (DOC/DOCX) → PDF conversion.

    Accepts: multipart/form-data with 'file' field (DOC/DOCX)
    Returns: JSON with task_id for polling
    """
    if "file" not in request.files:
        return jsonify({"error": "No file provided."}), 400

    upload = request.files["file"]

    try:
        original_filename, ext = validate_file(upload, allowed_types=["doc", "docx"])
    except FileValidationError as exc:
        return jsonify({"error": exc.message}), exc.code

    # Save under a server-generated path, then queue the conversion.
    task_id, input_path = generate_safe_path(ext, folder_type="upload")
    upload.save(input_path)

    task = convert_word_to_pdf.delay(input_path, task_id, original_filename)

    payload = {
        "task_id": task.id,
        "message": "Conversion started. Poll /api/tasks/{task_id}/status for progress.",
    }
    return jsonify(payload), 202
|
||||
35
backend/app/routes/download.py
Normal file
35
backend/app/routes/download.py
Normal file
@@ -0,0 +1,35 @@
|
||||
"""Local file download route — used when S3 is not configured."""
|
||||
import os
|
||||
|
||||
from flask import Blueprint, send_file, abort, request, current_app
|
||||
|
||||
download_bp = Blueprint("download", __name__)


@download_bp.route("/<task_id>/<filename>", methods=["GET"])
def download_file(task_id: str, filename: str):
    """
    Serve a processed file from the local filesystem.

    Only active in development (when S3 is not configured).

    Args:
        task_id: Directory name under OUTPUT_FOLDER (UUID-style expected).
        filename: Name of the processed file inside that directory.

    Returns:
        The file as an attachment; 400 on suspicious input, 404 if the
        file does not exist (or has already expired and been cleaned up).
    """
    # Security: sanitize inputs — reject obvious traversal characters.
    if ".." in task_id or "/" in task_id or "\\" in task_id:
        abort(400, "Invalid task ID.")
    if ".." in filename or "/" in filename or "\\" in filename:
        abort(400, "Invalid filename.")

    output_dir = current_app.config["OUTPUT_FOLDER"]
    file_path = os.path.join(output_dir, task_id, filename)

    # Defense in depth: resolve symlinks/relative components and verify the
    # final path is still contained inside OUTPUT_FOLDER. The substring
    # checks above miss e.g. symlink escapes.
    real_root = os.path.realpath(output_dir)
    real_path = os.path.realpath(file_path)
    if not real_path.startswith(real_root + os.sep):
        abort(400, "Invalid path.")

    if not os.path.isfile(real_path):
        abort(404, "File not found or expired.")

    # Optional ?name= lets the frontend restore the user's original filename.
    download_name = request.args.get("name", filename)

    return send_file(
        real_path,
        as_attachment=True,
        download_name=download_name,
    )
|
||||
14
backend/app/routes/health.py
Normal file
14
backend/app/routes/health.py
Normal file
@@ -0,0 +1,14 @@
|
||||
"""Health check endpoint."""
|
||||
from flask import Blueprint, jsonify
|
||||
|
||||
health_bp = Blueprint("health", __name__)


@health_bp.route("/health", methods=["GET"])
def health_check():
    """Liveness probe: respond 200 with basic service metadata."""
    payload = {
        "status": "healthy",
        "service": "SaaS-PDF API",
        "version": "1.0.0",
    }
    return jsonify(payload)
|
||||
122
backend/app/routes/image.py
Normal file
122
backend/app/routes/image.py
Normal file
@@ -0,0 +1,122 @@
|
||||
"""Image processing routes."""
|
||||
from flask import Blueprint, request, jsonify
|
||||
|
||||
from app.extensions import limiter
|
||||
from app.utils.file_validator import validate_file, FileValidationError
|
||||
from app.utils.sanitizer import generate_safe_path
|
||||
from app.tasks.image_tasks import convert_image_task, resize_image_task
|
||||
|
||||
image_bp = Blueprint("image", __name__)

ALLOWED_IMAGE_TYPES = ["png", "jpg", "jpeg", "webp"]
ALLOWED_OUTPUT_FORMATS = ["jpg", "png", "webp"]


@image_bp.route("/convert", methods=["POST"])
@limiter.limit("10/minute")
def convert_image_route():
    """Start an asynchronous image format conversion.

    Accepts: multipart/form-data with:
        - 'file': Image file (PNG, JPG, JPEG, WebP)
        - 'format': Target format ("jpg", "png", "webp")
        - 'quality' (optional): Quality 1-100 (default: 85)
    Returns: JSON with task_id for polling
    """
    if "file" not in request.files:
        return jsonify({"error": "No file provided."}), 400

    upload = request.files["file"]
    target_format = request.form.get("format", "").lower()
    raw_quality = request.form.get("quality", "85")

    # Reject unsupported targets up front.
    if target_format not in ALLOWED_OUTPUT_FORMATS:
        message = f"Invalid format. Supported: {', '.join(ALLOWED_OUTPUT_FORMATS)}"
        return jsonify({"error": message}), 400

    # Clamp quality into [1, 100]; non-numeric input falls back to 85.
    try:
        quality = max(1, min(100, int(raw_quality)))
    except ValueError:
        quality = 85

    try:
        original_filename, ext = validate_file(upload, allowed_types=ALLOWED_IMAGE_TYPES)
    except FileValidationError as exc:
        return jsonify({"error": exc.message}), exc.code

    # Persist under a server-generated path and queue the job.
    task_id, input_path = generate_safe_path(ext, folder_type="upload")
    upload.save(input_path)

    task = convert_image_task.delay(
        input_path, task_id, original_filename, target_format, quality
    )

    payload = {
        "task_id": task.id,
        "message": "Image conversion started. Poll /api/tasks/{task_id}/status for progress.",
    }
    return jsonify(payload), 202
|
||||
|
||||
|
||||
@image_bp.route("/resize", methods=["POST"])
@limiter.limit("10/minute")
def resize_image_route():
    """Start an asynchronous image resize.

    Accepts: multipart/form-data with:
        - 'file': Image file
        - 'width' (optional): Target width
        - 'height' (optional): Target height
        - 'quality' (optional): Quality 1-100 (default: 85)
    Returns: JSON with task_id for polling
    """
    if "file" not in request.files:
        return jsonify({"error": "No file provided."}), 400

    upload = request.files["file"]
    raw_width = request.form.get("width")
    raw_height = request.form.get("height")
    raw_quality = request.form.get("quality", "85")

    # Parse dimensions; missing values stay None (aspect-preserving resize).
    try:
        width = int(raw_width) if raw_width else None
        height = int(raw_height) if raw_height else None
    except ValueError:
        return jsonify({"error": "Width and height must be integers."}), 400

    if width is None and height is None:
        return jsonify({"error": "At least one of width or height is required."}), 400

    # Cap dimensions to a sane range.
    if width and (width < 1 or width > 10000):
        return jsonify({"error": "Width must be between 1 and 10000."}), 400
    if height and (height < 1 or height > 10000):
        return jsonify({"error": "Height must be between 1 and 10000."}), 400

    # Clamp quality into [1, 100]; non-numeric input falls back to 85.
    try:
        quality = max(1, min(100, int(raw_quality)))
    except ValueError:
        quality = 85

    try:
        original_filename, ext = validate_file(upload, allowed_types=ALLOWED_IMAGE_TYPES)
    except FileValidationError as exc:
        return jsonify({"error": exc.message}), exc.code

    task_id, input_path = generate_safe_path(ext, folder_type="upload")
    upload.save(input_path)

    task = resize_image_task.delay(
        input_path, task_id, original_filename, width, height, quality
    )

    payload = {
        "task_id": task.id,
        "message": "Image resize started. Poll /api/tasks/{task_id}/status for progress.",
    }
    return jsonify(payload), 202
|
||||
39
backend/app/routes/tasks.py
Normal file
39
backend/app/routes/tasks.py
Normal file
@@ -0,0 +1,39 @@
|
||||
"""Task status polling endpoint."""
|
||||
from flask import Blueprint, jsonify
|
||||
from celery.result import AsyncResult
|
||||
|
||||
from app.extensions import celery
|
||||
|
||||
tasks_bp = Blueprint("tasks", __name__)


@tasks_bp.route("/<task_id>/status", methods=["GET"])
def get_task_status(task_id: str):
    """
    Get the status of an async task.

    Returns:
        JSON with task state and result (if completed)
    """
    result = AsyncResult(task_id, app=celery)
    state = result.state

    response = {"task_id": task_id, "state": state}

    if state == "PENDING":
        # Unknown task IDs also report PENDING; Celery cannot distinguish.
        response["progress"] = "Task is waiting in queue..."
    elif state == "PROCESSING":
        info = result.info or {}
        response["progress"] = info.get("step", "Processing...")
    elif state == "SUCCESS":
        response["result"] = result.result or {}
    elif state == "FAILURE":
        response["error"] = str(result.info) if result.info else "Task failed."

    return jsonify(response)
|
||||
70
backend/app/routes/video.py
Normal file
70
backend/app/routes/video.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""Video processing routes."""
|
||||
from flask import Blueprint, request, jsonify
|
||||
|
||||
from app.extensions import limiter
|
||||
from app.utils.file_validator import validate_file, FileValidationError
|
||||
from app.utils.sanitizer import generate_safe_path
|
||||
from app.tasks.video_tasks import create_gif_task
|
||||
|
||||
video_bp = Blueprint("video", __name__)

# Formats accepted by the /to-gif endpoint.
ALLOWED_VIDEO_TYPES = ["mp4", "webm"]


@video_bp.route("/to-gif", methods=["POST"])
@limiter.limit("5/minute")
def video_to_gif_route():
    """
    Convert a video clip to an animated GIF.

    Accepts: multipart/form-data with:
        - 'file': Video file (MP4, WebM, max 50MB)
        - 'start_time' (optional): Start time in seconds (default: 0)
        - 'duration' (optional): Duration in seconds, max 15 (default: 5)
        - 'fps' (optional): Frames per second, max 20 (default: 10)
        - 'width' (optional): Output width, max 640 (default: 480)
    Returns: JSON with task_id for polling
    """
    if "file" not in request.files:
        return jsonify({"error": "No file provided."}), 400

    file = request.files["file"]

    # Parse and validate parameters
    try:
        start_time = float(request.form.get("start_time", 0))
        duration = float(request.form.get("duration", 5))
        fps = int(request.form.get("fps", 10))
        width = int(request.form.get("width", 480))
    except (ValueError, TypeError):
        return jsonify({"error": "Invalid parameters. Must be numeric."}), 400

    # Enforce limits — keeps GIF jobs bounded in output size and runtime.
    if start_time < 0:
        return jsonify({"error": "Start time cannot be negative."}), 400
    if duration <= 0 or duration > 15:
        # BUG FIX: the old message claimed a 0.5-second minimum that the
        # check never enforced; the actual contract is 0 < duration <= 15.
        return jsonify({"error": "Duration must be greater than 0 and at most 15 seconds."}), 400
    if fps < 1 or fps > 20:
        return jsonify({"error": "FPS must be between 1 and 20."}), 400
    if width < 100 or width > 640:
        return jsonify({"error": "Width must be between 100 and 640 pixels."}), 400

    try:
        original_filename, ext = validate_file(file, allowed_types=ALLOWED_VIDEO_TYPES)
    except FileValidationError as e:
        return jsonify({"error": e.message}), e.code

    # Save file under a server-generated, traversal-safe path
    task_id, input_path = generate_safe_path(ext, folder_type="upload")
    file.save(input_path)

    # Dispatch task
    task = create_gif_task.delay(
        input_path, task_id, original_filename,
        start_time, duration, fps, width,
    )

    return jsonify({
        "task_id": task.id,
        "message": "GIF creation started. Poll /api/tasks/{task_id}/status for progress.",
    }), 202
|
||||
1
backend/app/services/__init__.py
Normal file
1
backend/app/services/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Backend application services."""
|
||||
109
backend/app/services/compress_service.py
Normal file
109
backend/app/services/compress_service.py
Normal file
@@ -0,0 +1,109 @@
|
||||
"""PDF compression service using Ghostscript."""
|
||||
import os
|
||||
import subprocess
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)


class PDFCompressionError(Exception):
    """Custom exception for PDF compression failures."""
    pass


# Ghostscript quality presets (-dPDFSETTINGS values)
QUALITY_PRESETS = {
    "low": "/screen",  # 72 dpi — smallest file, lowest quality
    "medium": "/ebook",  # 150 dpi — good balance (default)
    "high": "/printer",  # 300 dpi — high quality, moderate compression
}


def compress_pdf(
    input_path: str, output_path: str, quality: str = "medium"
) -> dict:
    """
    Compress a PDF file using Ghostscript.

    Args:
        input_path: Path to the input PDF file
        output_path: Path for the compressed output file
        quality: Compression quality — "low", "medium", or "high"
            (unknown values silently fall back to "medium")

    Returns:
        dict with original_size, compressed_size, reduction_percent

    Raises:
        PDFCompressionError: If compression fails, times out, or
            Ghostscript is not installed
    """
    import shutil  # hoisted from the copy branch below for clarity

    if quality not in QUALITY_PRESETS:
        quality = "medium"

    gs_quality = QUALITY_PRESETS[quality]

    # Ensure output directory exists. FIX: guard against a bare filename —
    # os.path.dirname("") would make os.makedirs raise.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    cmd = [
        "gs",
        "-sDEVICE=pdfwrite",
        "-dCompatibilityLevel=1.4",
        f"-dPDFSETTINGS={gs_quality}",
        "-dNOPAUSE",
        "-dQUIET",
        "-dBATCH",
        "-dColorImageResolution=150",
        "-dGrayImageResolution=150",
        "-dMonoImageResolution=150",
        f"-sOutputFile={output_path}",
        input_path,
    ]

    try:
        original_size = os.path.getsize(input_path)

        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=120,  # bound runaway Ghostscript jobs
        )

        if result.returncode != 0:
            logger.error(f"Ghostscript compression failed: {result.stderr}")
            raise PDFCompressionError(
                f"Compression failed: {result.stderr or 'Unknown error'}"
            )

        if not os.path.exists(output_path):
            raise PDFCompressionError("Compressed file was not created.")

        compressed_size = os.path.getsize(output_path)

        # If compressed file is larger, keep original — never hand the user
        # a "compressed" file bigger than what they uploaded.
        if compressed_size >= original_size:
            shutil.copy2(input_path, output_path)
            compressed_size = original_size

        reduction = (
            ((original_size - compressed_size) / original_size) * 100
            if original_size > 0
            else 0
        )

        logger.info(
            f"PDF compression: {original_size} → {compressed_size} "
            f"({reduction:.1f}% reduction)"
        )

        return {
            "original_size": original_size,
            "compressed_size": compressed_size,
            "reduction_percent": round(reduction, 1),
        }

    except subprocess.TimeoutExpired:
        raise PDFCompressionError("Compression timed out. File may be too large.")
    except FileNotFoundError:
        # The "gs" binary itself was not found on PATH.
        raise PDFCompressionError("Ghostscript is not installed on the server.")
|
||||
169
backend/app/services/image_service.py
Normal file
169
backend/app/services/image_service.py
Normal file
@@ -0,0 +1,169 @@
|
||||
"""Image processing service using Pillow."""
|
||||
import os
|
||||
import logging
|
||||
|
||||
from PIL import Image
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ImageProcessingError(Exception):
    """Custom exception for image processing failures."""
    pass


# Supported format mappings: lowercase extension -> Pillow format name
FORMAT_MAP = {
    "jpg": "JPEG",
    "jpeg": "JPEG",
    "png": "PNG",
    "webp": "WEBP",
}


def convert_image(
    input_path: str,
    output_path: str,
    output_format: str,
    quality: int = 85,
) -> dict:
    """
    Convert an image to a different format.

    Args:
        input_path: Path to the input image
        output_path: Path for the output image
        output_format: Target format ("jpg", "png", "webp")
        quality: Output quality 1-100 (for lossy formats; clamped)

    Returns:
        dict with original_size, converted_size, dimensions

    Raises:
        ImageProcessingError: If conversion fails
    """
    output_format = output_format.lower()
    if output_format not in FORMAT_MAP:
        raise ImageProcessingError(
            f"Unsupported output format: {output_format}. "
            f"Supported: {', '.join(FORMAT_MAP.keys())}"
        )

    pil_format = FORMAT_MAP[output_format]
    # NOTE(review): assumes output_path always contains a directory
    # component; dirname("") would make makedirs raise — confirm callers.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    try:
        original_size = os.path.getsize(input_path)

        # Open and re-encode (strips any malicious payloads)
        with Image.open(input_path) as img:
            # Convert RGBA to RGB for JPEG (JPEG doesn't support alpha)
            if pil_format == "JPEG" and img.mode in ("RGBA", "P", "LA"):
                # Composite onto white so transparent regions don't turn black.
                background = Image.new("RGB", img.size, (255, 255, 255))
                if img.mode == "P":
                    # Palette images must be expanded before alpha extraction.
                    img = img.convert("RGBA")
                # Use the alpha channel (last band) as the paste mask when present.
                background.paste(img, mask=img.split()[-1] if "A" in img.mode else None)
                img = background

            width, height = img.size

            # Save with quality setting (quality applies to lossy formats only)
            save_kwargs = {}
            if pil_format in ("JPEG", "WEBP"):
                save_kwargs["quality"] = max(1, min(100, quality))
                save_kwargs["optimize"] = True
            elif pil_format == "PNG":
                save_kwargs["optimize"] = True

            img.save(output_path, format=pil_format, **save_kwargs)

        converted_size = os.path.getsize(output_path)

        logger.info(
            f"Image conversion: {input_path} → {output_format} "
            f"({original_size} → {converted_size})"
        )

        return {
            "original_size": original_size,
            "converted_size": converted_size,
            "width": width,
            "height": height,
            "format": output_format,
        }

    except (IOError, OSError, Image.DecompressionBombError) as e:
        raise ImageProcessingError(f"Image processing failed: {str(e)}")
|
||||
|
||||
|
||||
def resize_image(
    input_path: str,
    output_path: str,
    width: int | None = None,
    height: int | None = None,
    quality: int = 85,
) -> dict:
    """
    Resize an image while maintaining aspect ratio.

    Args:
        input_path: Path to the input image
        output_path: Path for the resized image
        width: Target width (None to auto-calculate from height)
        height: Target height (None to auto-calculate from width)
        quality: Output quality 1-100 (clamped, as in convert_image)

    Returns:
        dict with original and new dimensions

    Raises:
        ImageProcessingError: If resize fails
    """
    if width is None and height is None:
        raise ImageProcessingError("At least one of width or height must be specified.")

    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    try:
        with Image.open(input_path) as img:
            orig_width, orig_height = img.size

            # Calculate missing dimension to maintain aspect ratio
            if width and not height:
                height = int(orig_height * (width / orig_width))
            elif height and not width:
                width = int(orig_width * (height / orig_height))

            # Resize using high-quality resampling
            resized = img.resize((width, height), Image.Resampling.LANCZOS)

            # Detect format from output extension
            ext = os.path.splitext(output_path)[1].lower().strip(".")
            pil_format = FORMAT_MAP.get(ext, "PNG")

            save_kwargs = {"optimize": True}
            if pil_format in ("JPEG", "WEBP"):
                # FIX: clamp quality like convert_image does, so callers
                # passing out-of-range values can't trip up Pillow.
                save_kwargs["quality"] = max(1, min(100, quality))

            # FIX: flatten alpha onto white ONLY for JPEG. The old code also
            # flattened for WEBP, destroying transparency that WEBP supports
            # and diverging from convert_image's behavior.
            if pil_format == "JPEG" and resized.mode in ("RGBA", "P", "LA"):
                background = Image.new("RGB", resized.size, (255, 255, 255))
                if resized.mode == "P":
                    resized = resized.convert("RGBA")
                background.paste(
                    resized, mask=resized.split()[-1] if "A" in resized.mode else None
                )
                resized = background

            resized.save(output_path, format=pil_format, **save_kwargs)

        return {
            "original_width": orig_width,
            "original_height": orig_height,
            "new_width": width,
            "new_height": height,
        }

    except (IOError, OSError, Image.DecompressionBombError) as e:
        raise ImageProcessingError(f"Image resize failed: {str(e)}")
|
||||
170
backend/app/services/pdf_service.py
Normal file
170
backend/app/services/pdf_service.py
Normal file
@@ -0,0 +1,170 @@
|
||||
"""PDF conversion service using LibreOffice headless."""
|
||||
import os
|
||||
import subprocess
|
||||
import logging
|
||||
import tempfile
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PDFConversionError(Exception):
|
||||
"""Custom exception for PDF conversion failures."""
|
||||
pass
|
||||
|
||||
|
||||
def pdf_to_word(input_path: str, output_dir: str) -> str:
    """
    Convert a PDF file to Word (DOCX) format using LibreOffice headless.

    Args:
        input_path: Path to the input PDF file
        output_dir: Directory for the output file

    Returns:
        Path to the converted DOCX file

    Raises:
        PDFConversionError: If conversion fails
    """
    import shutil

    os.makedirs(output_dir, exist_ok=True)

    # Isolated LibreOffice user profile: prevents profile-lock conflicts
    # when several conversions run concurrently.
    profile_dir = tempfile.mkdtemp(prefix="lo_pdf2word_")

    command = [
        "soffice",
        "--headless",
        "--norestore",
        f"-env:UserInstallation=file://{profile_dir}",
        "--infilter=writer_pdf_import",
        "--convert-to", "docx",
        "--outdir", output_dir,
        input_path,
    ]

    try:
        logger.info(f"Running LibreOffice PDF→Word: {' '.join(command)}")

        proc = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=120,  # 2 minute timeout
            env={**os.environ, "HOME": profile_dir},
        )

        logger.info(f"LibreOffice stdout: {proc.stdout}")
        logger.info(f"LibreOffice stderr: {proc.stderr}")
        logger.info(f"LibreOffice returncode: {proc.returncode}")

        # LibreOffice derives the output name from the input file name.
        stem = os.path.splitext(os.path.basename(input_path))[0]
        expected = os.path.join(output_dir, f"{stem}.docx")

        # A non-empty output file means success, even when LibreOffice
        # exits non-zero due to harmless warnings (e.g. javaldx).
        if os.path.exists(expected) and os.path.getsize(expected) > 0:
            logger.info(f"PDF→Word conversion successful: {expected}")
            return expected

        # No output — a non-zero exit now indicates a genuine failure.
        if proc.returncode != 0:
            stderr_text = proc.stderr or ""
            meaningful = [
                ln for ln in stderr_text.strip().splitlines()
                if not ln.startswith("Warning: failed to launch javaldx")
            ]
            message = "\n".join(meaningful) if meaningful else stderr_text
            logger.error(f"LibreOffice PDF→Word failed: {message}")
            raise PDFConversionError(
                f"Conversion failed: {message or 'Unknown error'}"
            )

        # Exit code 0 but nothing was produced — log directory contents.
        listing = os.listdir(output_dir) if os.path.exists(output_dir) else []
        logger.error(
            f"Expected output not found at {expected}. "
            f"Files in output dir: {listing}"
        )
        raise PDFConversionError("Output file was not created.")

    except subprocess.TimeoutExpired:
        raise PDFConversionError("Conversion timed out. File may be too large.")
    except FileNotFoundError:
        raise PDFConversionError("LibreOffice is not installed on the server.")
    finally:
        # Always discard the throwaway user profile.
        shutil.rmtree(profile_dir, ignore_errors=True)
|
||||
|
||||
|
||||
def word_to_pdf(input_path: str, output_dir: str) -> str:
    """
    Convert a Word (DOC/DOCX) file to PDF format using LibreOffice headless.

    Args:
        input_path: Path to the input Word file
        output_dir: Directory for the output file

    Returns:
        Path to the converted PDF file

    Raises:
        PDFConversionError: If conversion fails
    """
    import shutil

    os.makedirs(output_dir, exist_ok=True)

    # Per-call LibreOffice user profile avoids profile-lock contention.
    profile_dir = tempfile.mkdtemp(prefix="lo_word2pdf_")

    command = [
        "soffice",
        "--headless",
        "--norestore",
        f"-env:UserInstallation=file://{profile_dir}",
        "--convert-to", "pdf",
        "--outdir", output_dir,
        input_path,
    ]

    try:
        proc = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=120,
            env={**os.environ, "HOME": profile_dir},
        )

        # Output name mirrors the input basename with a .pdf suffix.
        stem = os.path.splitext(os.path.basename(input_path))[0]
        expected = os.path.join(output_dir, f"{stem}.pdf")

        # Non-empty output wins even on a non-zero exit code, since
        # LibreOffice emits harmless warnings (e.g. javaldx) that way.
        if os.path.exists(expected) and os.path.getsize(expected) > 0:
            logger.info(f"Word→PDF conversion successful: {expected}")
            return expected

        if proc.returncode != 0:
            stderr_text = proc.stderr or ""
            meaningful = [
                ln for ln in stderr_text.strip().splitlines()
                if not ln.startswith("Warning: failed to launch javaldx")
            ]
            message = "\n".join(meaningful) if meaningful else stderr_text
            logger.error(f"LibreOffice Word→PDF failed: {message}")
            raise PDFConversionError(
                f"Conversion failed: {message or 'Unknown error'}"
            )

        raise PDFConversionError("Output file was not created.")

    except subprocess.TimeoutExpired:
        raise PDFConversionError("Conversion timed out. File may be too large.")
    except FileNotFoundError:
        raise PDFConversionError("LibreOffice is not installed on the server.")
    finally:
        # Always discard the throwaway user profile.
        shutil.rmtree(profile_dir, ignore_errors=True)
|
||||
154
backend/app/services/storage_service.py
Normal file
154
backend/app/services/storage_service.py
Normal file
@@ -0,0 +1,154 @@
|
||||
"""Storage service — S3 in production, local files in development."""
|
||||
import os
|
||||
import shutil
|
||||
import logging
|
||||
|
||||
from flask import current_app
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _is_s3_configured() -> bool:
    """Return True when both AWS credentials are present and non-blank."""
    access_key = current_app.config.get("AWS_ACCESS_KEY_ID")
    secret_key = current_app.config.get("AWS_SECRET_ACCESS_KEY")
    # Treat missing, empty, or whitespace-only values as "not configured".
    if not access_key or not secret_key:
        return False
    return bool(access_key.strip() and secret_key.strip())
|
||||
|
||||
|
||||
class StorageService:
    """Handle file storage — uses S3 when configured, local filesystem otherwise."""

    def __init__(self):
        # boto3 client is created lazily (see `client` property) so local
        # mode never imports boto3.
        self._client = None

    @property
    def use_s3(self) -> bool:
        # Re-checked on each access so a config change takes effect immediately.
        return _is_s3_configured()

    @property
    def client(self):
        """Lazy-initialize S3 client (only when S3 is configured)."""
        if self._client is None:
            import boto3
            self._client = boto3.client(
                "s3",
                region_name=current_app.config["AWS_S3_REGION"],
                aws_access_key_id=current_app.config["AWS_ACCESS_KEY_ID"],
                aws_secret_access_key=current_app.config["AWS_SECRET_ACCESS_KEY"],
            )
        return self._client

    @property
    def bucket(self):
        # Bucket name comes straight from app config.
        return current_app.config["AWS_S3_BUCKET"]

    def upload_file(self, local_path: str, task_id: str, folder: str = "outputs") -> str:
        """
        Upload / store a file.

        In S3 mode: uploads to S3 bucket.
        In local mode: copies file to the outputs directory.

        Returns:
            S3 key or local relative path (used as identifier)
        """
        filename = os.path.basename(local_path)
        # FIX: the key must embed the actual filename; previously it contained
        # a literal placeholder, so every stored key pointed nowhere.
        key = f"{folder}/{task_id}/{filename}"

        if self.use_s3:
            from botocore.exceptions import ClientError
            try:
                self.client.upload_file(local_path, self.bucket, key)
                return key
            except ClientError as e:
                raise RuntimeError(f"Failed to upload file to S3: {e}")
        else:
            # Local mode — keep file in the outputs directory
            output_dir = current_app.config["OUTPUT_FOLDER"]
            dest_dir = os.path.join(output_dir, task_id)
            os.makedirs(dest_dir, exist_ok=True)
            dest_path = os.path.join(dest_dir, filename)

            # Skip the copy when the file is already at its destination.
            if os.path.abspath(local_path) != os.path.abspath(dest_path):
                shutil.copy2(local_path, dest_path)

            logger.info(f"[Local] Stored file: {dest_path}")
            return key

    def generate_presigned_url(
        self, s3_key: str, expiry: int | None = None, original_filename: str | None = None
    ) -> str:
        """
        Generate a download URL.

        S3 mode: presigned URL.
        Local mode: /api/download/<task_id>/<filename>
        """
        if self.use_s3:
            from botocore.exceptions import ClientError
            if expiry is None:
                expiry = current_app.config.get("FILE_EXPIRY_SECONDS", 1800)

            params = {
                "Bucket": self.bucket,
                "Key": s3_key,
            }
            if original_filename:
                # Force browsers to download with the user's original name.
                params["ResponseContentDisposition"] = (
                    f'attachment; filename="{original_filename}"'
                )
            try:
                url = self.client.generate_presigned_url(
                    "get_object",
                    Params=params,
                    ExpiresIn=expiry,
                )
                return url
            except ClientError as e:
                raise RuntimeError(f"Failed to generate presigned URL: {e}")
        else:
            # Local mode — return path to Flask download route
            parts = s3_key.strip("/").split("/")
            # key = "outputs/<task_id>/<filename>"
            if len(parts) >= 3:
                task_id = parts[1]
                filename = parts[2]
            else:
                task_id = parts[0]
                filename = parts[-1]

            download_name = original_filename or filename
            # FIX: URL must embed the stored filename; previously it contained
            # a literal placeholder and the download route could never match.
            return f"/api/download/{task_id}/{filename}?name={download_name}"

    def delete_file(self, s3_key: str):
        """Delete a file from S3 (no-op in local mode)."""
        if self.use_s3:
            from botocore.exceptions import ClientError
            try:
                self.client.delete_object(Bucket=self.bucket, Key=s3_key)
            except ClientError:
                # Best-effort delete; a missing object is not an error here.
                pass

    def file_exists(self, s3_key: str) -> bool:
        """Check if a file exists."""
        if self.use_s3:
            from botocore.exceptions import ClientError
            try:
                self.client.head_object(Bucket=self.bucket, Key=s3_key)
                return True
            except ClientError:
                return False
        else:
            # Mirror the key layout used by upload_file.
            parts = s3_key.strip("/").split("/")
            if len(parts) >= 3:
                task_id = parts[1]
                filename = parts[2]
            else:
                task_id = parts[0]
                filename = parts[-1]
            output_dir = current_app.config["OUTPUT_FOLDER"]
            return os.path.isfile(os.path.join(output_dir, task_id, filename))
|
||||
|
||||
|
||||
# Singleton instance
|
||||
storage = StorageService()
|
||||
176
backend/app/services/video_service.py
Normal file
176
backend/app/services/video_service.py
Normal file
@@ -0,0 +1,176 @@
|
||||
"""Video to GIF conversion service using ffmpeg."""
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class VideoProcessingError(Exception):
|
||||
"""Custom exception for video processing failures."""
|
||||
pass
|
||||
|
||||
|
||||
# Safety constraints
|
||||
MAX_DURATION = 15 # seconds
|
||||
MAX_WIDTH = 640 # pixels
|
||||
MAX_FPS = 20
|
||||
DEFAULT_FPS = 10
|
||||
DEFAULT_WIDTH = 480
|
||||
|
||||
|
||||
def video_to_gif(
    input_path: str,
    output_path: str,
    start_time: float = 0,
    duration: float = 5,
    fps: int = DEFAULT_FPS,
    width: int = DEFAULT_WIDTH,
) -> dict:
    """
    Convert a video clip to an animated GIF using ffmpeg.

    Args:
        input_path: Path to the input video (MP4/WebM)
        output_path: Path for the output GIF
        start_time: Start time in seconds
        duration: Duration in seconds (max 15)
        fps: Frames per second (max 20)
        width: Output width in pixels (max 640)

    Returns:
        dict with output_size, duration, fps, dimensions

    Raises:
        VideoProcessingError: If conversion fails
    """
    # Clamp numeric parameters to safe ranges (also prevents injection).
    start_time = max(0, float(start_time))
    duration = max(0.5, min(MAX_DURATION, float(duration)))
    fps = max(1, min(MAX_FPS, int(fps)))
    width = max(100, min(MAX_WIDTH, int(width)))

    # FIX: os.makedirs("") raises; only create the parent when output_path
    # actually has a directory component.
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Two-pass palette approach for high-quality GIF
    palette_path = output_path + ".palette.png"

    try:
        # Pass 1: Generate optimized palette
        palette_cmd = [
            "ffmpeg",
            "-y",
            "-ss", str(start_time),
            "-t", str(duration),
            "-i", input_path,
            "-vf", f"fps={fps},scale={width}:-1:flags=lanczos,palettegen=stats_mode=diff",
            palette_path,
        ]

        result = subprocess.run(
            palette_cmd,
            capture_output=True,
            text=True,
            timeout=60,
        )

        if result.returncode != 0:
            logger.error(f"ffmpeg palette generation failed: {result.stderr}")
            raise VideoProcessingError("Failed to process video for GIF creation.")

        # Pass 2: Create GIF using palette
        gif_cmd = [
            "ffmpeg",
            "-y",
            "-ss", str(start_time),
            "-t", str(duration),
            "-i", input_path,
            "-i", palette_path,
            "-lavfi", f"fps={fps},scale={width}:-1:flags=lanczos [x]; [x][1:v] paletteuse=dither=bayer:bayer_scale=5",
            output_path,
        ]

        result = subprocess.run(
            gif_cmd,
            capture_output=True,
            text=True,
            timeout=120,
        )

        if result.returncode != 0:
            logger.error(f"ffmpeg GIF creation failed: {result.stderr}")
            raise VideoProcessingError("Failed to create GIF from video.")

        if not os.path.exists(output_path):
            raise VideoProcessingError("GIF file was not created.")

        output_size = os.path.getsize(output_path)

        # Get actual output dimensions
        actual_width, actual_height = _get_gif_dimensions(output_path)

        logger.info(
            f"Video→GIF: {input_path} → {output_path} "
            f"({output_size} bytes, {duration}s, {fps}fps, {actual_width}x{actual_height})"
        )

        return {
            "output_size": output_size,
            "duration": duration,
            "fps": fps,
            "width": actual_width,
            "height": actual_height,
        }

    except subprocess.TimeoutExpired:
        raise VideoProcessingError("GIF creation timed out. Video may be too large.")
    except FileNotFoundError:
        raise VideoProcessingError("ffmpeg is not installed on the server.")
    finally:
        # Cleanup intermediate palette file regardless of outcome.
        if os.path.exists(palette_path):
            os.remove(palette_path)
|
||||
|
||||
|
||||
def get_video_duration(input_path: str) -> float:
    """
    Get the duration of a video file in seconds.

    Returns 0.0 when the duration cannot be determined (probe failure,
    timeout, or ffprobe missing from the host).
    """
    cmd = [
        "ffprobe",
        "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        input_path,
    ]

    try:
        result = subprocess.run(
            cmd, capture_output=True, text=True, timeout=10
        )
        return float(result.stdout.strip())
    # FIX: also catch OSError (incl. FileNotFoundError when ffprobe is not
    # installed) so a probe failure degrades to 0.0 instead of crashing.
    except (subprocess.TimeoutExpired, ValueError, OSError):
        return 0.0
|
||||
|
||||
|
||||
def _get_gif_dimensions(gif_path: str) -> tuple[int, int]:
|
||||
"""Get GIF dimensions using ffprobe."""
|
||||
cmd = [
|
||||
"ffprobe",
|
||||
"-v", "error",
|
||||
"-select_streams", "v:0",
|
||||
"-show_entries", "stream=width,height",
|
||||
"-of", "csv=p=0",
|
||||
gif_path,
|
||||
]
|
||||
|
||||
try:
|
||||
result = subprocess.run(
|
||||
cmd, capture_output=True, text=True, timeout=10
|
||||
)
|
||||
parts = result.stdout.strip().split(",")
|
||||
if len(parts) == 2:
|
||||
return int(parts[0]), int(parts[1])
|
||||
except (subprocess.TimeoutExpired, ValueError):
|
||||
pass
|
||||
|
||||
return 0, 0
|
||||
1
backend/app/tasks/__init__.py
Normal file
1
backend/app/tasks/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Celery tasks for async file processing."""
|
||||
88
backend/app/tasks/compress_tasks.py
Normal file
88
backend/app/tasks/compress_tasks.py
Normal file
@@ -0,0 +1,88 @@
|
||||
"""Celery tasks for PDF compression."""
|
||||
import os
|
||||
import logging
|
||||
|
||||
from app.extensions import celery
|
||||
from app.services.compress_service import compress_pdf, PDFCompressionError
|
||||
from app.services.storage_service import storage
|
||||
from app.utils.sanitizer import cleanup_task_files
|
||||
|
||||
|
||||
def _cleanup(task_id: str):
    """Remove temp files for the task; keep outputs when serving from local disk."""
    # Local mode serves downloads directly from OUTPUT_FOLDER, so outputs
    # must survive; in S3 mode everything local is disposable.
    cleanup_task_files(task_id, keep_outputs=not storage.use_s3)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@celery.task(bind=True, name="app.tasks.compress_tasks.compress_pdf_task")
def compress_pdf_task(
    self,
    input_path: str,
    task_id: str,
    original_filename: str,
    quality: str = "medium",
):
    """
    Async task: Compress a PDF file.

    Args:
        input_path: Path to the uploaded PDF file
        task_id: Unique task identifier
        original_filename: Original filename for download
        quality: Compression quality ("low", "medium", "high")

    Returns:
        dict with download_url, compression stats, and file info
    """
    output_dir = os.path.join("/tmp/outputs", task_id)
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, f"{task_id}.pdf")

    try:
        self.update_state(
            state="PROCESSING",
            meta={"step": f"Compressing PDF ({quality} quality)..."},
        )

        # Compress using Ghostscript
        stats = compress_pdf(input_path, output_path, quality)

        self.update_state(state="PROCESSING", meta={"step": "Uploading result..."})

        # Store result (S3 or local, depending on configuration)
        s3_key = storage.upload_file(output_path, task_id, folder="outputs")

        # Download name preserves the user's original name with a suffix.
        name_without_ext = os.path.splitext(original_filename)[0]
        download_name = f"{name_without_ext}_compressed.pdf"

        download_url = storage.generate_presigned_url(
            s3_key, original_filename=download_name
        )

        logger.info(
            f"Task {task_id}: PDF compression completed — "
            f"{stats['reduction_percent']}% reduction"
        )
        return {
            "status": "completed",
            "download_url": download_url,
            "filename": download_name,
            "original_size": stats["original_size"],
            "compressed_size": stats["compressed_size"],
            "reduction_percent": stats["reduction_percent"],
        }

    except PDFCompressionError as e:
        logger.error(f"Task {task_id}: Compression error — {e}")
        return {"status": "failed", "error": str(e)}

    except Exception as e:
        logger.error(f"Task {task_id}: Unexpected error — {e}")
        return {"status": "failed", "error": "An unexpected error occurred."}

    finally:
        # Single cleanup point for every exit path (was duplicated per branch).
        _cleanup(task_id)
|
||||
128
backend/app/tasks/convert_tasks.py
Normal file
128
backend/app/tasks/convert_tasks.py
Normal file
@@ -0,0 +1,128 @@
|
||||
"""Celery tasks for PDF conversion (PDF↔Word)."""
|
||||
import os
|
||||
import logging
|
||||
|
||||
from app.extensions import celery
|
||||
from app.services.pdf_service import pdf_to_word, word_to_pdf, PDFConversionError
|
||||
from app.services.storage_service import storage
|
||||
from app.utils.sanitizer import cleanup_task_files
|
||||
|
||||
|
||||
def _cleanup(task_id: str):
    """Cleanup with local-aware flag."""
    # Local mode serves downloads directly from OUTPUT_FOLDER, so outputs
    # must survive; in S3 mode everything local is disposable.
    cleanup_task_files(task_id, keep_outputs=not storage.use_s3)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@celery.task(bind=True, name="app.tasks.convert_tasks.convert_pdf_to_word")
def convert_pdf_to_word(self, input_path: str, task_id: str, original_filename: str):
    """
    Async task: Convert PDF to Word document.

    Args:
        input_path: Path to the uploaded PDF file
        task_id: Unique task identifier
        original_filename: Original filename for download

    Returns:
        dict with download_url and file info
    """
    output_dir = os.path.join("/tmp/outputs", task_id)

    try:
        self.update_state(state="PROCESSING", meta={"step": "Converting PDF to Word..."})

        # Convert using LibreOffice (pdf_to_word creates output_dir itself)
        output_path = pdf_to_word(input_path, output_dir)

        self.update_state(state="PROCESSING", meta={"step": "Uploading result..."})

        # Store result (S3 or local, depending on configuration)
        s3_key = storage.upload_file(output_path, task_id, folder="outputs")

        # Download name preserves the user's original basename.
        name_without_ext = os.path.splitext(original_filename)[0]
        download_name = f"{name_without_ext}.docx"

        download_url = storage.generate_presigned_url(
            s3_key, original_filename=download_name
        )

        # Capture the size while the local copy still exists.
        output_size = os.path.getsize(output_path)

        logger.info(f"Task {task_id}: PDF→Word conversion completed")
        return {
            "status": "completed",
            "download_url": download_url,
            "filename": download_name,
            "output_size": output_size,
        }

    except PDFConversionError as e:
        logger.error(f"Task {task_id}: Conversion error — {e}")
        return {"status": "failed", "error": str(e)}

    except Exception as e:
        logger.error(f"Task {task_id}: Unexpected error — {e}")
        return {"status": "failed", "error": "An unexpected error occurred."}

    finally:
        # Single cleanup point for every exit path (was duplicated per branch).
        _cleanup(task_id)
|
||||
|
||||
|
||||
@celery.task(bind=True, name="app.tasks.convert_tasks.convert_word_to_pdf")
def convert_word_to_pdf(self, input_path: str, task_id: str, original_filename: str):
    """
    Async task: Convert Word document to PDF.

    Args:
        input_path: Path to the uploaded Word file
        task_id: Unique task identifier
        original_filename: Original filename for download

    Returns:
        dict with download_url and file info
    """
    output_dir = os.path.join("/tmp/outputs", task_id)

    try:
        self.update_state(state="PROCESSING", meta={"step": "Converting Word to PDF..."})

        # Convert using LibreOffice (word_to_pdf creates output_dir itself)
        output_path = word_to_pdf(input_path, output_dir)

        self.update_state(state="PROCESSING", meta={"step": "Uploading result..."})

        s3_key = storage.upload_file(output_path, task_id, folder="outputs")

        name_without_ext = os.path.splitext(original_filename)[0]
        download_name = f"{name_without_ext}.pdf"

        download_url = storage.generate_presigned_url(
            s3_key, original_filename=download_name
        )

        # Capture the size while the local copy still exists.
        output_size = os.path.getsize(output_path)

        logger.info(f"Task {task_id}: Word→PDF conversion completed")
        return {
            "status": "completed",
            "download_url": download_url,
            "filename": download_name,
            "output_size": output_size,
        }

    except PDFConversionError as e:
        logger.error(f"Task {task_id}: Conversion error — {e}")
        return {"status": "failed", "error": str(e)}

    except Exception as e:
        logger.error(f"Task {task_id}: Unexpected error — {e}")
        return {"status": "failed", "error": "An unexpected error occurred."}

    finally:
        # Single cleanup point for every exit path (was duplicated per branch).
        _cleanup(task_id)
|
||||
160
backend/app/tasks/image_tasks.py
Normal file
160
backend/app/tasks/image_tasks.py
Normal file
@@ -0,0 +1,160 @@
|
||||
"""Celery tasks for image processing."""
|
||||
import os
|
||||
import logging
|
||||
|
||||
from app.extensions import celery
|
||||
from app.services.image_service import convert_image, resize_image, ImageProcessingError
|
||||
from app.services.storage_service import storage
|
||||
from app.utils.sanitizer import cleanup_task_files
|
||||
|
||||
|
||||
def _cleanup(task_id: str):
    """Remove temp files for the task; keep outputs when serving from local disk."""
    # Local mode serves downloads directly from OUTPUT_FOLDER, so outputs
    # must survive; in S3 mode everything local is disposable.
    cleanup_task_files(task_id, keep_outputs=not storage.use_s3)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@celery.task(bind=True, name="app.tasks.image_tasks.convert_image_task")
def convert_image_task(
    self,
    input_path: str,
    task_id: str,
    original_filename: str,
    output_format: str,
    quality: int = 85,
):
    """
    Async task: Convert an image to a different format.

    Args:
        input_path: Path to the uploaded image
        task_id: Unique task identifier
        original_filename: Original filename for download
        output_format: Target format ("jpg", "png", "webp")
        quality: Output quality 1-100

    Returns:
        dict with download_url and conversion stats
    """
    output_dir = os.path.join("/tmp/outputs", task_id)
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, f"{task_id}.{output_format}")

    try:
        self.update_state(
            state="PROCESSING",
            meta={"step": f"Converting image to {output_format.upper()}..."},
        )

        stats = convert_image(input_path, output_path, output_format, quality)

        self.update_state(state="PROCESSING", meta={"step": "Uploading result..."})

        s3_key = storage.upload_file(output_path, task_id, folder="outputs")

        # Download name preserves the user's original basename.
        name_without_ext = os.path.splitext(original_filename)[0]
        download_name = f"{name_without_ext}.{output_format}"

        download_url = storage.generate_presigned_url(
            s3_key, original_filename=download_name
        )

        logger.info(f"Task {task_id}: Image conversion to {output_format} completed")
        return {
            "status": "completed",
            "download_url": download_url,
            "filename": download_name,
            "original_size": stats["original_size"],
            "converted_size": stats["converted_size"],
            "width": stats["width"],
            "height": stats["height"],
            "format": stats["format"],
        }

    except ImageProcessingError as e:
        logger.error(f"Task {task_id}: Image error — {e}")
        return {"status": "failed", "error": str(e)}

    except Exception as e:
        logger.error(f"Task {task_id}: Unexpected error — {e}")
        return {"status": "failed", "error": "An unexpected error occurred."}

    finally:
        # Single cleanup point for every exit path (was duplicated per branch).
        _cleanup(task_id)
|
||||
|
||||
|
||||
@celery.task(bind=True, name="app.tasks.image_tasks.resize_image_task")
def resize_image_task(
    self,
    input_path: str,
    task_id: str,
    original_filename: str,
    width: int | None = None,
    height: int | None = None,
    quality: int = 85,
):
    """
    Async task: Resize an image.

    Args:
        input_path: Path to the uploaded image
        task_id: Unique task identifier
        original_filename: Original filename for download
        width: Target width
        height: Target height
        quality: Output quality 1-100

    Returns:
        dict with download_url and resize info
    """
    # Keep the original extension so the output format is unchanged.
    ext = os.path.splitext(original_filename)[1].lstrip(".")
    output_dir = os.path.join("/tmp/outputs", task_id)
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, f"{task_id}.{ext}")

    try:
        self.update_state(
            state="PROCESSING",
            meta={"step": "Resizing image..."},
        )

        stats = resize_image(input_path, output_path, width, height, quality)

        self.update_state(state="PROCESSING", meta={"step": "Uploading result..."})

        s3_key = storage.upload_file(output_path, task_id, folder="outputs")

        name_without_ext = os.path.splitext(original_filename)[0]
        download_name = f"{name_without_ext}_resized.{ext}"

        download_url = storage.generate_presigned_url(
            s3_key, original_filename=download_name
        )

        logger.info(f"Task {task_id}: Image resize completed")
        return {
            "status": "completed",
            "download_url": download_url,
            "filename": download_name,
            "original_width": stats["original_width"],
            "original_height": stats["original_height"],
            "new_width": stats["new_width"],
            "new_height": stats["new_height"],
        }

    except ImageProcessingError as e:
        logger.error(f"Task {task_id}: Image error — {e}")
        return {"status": "failed", "error": str(e)}

    except Exception as e:
        logger.error(f"Task {task_id}: Unexpected error — {e}")
        return {"status": "failed", "error": "An unexpected error occurred."}

    finally:
        # Single cleanup point for every exit path (was duplicated per branch).
        _cleanup(task_id)
|
||||
96
backend/app/tasks/video_tasks.py
Normal file
96
backend/app/tasks/video_tasks.py
Normal file
@@ -0,0 +1,96 @@
|
||||
"""Celery tasks for video processing."""
|
||||
import os
|
||||
import logging
|
||||
|
||||
from app.extensions import celery
|
||||
from app.services.video_service import video_to_gif, VideoProcessingError
|
||||
from app.services.storage_service import storage
|
||||
from app.utils.sanitizer import cleanup_task_files
|
||||
|
||||
|
||||
def _cleanup(task_id: str):
    """Remove temp files for the task; keep outputs when serving from local disk."""
    # Local mode serves downloads directly from OUTPUT_FOLDER, so outputs
    # must survive; in S3 mode everything local is disposable.
    cleanup_task_files(task_id, keep_outputs=not storage.use_s3)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@celery.task(bind=True, name="app.tasks.video_tasks.create_gif_task")
def create_gif_task(
    self,
    input_path: str,
    task_id: str,
    original_filename: str,
    start_time: float = 0,
    duration: float = 5,
    fps: int = 10,
    width: int = 480,
):
    """
    Async task: Convert video clip to animated GIF.

    Args:
        input_path: Path to the uploaded video
        task_id: Unique task identifier
        original_filename: Original filename for download
        start_time: Start time in seconds
        duration: Duration in seconds
        fps: Frames per second
        width: Output width in pixels

    Returns:
        dict with download_url and GIF info
    """
    output_dir = os.path.join("/tmp/outputs", task_id)
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, f"{task_id}.gif")

    try:
        self.update_state(
            state="PROCESSING",
            meta={"step": "Creating GIF from video..."},
        )

        # video_to_gif clamps start/duration/fps/width to safe ranges.
        stats = video_to_gif(
            input_path, output_path,
            start_time=start_time,
            duration=duration,
            fps=fps,
            width=width,
        )

        self.update_state(state="PROCESSING", meta={"step": "Uploading result..."})

        s3_key = storage.upload_file(output_path, task_id, folder="outputs")

        name_without_ext = os.path.splitext(original_filename)[0]
        download_name = f"{name_without_ext}.gif"

        download_url = storage.generate_presigned_url(
            s3_key, original_filename=download_name
        )

        logger.info(f"Task {task_id}: Video→GIF creation completed")
        return {
            "status": "completed",
            "download_url": download_url,
            "filename": download_name,
            "output_size": stats["output_size"],
            "duration": stats["duration"],
            "fps": stats["fps"],
            "width": stats["width"],
            "height": stats["height"],
        }

    except VideoProcessingError as e:
        logger.error(f"Task {task_id}: Video error — {e}")
        return {"status": "failed", "error": str(e)}

    except Exception as e:
        logger.error(f"Task {task_id}: Unexpected error — {e}")
        return {"status": "failed", "error": "An unexpected error occurred."}

    finally:
        # Single cleanup point for every exit path (was duplicated per branch).
        _cleanup(task_id)
|
||||
1
backend/app/utils/__init__.py
Normal file
1
backend/app/utils/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Backend application utilities."""
|
||||
31
backend/app/utils/cleanup.py
Normal file
31
backend/app/utils/cleanup.py
Normal file
@@ -0,0 +1,31 @@
|
||||
"""Scheduled cleanup of expired temporary files."""
|
||||
import os
|
||||
import shutil
|
||||
import time
|
||||
|
||||
from flask import current_app
|
||||
|
||||
|
||||
def cleanup_expired_files():
    """Remove task directories older than FILE_EXPIRY_SECONDS.

    Scans both the upload and output folders and deletes any task-specific
    subdirectory whose modification time is older than the configured
    expiry window (default 1800 s).

    Returns:
        int: Number of directories removed in this sweep.
    """
    expiry = current_app.config.get("FILE_EXPIRY_SECONDS", 1800)
    now = time.time()
    removed_count = 0

    for folder_key in ("UPLOAD_FOLDER", "OUTPUT_FOLDER"):
        folder = current_app.config.get(folder_key)
        if not folder or not os.path.exists(folder):
            continue

        for task_dir_name in os.listdir(folder):
            task_dir = os.path.join(folder, task_dir_name)
            if not os.path.isdir(task_dir):
                continue

            # Check directory age based on modification time. A worker may
            # delete the directory between listdir() and getmtime(); treat a
            # vanished directory as already cleaned instead of letting the
            # whole sweep crash.
            try:
                dir_mtime = os.path.getmtime(task_dir)
            except OSError:
                continue

            if now - dir_mtime > expiry:
                shutil.rmtree(task_dir, ignore_errors=True)
                removed_count += 1

    return removed_count
|
||||
111
backend/app/utils/file_validator.py
Normal file
111
backend/app/utils/file_validator.py
Normal file
@@ -0,0 +1,111 @@
|
||||
"""File validation utilities — multi-layer security checks."""
|
||||
import os
|
||||
|
||||
import magic
|
||||
from flask import current_app
|
||||
from werkzeug.utils import secure_filename
|
||||
|
||||
|
||||
class FileValidationError(Exception):
    """Raised when an uploaded file fails any validation layer.

    Attributes:
        message: Human-readable reason for the rejection.
        code: HTTP status code to return to the client (default 400).
    """

    def __init__(self, message: str, code: int = 400):
        super().__init__(message)
        self.message = message
        self.code = code
|
||||
|
||||
|
||||
def validate_file(file_storage, allowed_types: list[str] | None = None):
    """
    Run an uploaded file through the layered validation pipeline.

    Args:
        file_storage: Flask FileStorage object from request.files
        allowed_types: Optional whitelist of extensions (e.g. ["pdf", "docx"]).
            Defaults to every extension configured for the app.

    Returns:
        tuple: (sanitized_filename, detected_extension)

    Raises:
        FileValidationError: Raised by the first layer that rejects the file.
    """
    cfg = current_app.config

    # Layer 1: the request must actually carry a named file
    if not file_storage or file_storage.filename == "":
        raise FileValidationError("No file provided.")

    filename = secure_filename(file_storage.filename)
    if not filename:
        raise FileValidationError("Invalid filename.")

    # Layer 2: extension must be on the whitelist
    ext = _get_extension(filename)
    configured = cfg.get("ALLOWED_EXTENSIONS", {})
    if allowed_types:
        valid_extensions = {
            key: mimes for key, mimes in configured.items() if key in allowed_types
        }
    else:
        valid_extensions = configured

    if ext not in valid_extensions:
        raise FileValidationError(
            f"File type '.{ext}' is not allowed. "
            f"Allowed types: {', '.join(valid_extensions.keys())}"
        )

    # Layer 3: size limit for this specific type (measured by seeking to EOF)
    file_storage.seek(0, os.SEEK_END)
    size = file_storage.tell()
    file_storage.seek(0)

    limit = cfg.get("FILE_SIZE_LIMITS", {}).get(ext, 20 * 1024 * 1024)  # 20MB default
    if size > limit:
        max_mb = limit / (1024 * 1024)
        raise FileValidationError(
            f"File too large. Maximum size for .{ext} files is {max_mb:.0f}MB."
        )

    if size == 0:
        raise FileValidationError("File is empty.")

    # Layer 4: magic-byte MIME sniffing must agree with the extension
    header = file_storage.read(8192)
    file_storage.seek(0)

    detected_mime = magic.from_buffer(header, mime=True)
    if detected_mime not in valid_extensions.get(ext, []):
        raise FileValidationError(
            f"File content does not match extension '.{ext}'. "
            f"Detected type: {detected_mime}"
        )

    # Layer 5: type-specific content checks
    if ext == "pdf":
        _check_pdf_safety(header)

    return filename, ext
|
||||
|
||||
|
||||
def _get_extension(filename: str) -> str:
|
||||
"""Extract and normalize file extension."""
|
||||
if "." not in filename:
|
||||
return ""
|
||||
return filename.rsplit(".", 1)[1].lower()
|
||||
|
||||
|
||||
def _check_pdf_safety(file_header: bytes):
|
||||
"""Check PDF for potentially dangerous embedded content."""
|
||||
dangerous_patterns = [b"/JS", b"/JavaScript", b"/Launch", b"/EmbeddedFile"]
|
||||
header_str = file_header
|
||||
|
||||
for pattern in dangerous_patterns:
|
||||
if pattern in header_str:
|
||||
raise FileValidationError(
|
||||
"PDF contains potentially unsafe content (embedded scripts)."
|
||||
)
|
||||
77
backend/app/utils/sanitizer.py
Normal file
77
backend/app/utils/sanitizer.py
Normal file
@@ -0,0 +1,77 @@
|
||||
"""Filename sanitization and temporary file management."""
|
||||
import os
|
||||
import uuid
|
||||
|
||||
from flask import current_app
|
||||
|
||||
|
||||
def generate_safe_path(extension: str, folder_type: str = "upload") -> tuple[str, str]:
    """
    Build a collision-free file path keyed by a freshly generated UUID.

    Args:
        extension: File extension (without dot)
        folder_type: "upload" for input files, "output" for processed files

    Returns:
        tuple: (task_id, full_file_path)
    """
    task_id = str(uuid.uuid4())

    # Resolve the base directory from app config, then give each task its
    # own subdirectory so concurrent uploads can never collide.
    config_key = "UPLOAD_FOLDER" if folder_type == "upload" else "OUTPUT_FOLDER"
    task_dir = os.path.join(current_app.config[config_key], task_id)
    os.makedirs(task_dir, exist_ok=True)

    return task_id, os.path.join(task_dir, f"{task_id}.{extension}")
|
||||
|
||||
|
||||
def get_output_path(task_id: str, extension: str) -> str:
    """
    Resolve (and create) the output location for a processed file.

    Args:
        task_id: The task UUID
        extension: Output file extension

    Returns:
        Full output file path
    """
    # Mirror the upload layout: one subdirectory per task under OUTPUT_FOLDER.
    task_dir = os.path.join(current_app.config["OUTPUT_FOLDER"], task_id)
    os.makedirs(task_dir, exist_ok=True)
    return os.path.join(task_dir, f"{task_id}.{extension}")
|
||||
|
||||
|
||||
def cleanup_task_files(task_id: str, keep_outputs: bool = False):
    """
    Delete the temporary directories belonging to one task.

    Args:
        task_id: The task UUID
        keep_outputs: If True, only clean uploads (used in local storage mode)
    """
    import shutil

    cfg = current_app.config
    # The upload directory is always removed.
    targets = [os.path.join(cfg.get("UPLOAD_FOLDER", "/tmp/uploads"), task_id)]

    # Outputs are removed only when they have already been shipped to S3.
    if not keep_outputs:
        targets.append(os.path.join(cfg.get("OUTPUT_FOLDER", "/tmp/outputs"), task_id))

    for path in targets:
        if os.path.exists(path):
            shutil.rmtree(path, ignore_errors=True)
|
||||
11
backend/celery_worker.py
Normal file
11
backend/celery_worker.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""Celery worker entry point."""
|
||||
from app import create_app
|
||||
from app.extensions import celery
|
||||
|
||||
app = create_app()
|
||||
|
||||
# Import all tasks so Celery discovers them
|
||||
import app.tasks.convert_tasks # noqa: F401
|
||||
import app.tasks.compress_tasks # noqa: F401
|
||||
import app.tasks.image_tasks # noqa: F401
|
||||
import app.tasks.video_tasks # noqa: F401
|
||||
93
backend/config/__init__.py
Normal file
93
backend/config/__init__.py
Normal file
@@ -0,0 +1,93 @@
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
class BaseConfig:
    """Base configuration shared by every environment."""
    # NOTE(review): the fallback SECRET_KEY must be overridden via the
    # SECRET_KEY environment variable in any real deployment.
    SECRET_KEY = os.getenv("SECRET_KEY", "change-me-in-production")

    # File upload settings
    # Hard cap enforced by Flask on the whole request body (default 50 MB).
    MAX_CONTENT_LENGTH = int(os.getenv("MAX_CONTENT_LENGTH_MB", 50)) * 1024 * 1024
    UPLOAD_FOLDER = os.getenv("UPLOAD_FOLDER", "/tmp/uploads")
    OUTPUT_FOLDER = os.getenv("OUTPUT_FOLDER", "/tmp/outputs")
    # Age in seconds after which temporary task files may be purged (30 min).
    FILE_EXPIRY_SECONDS = int(os.getenv("FILE_EXPIRY_SECONDS", 1800))

    # Allowed file extensions and MIME types
    # (extension -> list of acceptable magic-byte MIME types)
    ALLOWED_EXTENSIONS = {
        "pdf": ["application/pdf"],
        "doc": ["application/msword"],
        "docx": [
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        ],
        "png": ["image/png"],
        "jpg": ["image/jpeg"],
        "jpeg": ["image/jpeg"],
        "webp": ["image/webp"],
        "mp4": ["video/mp4"],
        "webm": ["video/webm"],
    }

    # File size limits per type (bytes)
    FILE_SIZE_LIMITS = {
        "pdf": 20 * 1024 * 1024,  # 20MB
        "doc": 15 * 1024 * 1024,  # 15MB
        "docx": 15 * 1024 * 1024,  # 15MB
        "png": 10 * 1024 * 1024,  # 10MB
        "jpg": 10 * 1024 * 1024,  # 10MB
        "jpeg": 10 * 1024 * 1024,  # 10MB
        "webp": 10 * 1024 * 1024,  # 10MB
        "mp4": 50 * 1024 * 1024,  # 50MB
        "webm": 50 * 1024 * 1024,  # 50MB
    }

    # Redis
    REDIS_URL = os.getenv("REDIS_URL", "redis://redis:6379/0")

    # Celery — broker and result backend use separate Redis databases (0 / 1).
    CELERY_BROKER_URL = os.getenv("CELERY_BROKER_URL", "redis://redis:6379/0")
    CELERY_RESULT_BACKEND = os.getenv("CELERY_RESULT_BACKEND", "redis://redis:6379/1")

    # AWS S3
    AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
    AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
    AWS_S3_BUCKET = os.getenv("AWS_S3_BUCKET", "saas-pdf-temp-files")
    AWS_S3_REGION = os.getenv("AWS_S3_REGION", "eu-west-1")

    # CORS — comma-separated list of allowed origins.
    CORS_ORIGINS = os.getenv("CORS_ORIGINS", "http://localhost:5173").split(",")

    # Rate Limiting
    RATELIMIT_STORAGE_URI = os.getenv("REDIS_URL", "redis://redis:6379/0")
    RATELIMIT_DEFAULT = "100/hour"
|
||||
|
||||
|
||||
class DevelopmentConfig(BaseConfig):
    """Development configuration — debug enabled, base rate limits."""
    DEBUG = True
    TESTING = False
|
||||
|
||||
|
||||
class ProductionConfig(BaseConfig):
    """Production configuration — debug off, tighter rate limits."""
    DEBUG = False
    TESTING = False
    # Stricter rate limits in production
    RATELIMIT_DEFAULT = "60/hour"
|
||||
|
||||
|
||||
class TestingConfig(BaseConfig):
    """Testing configuration — isolated temp folders, Flask TESTING mode."""
    DEBUG = True
    TESTING = True
    # Separate directories so test runs never touch real upload/output data.
    UPLOAD_FOLDER = "/tmp/test_uploads"
    OUTPUT_FOLDER = "/tmp/test_outputs"
|
||||
|
||||
|
||||
# Maps environment names (e.g. FLASK_ENV) to config classes; "default" is
# used when the environment name is unset or unrecognized.
config = {
    "development": DevelopmentConfig,
    "production": ProductionConfig,
    "testing": TestingConfig,
    "default": DevelopmentConfig,
}
|
||||
27
backend/requirements.txt
Normal file
27
backend/requirements.txt
Normal file
@@ -0,0 +1,27 @@
|
||||
# Core Framework
|
||||
flask>=3.0,<4.0
|
||||
flask-cors>=4.0,<5.0
|
||||
flask-limiter[redis]>=3.5,<4.0
|
||||
flask-talisman>=1.1,<2.0
|
||||
gunicorn>=22.0,<23.0
|
||||
python-dotenv>=1.0,<2.0
|
||||
|
||||
# Task Queue
|
||||
celery[redis]>=5.3,<6.0
|
||||
redis>=5.0,<6.0
|
||||
flower>=2.0,<3.0
|
||||
|
||||
# File Processing
|
||||
Pillow>=10.0,<11.0
|
||||
python-magic>=0.4.27,<1.0
|
||||
ffmpeg-python>=0.2,<1.0
|
||||
|
||||
# AWS
|
||||
boto3>=1.34,<2.0
|
||||
|
||||
# Security
|
||||
werkzeug>=3.0,<4.0
|
||||
|
||||
# Testing
|
||||
pytest>=8.0,<9.0
|
||||
pytest-flask>=1.3,<2.0
|
||||
0
backend/tests/__init__.py
Normal file
0
backend/tests/__init__.py
Normal file
26
backend/tests/conftest.py
Normal file
26
backend/tests/conftest.py
Normal file
@@ -0,0 +1,26 @@
|
||||
import os
|
||||
import pytest
|
||||
from app import create_app
|
||||
|
||||
|
||||
@pytest.fixture
def app():
    """Application instance configured for the test suite."""
    os.environ['FLASK_ENV'] = 'testing'
    flask_app = create_app()
    flask_app.config['TESTING'] = True
    yield flask_app
|
||||
|
||||
|
||||
@pytest.fixture
def client(app):
    """HTTP test client bound to the test application."""
    test_client = app.test_client()
    return test_client
|
||||
|
||||
|
||||
@pytest.fixture
def runner(app):
    """CLI runner bound to the test application."""
    cli_runner = app.test_cli_runner()
    return cli_runner
|
||||
21
backend/tests/test_compress.py
Normal file
21
backend/tests/test_compress.py
Normal file
@@ -0,0 +1,21 @@
|
||||
"""Tests for PDF compression endpoint."""
|
||||
import io
|
||||
|
||||
|
||||
def test_compress_pdf_no_file(client):
    """POST /api/compress/pdf without file should return 400."""
    assert client.post('/api/compress/pdf').status_code == 400
|
||||
|
||||
|
||||
def test_compress_pdf_wrong_extension(client):
    """POST /api/compress/pdf with non-PDF should return 400."""
    payload = {'file': (io.BytesIO(b'hello'), 'test.docx')}
    resp = client.post(
        '/api/compress/pdf',
        data=payload,
        content_type='multipart/form-data',
    )
    assert resp.status_code == 400
|
||||
42
backend/tests/test_convert.py
Normal file
42
backend/tests/test_convert.py
Normal file
@@ -0,0 +1,42 @@
|
||||
"""Tests for file conversion endpoints."""
|
||||
import io
|
||||
|
||||
|
||||
def test_pdf_to_word_no_file(client):
    """POST /api/convert/pdf-to-word without file should return 400."""
    resp = client.post('/api/convert/pdf-to-word')
    assert resp.status_code == 400
    assert 'error' in resp.get_json()
|
||||
|
||||
|
||||
def test_pdf_to_word_wrong_extension(client):
    """POST /api/convert/pdf-to-word with non-PDF should return 400."""
    payload = {'file': (io.BytesIO(b'hello world'), 'test.txt')}
    resp = client.post(
        '/api/convert/pdf-to-word',
        data=payload,
        content_type='multipart/form-data',
    )
    assert resp.status_code == 400
|
||||
|
||||
|
||||
def test_word_to_pdf_no_file(client):
    """POST /api/convert/word-to-pdf without file should return 400."""
    assert client.post('/api/convert/word-to-pdf').status_code == 400
|
||||
|
||||
|
||||
def test_word_to_pdf_wrong_extension(client):
    """POST /api/convert/word-to-pdf with non-Word file should return 400."""
    payload = {'file': (io.BytesIO(b'hello world'), 'test.pdf')}
    resp = client.post(
        '/api/convert/word-to-pdf',
        data=payload,
        content_type='multipart/form-data',
    )
    assert resp.status_code == 400
|
||||
15
backend/tests/test_health.py
Normal file
15
backend/tests/test_health.py
Normal file
@@ -0,0 +1,15 @@
|
||||
"""Tests for health check and app creation."""
|
||||
|
||||
|
||||
def test_health_endpoint(client):
    """GET /api/health should return 200."""
    resp = client.get('/api/health')
    assert resp.status_code == 200
    assert resp.get_json()['status'] == 'healthy'
|
||||
|
||||
|
||||
def test_app_creates(app):
    """App factory should build an instance running in TESTING mode."""
    assert app is not None
    assert app.config['TESTING'] is True
|
||||
27
backend/tests/test_image.py
Normal file
27
backend/tests/test_image.py
Normal file
@@ -0,0 +1,27 @@
|
||||
"""Tests for image conversion & resize endpoints."""
|
||||
import io
|
||||
|
||||
|
||||
def test_image_convert_no_file(client):
    """POST /api/image/convert without file should return 400."""
    assert client.post('/api/image/convert').status_code == 400
|
||||
|
||||
|
||||
def test_image_resize_no_file(client):
    """POST /api/image/resize without file should return 400."""
    assert client.post('/api/image/resize').status_code == 400
|
||||
|
||||
|
||||
def test_image_convert_wrong_type(client):
    """POST /api/image/convert with non-image should return 400."""
    payload = {'file': (io.BytesIO(b'not an image'), 'test.pdf')}
    resp = client.post(
        '/api/image/convert',
        data=payload,
        content_type='multipart/form-data',
    )
    assert resp.status_code == 400
|
||||
19
backend/tests/test_utils.py
Normal file
19
backend/tests/test_utils.py
Normal file
@@ -0,0 +1,19 @@
|
||||
"""Tests for text utility functions."""
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Add backend to path so we can import utils directly
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
|
||||
|
||||
from app.utils.file_validator import validate_file
|
||||
from app.utils.sanitizer import generate_safe_path
|
||||
|
||||
|
||||
def test_generate_safe_path():
|
||||
"""generate_safe_path should produce UUID-based path."""
|
||||
path = generate_safe_path('uploads', 'test.pdf')
|
||||
assert path.startswith('uploads')
|
||||
assert path.endswith('.pdf')
|
||||
# Should contain a UUID directory
|
||||
parts = path.replace('\\', '/').split('/')
|
||||
assert len(parts) >= 3 # uploads / uuid / filename.pdf
|
||||
7
backend/wsgi.py
Normal file
7
backend/wsgi.py
Normal file
@@ -0,0 +1,7 @@
|
||||
"""WSGI entry point for Gunicorn."""
|
||||
from app import create_app
|
||||
|
||||
app = create_app()
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(host="0.0.0.0", port=5000)
|
||||
Reference in New Issue
Block a user