feat: Initialize frontend with React, Vite, and Tailwind CSS
- Set up main entry point for React application. - Create About, Home, NotFound, Privacy, and Terms pages with SEO support. - Implement API service for file uploads and task management. - Add global styles using Tailwind CSS. - Create utility functions for SEO and text processing. - Configure Vite for development and production builds. - Set up Nginx configuration for serving frontend and backend. - Add scripts for cleanup of expired files and sitemap generation. - Implement deployment script for production environment.
This commit is contained in:
1
backend/app/utils/__init__.py
Normal file
1
backend/app/utils/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""Backend application utilities."""
|
||||
31
backend/app/utils/cleanup.py
Normal file
31
backend/app/utils/cleanup.py
Normal file
@@ -0,0 +1,31 @@
|
||||
"""Scheduled cleanup of expired temporary files."""
|
||||
import os
|
||||
import shutil
|
||||
import time
|
||||
|
||||
from flask import current_app
|
||||
|
||||
|
||||
def cleanup_expired_files():
    """Remove expired task directories from the upload and output folders.

    Settings are read from ``current_app.config``. Every immediate
    subdirectory of ``UPLOAD_FOLDER`` and ``OUTPUT_FOLDER`` whose modification
    time lies more than ``FILE_EXPIRY_SECONDS`` (default 1800) in the past is
    deleted recursively. Entries that are not directories are left untouched,
    and a folder that is unset or does not exist is skipped.

    Returns:
        int: Number of directories that were actually removed.
    """
    expiry = current_app.config.get("FILE_EXPIRY_SECONDS", 1800)
    now = time.time()
    removed_count = 0

    for folder_key in ("UPLOAD_FOLDER", "OUTPUT_FOLDER"):
        folder = current_app.config.get(folder_key)
        if not folder or not os.path.exists(folder):
            continue

        for task_dir_name in os.listdir(folder):
            task_dir = os.path.join(folder, task_dir_name)
            if not os.path.isdir(task_dir):
                continue

            # Check directory age based on modification time. The directory
            # may disappear between listdir() and this stat call (e.g. another
            # cleanup removed it); skip it instead of aborting the sweep.
            try:
                dir_mtime = os.path.getmtime(task_dir)
            except OSError:
                continue

            if now - dir_mtime <= expiry:
                continue

            shutil.rmtree(task_dir, ignore_errors=True)

            # rmtree() swallows errors here, so confirm the directory is gone
            # before reporting it as removed.
            if not os.path.exists(task_dir):
                removed_count += 1

    return removed_count
|
||||
111
backend/app/utils/file_validator.py
Normal file
111
backend/app/utils/file_validator.py
Normal file
@@ -0,0 +1,111 @@
|
||||
"""File validation utilities — multi-layer security checks."""
|
||||
import os
|
||||
|
||||
import magic
|
||||
from flask import current_app
|
||||
from werkzeug.utils import secure_filename
|
||||
|
||||
|
||||
class FileValidationError(Exception):
    """Raised when an uploaded file fails one of the validation checks.

    Instances expose ``message`` (the human-readable reason) and ``code``
    (an integer, 400 unless overridden — presumably an HTTP status code).
    """

    def __init__(self, message: str, code: int = 400):
        # Pass the text to Exception so str(exc) and exc.args carry it too.
        super().__init__(message)
        self.message = message
        self.code = code
|
||||
|
||||
|
||||
def validate_file(file_storage, allowed_types: list[str] | None = None):
    """
    Run an uploaded file through a sequence of validation checks.

    The checks run in order and the first failure raises: presence of a
    file and filename, filename sanitization, extension whitelist, size
    limits (including the empty-file case), sniffed MIME type, and finally
    an extra content scan for PDFs.

    Args:
        file_storage: Flask FileStorage object from request.files
        allowed_types: Extensions to accept (e.g., ["pdf", "docx"]). When
            falsy, every extension in the ALLOWED_EXTENSIONS config is
            accepted.

    Returns:
        tuple: (sanitized_filename, detected_extension)

    Raises:
        FileValidationError: If any check fails.
    """
    settings = current_app.config

    # Layer 1: a file with a usable name must have been supplied
    if not file_storage or file_storage.filename == "":
        raise FileValidationError("No file provided.")

    safe_name = secure_filename(file_storage.filename)
    if not safe_name:
        raise FileValidationError("Invalid filename.")

    # Layer 2: the extension must be whitelisted. ALLOWED_EXTENSIONS is used
    # as a mapping of extension -> accepted MIME types (see Layer 4).
    ext = _get_extension(safe_name)
    configured = settings.get("ALLOWED_EXTENSIONS", {})

    if allowed_types:
        permitted = {
            name: mimes
            for name, mimes in configured.items()
            if name in allowed_types
        }
    else:
        permitted = configured

    if ext not in permitted:
        raise FileValidationError(
            f"File type '.{ext}' is not allowed. "
            f"Allowed types: {', '.join(permitted.keys())}"
        )

    # Layer 3: measure the stream by seeking to its end, then rewind so the
    # later reads start from the beginning again
    file_storage.seek(0, os.SEEK_END)
    size_bytes = file_storage.tell()
    file_storage.seek(0)

    per_type_limits = settings.get("FILE_SIZE_LIMITS", {})
    limit_bytes = per_type_limits.get(ext, 20 * 1024 * 1024)  # Default 20MB

    if size_bytes > limit_bytes:
        limit_mb = limit_bytes / (1024 * 1024)
        raise FileValidationError(
            f"File too large. Maximum size for .{ext} files is {limit_mb:.0f}MB."
        )

    if size_bytes == 0:
        raise FileValidationError("File is empty.")

    # Layer 4: sniff the MIME type from the first 8192 bytes and compare it
    # with the types configured for this extension
    head = file_storage.read(8192)
    file_storage.seek(0)

    sniffed_mime = magic.from_buffer(head, mime=True)
    accepted_mimes = permitted.get(ext, [])

    if sniffed_mime not in accepted_mimes:
        raise FileValidationError(
            f"File content does not match extension '.{ext}'. "
            f"Detected type: {sniffed_mime}"
        )

    # Layer 5: type-specific content checks (only the bytes read above are
    # inspected)
    if ext == "pdf":
        _check_pdf_safety(head)

    return safe_name, ext
|
||||
|
||||
|
||||
def _get_extension(filename: str) -> str:
|
||||
"""Extract and normalize file extension."""
|
||||
if "." not in filename:
|
||||
return ""
|
||||
return filename.rsplit(".", 1)[1].lower()
|
||||
|
||||
|
||||
def _check_pdf_safety(file_header: bytes):
|
||||
"""Check PDF for potentially dangerous embedded content."""
|
||||
dangerous_patterns = [b"/JS", b"/JavaScript", b"/Launch", b"/EmbeddedFile"]
|
||||
header_str = file_header
|
||||
|
||||
for pattern in dangerous_patterns:
|
||||
if pattern in header_str:
|
||||
raise FileValidationError(
|
||||
"PDF contains potentially unsafe content (embedded scripts)."
|
||||
)
|
||||
77
backend/app/utils/sanitizer.py
Normal file
77
backend/app/utils/sanitizer.py
Normal file
@@ -0,0 +1,77 @@
|
||||
"""Filename sanitization and temporary file management."""
|
||||
import os
|
||||
import uuid
|
||||
|
||||
from flask import current_app
|
||||
|
||||
|
||||
def generate_safe_path(extension: str, folder_type: str = "upload") -> tuple[str, str]:
    """
    Create a fresh UUID-named task directory and return a file path inside it.

    Args:
        extension: File extension (without dot)
        folder_type: "upload" selects UPLOAD_FOLDER; any other value
            selects OUTPUT_FOLDER

    Returns:
        tuple: (task_id, full_file_path), where the file is named
        "<task_id>.<extension>" inside the "<base folder>/<task_id>" directory
    """
    task_id = str(uuid.uuid4())

    # Anything other than "upload" is treated as the output location.
    folder_key = "UPLOAD_FOLDER" if folder_type == "upload" else "OUTPUT_FOLDER"

    # Create task-specific directory
    task_dir = os.path.join(current_app.config[folder_key], task_id)
    os.makedirs(task_dir, exist_ok=True)

    return task_id, os.path.join(task_dir, f"{task_id}.{extension}")
|
||||
|
||||
|
||||
def get_output_path(task_id: str, extension: str) -> str:
    """
    Build the output file path for a task, creating its directory if needed.

    Args:
        task_id: The task UUID
        extension: Output file extension (without dot)

    Returns:
        Path of the form "<OUTPUT_FOLDER>/<task_id>/<task_id>.<extension>"
    """
    task_dir = os.path.join(current_app.config["OUTPUT_FOLDER"], task_id)

    # The directory is created eagerly so callers can write to the path at once.
    os.makedirs(task_dir, exist_ok=True)

    return os.path.join(task_dir, f"{task_id}.{extension}")
|
||||
|
||||
|
||||
def cleanup_task_files(task_id: str, keep_outputs: bool = False):
    """
    Remove temporary files for a given task.

    The task's directory under UPLOAD_FOLDER is always removed; its directory
    under OUTPUT_FOLDER is removed as well unless ``keep_outputs`` is true.
    Missing directories and deletion errors are ignored.

    A ``task_id`` that is empty, ".", "..", or contains a path separator is
    ignored entirely: joined onto the base folder it would resolve to the
    base folder itself or to a location outside it, and the recursive delete
    would destroy unrelated data.

    Args:
        task_id: The task UUID
        keep_outputs: If True, only clean uploads (used in local storage mode)
    """
    import shutil

    # Refuse anything that is not a single, plain directory name.
    if (
        not task_id
        or task_id in (".", "..")
        or os.path.basename(task_id) != task_id
    ):
        return

    upload_dir = current_app.config.get("UPLOAD_FOLDER", "/tmp/uploads")
    output_dir = current_app.config.get("OUTPUT_FOLDER", "/tmp/outputs")

    # Always clean uploads; only clean outputs when they are not being kept
    # (i.e. when using S3 and the files were already uploaded there).
    base_dirs = [upload_dir] if keep_outputs else [upload_dir, output_dir]

    for base_dir in base_dirs:
        task_dir = os.path.join(base_dir, task_id)
        if os.path.exists(task_dir):
            shutil.rmtree(task_dir, ignore_errors=True)
|
||||
Reference in New Issue
Block a user