feat: Enhance task access control and session management

- Implemented API and web task access assertions in the task status polling endpoint.
- Added functions to remember and check task access in user sessions.
- Updated task status tests to validate access control based on session data.
- Enhanced download route tests to ensure proper access checks.
- Improved SEO metadata handling with dynamic social preview images.
- Updated sitemap generation to include blog posts and new tools.
- Added a social preview SVG for better sharing on social media platforms.
This commit is contained in:
Your Name
2026-03-17 21:19:23 +02:00
parent ff5bd19335
commit 3f24a7ea3e
17 changed files with 384 additions and 75 deletions

View File

@@ -3,6 +3,14 @@ import os
from flask import Blueprint, send_file, abort, request, current_app
from app.services.policy_service import (
PolicyError,
assert_api_task_access,
assert_web_task_access,
resolve_api_actor,
resolve_web_actor,
)
download_bp = Blueprint("download", __name__)
@@ -20,6 +28,16 @@ def download_file(task_id: str, filename: str):
if ".." in filename or "/" in filename or "\\" in filename:
abort(400, "Invalid filename.")
try:
if request.headers.get("X-API-Key", "").strip():
actor = resolve_api_actor()
assert_api_task_access(actor, task_id)
else:
actor = resolve_web_actor()
assert_web_task_access(actor, task_id)
except PolicyError as exc:
abort(exc.status_code, exc.message)
output_dir = current_app.config["OUTPUT_FOLDER"]
file_path = os.path.join(output_dir, task_id, filename)

View File

@@ -1,9 +1,16 @@
"""Task status polling endpoint."""
from flask import Blueprint, jsonify
from flask import Blueprint, jsonify, request
from celery.result import AsyncResult
from app.extensions import celery
from app.middleware.rate_limiter import limiter
from app.services.policy_service import (
PolicyError,
assert_api_task_access,
assert_web_task_access,
resolve_api_actor,
resolve_web_actor,
)
tasks_bp = Blueprint("tasks", __name__)
@@ -17,6 +24,16 @@ def get_task_status(task_id: str):
Returns:
JSON with task state and result (if completed)
"""
try:
if request.headers.get("X-API-Key", "").strip():
actor = resolve_api_actor()
assert_api_task_access(actor, task_id)
else:
actor = resolve_web_actor()
assert_web_task_access(actor, task_id)
except PolicyError as exc:
return jsonify({"error": exc.message}), exc.status_code
result = AsyncResult(task_id, app=celery)
response = {

View File

@@ -13,6 +13,7 @@ from app.services.account_service import (
record_usage_event,
)
from app.utils.auth import get_current_user_id, logout_user_session
from app.utils.auth import has_session_task_access, remember_task_access
from app.utils.file_validator import validate_file
FREE_PLAN = "free"
@@ -202,6 +203,9 @@ def assert_quota_available(actor: ActorContext):
def record_accepted_usage(actor: ActorContext, tool: str, celery_task_id: str):
"""Record one accepted usage event after task dispatch succeeds."""
if actor.source == "web":
remember_task_access(celery_task_id)
record_usage_event(
user_id=actor.user_id,
source=actor.source,
@@ -225,3 +229,14 @@ def assert_api_task_access(actor: ActorContext, task_id: str):
"""Ensure one API actor can poll one task id."""
if actor.user_id is None or not has_task_access(actor.user_id, "api", task_id):
raise PolicyError("Task not found.", 404)
def assert_web_task_access(actor: ActorContext, task_id: str):
"""Ensure one web browser session can access one task id."""
if actor.user_id is not None and has_task_access(actor.user_id, "web", task_id):
return
if has_session_task_access(task_id):
return
raise PolicyError("Task not found.", 404)

View File

@@ -1,6 +1,9 @@
"""Session helpers for authenticated routes."""
from flask import session
TASK_ACCESS_SESSION_KEY = "task_access_ids"
MAX_TRACKED_TASK_IDS = 200
def get_current_user_id() -> int | None:
"""Return the authenticated user id from session storage."""
@@ -8,11 +11,31 @@ def get_current_user_id() -> int | None:
return user_id if isinstance(user_id, int) else None
def remember_task_access(task_id: str):
"""Persist one web task id in the active browser session."""
tracked = session.get(TASK_ACCESS_SESSION_KEY, [])
if not isinstance(tracked, list):
tracked = []
normalized = [value for value in tracked if isinstance(value, str) and value != task_id]
normalized.append(task_id)
session[TASK_ACCESS_SESSION_KEY] = normalized[-MAX_TRACKED_TASK_IDS:]
def has_session_task_access(task_id: str) -> bool:
"""Return whether the active browser session owns one web task id."""
tracked = session.get(TASK_ACCESS_SESSION_KEY, [])
return isinstance(tracked, list) and task_id in tracked
def login_user_session(user_id: int):
"""Persist the authenticated user in the Flask session."""
tracked_task_ids = session.get(TASK_ACCESS_SESSION_KEY, [])
session.clear()
session.permanent = True
session["user_id"] = user_id
if isinstance(tracked_task_ids, list) and tracked_task_ids:
session[TASK_ACCESS_SESSION_KEY] = tracked_task_ids[-MAX_TRACKED_TASK_IDS:]
def logout_user_session():

View File

@@ -1,6 +1,8 @@
"""Tests for file download route."""
import os
from app.utils.auth import TASK_ACCESS_SESSION_KEY
class TestDownload:
def test_download_nonexistent_file(self, client):
@@ -31,6 +33,9 @@ class TestDownload:
with open(file_path, 'wb') as f:
f.write(b'%PDF-1.4 test content')
with client.session_transaction() as session:
session[TASK_ACCESS_SESSION_KEY] = [task_id]
response = client.get(f'/api/download/{task_id}/{filename}')
assert response.status_code == 200
assert response.data == b'%PDF-1.4 test content'
@@ -45,5 +50,21 @@ class TestDownload:
with open(os.path.join(output_dir, filename), 'wb') as f:
f.write(b'%PDF-1.4')
with client.session_transaction() as session:
session[TASK_ACCESS_SESSION_KEY] = [task_id]
response = client.get(f'/api/download/{task_id}/{filename}?name=my-document.pdf')
assert response.status_code == 200
assert response.status_code == 200
def test_download_requires_task_access(self, client, app):
"""Should not serve an existing file without session or API ownership."""
task_id = 'protected-download-id'
filename = 'output.pdf'
output_dir = os.path.join(app.config['OUTPUT_FOLDER'], task_id)
os.makedirs(output_dir, exist_ok=True)
with open(os.path.join(output_dir, filename), 'wb') as f:
f.write(b'%PDF-1.4 protected')
response = client.get(f'/api/download/{task_id}/{filename}')
assert response.status_code == 404

View File

@@ -1,6 +1,8 @@
"""Tests for task status polling route."""
from unittest.mock import patch, MagicMock
from app.utils.auth import TASK_ACCESS_SESSION_KEY
class TestTaskStatus:
def test_pending_task(self, client, monkeypatch):
@@ -9,6 +11,9 @@ class TestTaskStatus:
mock_result.state = 'PENDING'
mock_result.info = None
with client.session_transaction() as session:
session[TASK_ACCESS_SESSION_KEY] = ['test-task-id']
with patch('app.routes.tasks.AsyncResult', return_value=mock_result):
response = client.get('/api/tasks/test-task-id/status')
@@ -24,6 +29,9 @@ class TestTaskStatus:
mock_result.state = 'PROCESSING'
mock_result.info = {'step': 'Converting page 3 of 10...'}
with client.session_transaction() as session:
session[TASK_ACCESS_SESSION_KEY] = ['processing-id']
with patch('app.routes.tasks.AsyncResult', return_value=mock_result):
response = client.get('/api/tasks/processing-id/status')
@@ -42,6 +50,9 @@ class TestTaskStatus:
'filename': 'output.pdf',
}
with client.session_transaction() as session:
session[TASK_ACCESS_SESSION_KEY] = ['success-id']
with patch('app.routes.tasks.AsyncResult', return_value=mock_result):
response = client.get('/api/tasks/success-id/status')
@@ -57,10 +68,19 @@ class TestTaskStatus:
mock_result.state = 'FAILURE'
mock_result.info = Exception('Conversion failed due to corrupt PDF.')
with client.session_transaction() as session:
session[TASK_ACCESS_SESSION_KEY] = ['failed-id']
with patch('app.routes.tasks.AsyncResult', return_value=mock_result):
response = client.get('/api/tasks/failed-id/status')
assert response.status_code == 200
data = response.get_json()
assert data['state'] == 'FAILURE'
assert 'error' in data
assert 'error' in data
def test_unknown_task_without_access_returns_404(self, client):
"""Should not expose task state without session or API ownership."""
response = client.get('/api/tasks/unknown-task/status')
assert response.status_code == 404