Files
SaaS-PDF/backend/app/tasks/pdf_ai_tasks.py
Your Name 314f847ece fix: Add scrollable container to ToolSelectorModal for small screens
- Add max-h-[90vh] and flex-col to modal content container
- Wrap tools grid in max-h-[50vh] overflow-y-auto container
- Add overscroll-contain for smooth scroll behavior on mobile
- Fixes issue where 21 PDF tools overflow viewport on small screens
2026-04-01 22:22:48 +02:00

360 lines
11 KiB
Python

"""Celery tasks for PDF AI tools — Chat, Summarize, Translate, Table Extract."""
import os
import logging
import json
from flask import current_app
from app.extensions import celery
from app.services.pdf_ai_service import (
chat_with_pdf,
summarize_pdf,
translate_pdf,
extract_tables,
PdfAiError,
)
from app.services.task_tracking_service import finalize_task_tracking
from app.services.translation_guardrails import (
get_cached_translation,
store_cached_translation,
)
from app.utils.sanitizer import cleanup_task_files
logger = logging.getLogger(__name__)
def _cleanup(task_id: str):
    """Hand *task_id* to ``cleanup_task_files`` with ``keep_outputs=False``.

    Every task in this module returns its result inline as a dict, so no
    output artifact is asked to be kept.
    """
    # NOTE(review): presumably removes the uploaded PDF and any working files
    # for this task — confirm against app.utils.sanitizer.
    cleanup_task_files(task_id, keep_outputs=False)
def _build_pdf_ai_error_payload(task_id: str, error: PdfAiError, tool: str) -> dict:
    """Build a normalized error payload for AI tasks and emit structured logs.

    Args:
        task_id: Identifier of the failed task; echoed in the payload and log.
        error: The raised ``PdfAiError``. ``error_code``, ``user_message`` and
            ``detail`` are read with ``getattr`` so a missing attribute falls
            back to a default instead of raising.
        tool: Tool slug (e.g. ``"chat-pdf"``); written to the log entry only.

    Returns:
        A dict with ``status`` (always ``"failed"``), ``error_code``,
        ``user_message`` and ``task_id``. ``detail`` is included only when the
        error carries a truthy one.
    """
    payload = {
        "status": "failed",
        "error_code": getattr(error, "error_code", "PDF_AI_ERROR"),
        # An empty/None message falls through to the generic text.
        "user_message": getattr(error, "user_message", str(error))
        or "AI processing failed.",
        "task_id": task_id,
    }

    detail = getattr(error, "detail", None)
    if detail:
        payload["detail"] = detail

    logger.error(
        json.dumps(
            {
                "event": "pdf_ai_task_failed",
                "tool": tool,
                "task_id": task_id,
                "error_code": payload["error_code"],
                "user_message": payload["user_message"],
                "detail": detail,
            },
            ensure_ascii=False,
            # This runs inside the callers' ``except PdfAiError`` handlers, so
            # a TypeError from a non-JSON-serializable ``detail`` would escape
            # the task and hide the original failure. Stringify such values
            # for the log line instead.
            default=str,
        )
    )
    return payload
# ---------------------------------------------------------------------------
# Chat with PDF
# ---------------------------------------------------------------------------
@celery.task(bind=True, name="app.tasks.pdf_ai_tasks.chat_with_pdf_task")
def chat_with_pdf_task(
    self,
    input_path: str,
    task_id: str,
    original_filename: str,
    question: str,
    user_id: int | None = None,
    usage_source: str = "web",
    api_key_id: int | None = None,
):
    """Ask a question about a PDF document.

    Args:
        input_path: Path of the PDF handed to ``chat_with_pdf``.
        task_id: Application task id used for logging and file cleanup.
        original_filename: Forwarded to ``finalize_task_tracking``.
        question: The question forwarded to ``chat_with_pdf``.
        user_id: Forwarded to ``finalize_task_tracking``.
        usage_source: Forwarded to ``finalize_task_tracking``.
        api_key_id: Forwarded to ``finalize_task_tracking``.

    Returns:
        On success, a dict with ``status="completed"``, ``reply`` and
        ``pages_analyzed``. On ``PdfAiError``, the payload built by
        ``_build_pdf_ai_error_payload``. On any other exception,
        ``{"status": "failed", "error": "An unexpected error occurred."}``.
    """
    # Identical for every outcome; only ``result`` differs per branch.
    tracking = {
        "user_id": user_id,
        "tool": "chat-pdf",
        "original_filename": original_filename,
        "usage_source": usage_source,
        "api_key_id": api_key_id,
        "celery_task_id": self.request.id,
    }

    try:
        self.update_state(state="PROCESSING", meta={"step": "Analyzing document..."})
        data = chat_with_pdf(input_path, question)
        result = {
            "status": "completed",
            "reply": data["reply"],
            "pages_analyzed": data["pages_analyzed"],
        }
        logger.info(f"Task {task_id}: Chat with PDF completed")
        finalize_task_tracking(result=result, **tracking)
        return result
    except PdfAiError as e:
        result = _build_pdf_ai_error_payload(task_id, e, "chat-pdf")
        finalize_task_tracking(result=result, **tracking)
        return result
    except Exception as e:
        # logger.exception keeps the traceback; the generic message is all
        # that is returned to the caller.
        logger.exception(f"Task {task_id}: Unexpected error — {e}")
        result = {"status": "failed", "error": "An unexpected error occurred."}
        finalize_task_tracking(result=result, **tracking)
        return result
    finally:
        # Runs after tracking on every path, including when tracking itself
        # raises inside a handler, so task files are never left behind.
        _cleanup(task_id)
# ---------------------------------------------------------------------------
# Summarize PDF
# ---------------------------------------------------------------------------
@celery.task(bind=True, name="app.tasks.pdf_ai_tasks.summarize_pdf_task")
def summarize_pdf_task(
    self,
    input_path: str,
    task_id: str,
    original_filename: str,
    length: str = "medium",
    user_id: int | None = None,
    usage_source: str = "web",
    api_key_id: int | None = None,
):
    """Generate a summary of a PDF document.

    Args:
        input_path: Path of the PDF handed to ``summarize_pdf``.
        task_id: Application task id used for logging and file cleanup.
        original_filename: Forwarded to ``finalize_task_tracking``.
        length: Summary length option forwarded to ``summarize_pdf``.
        user_id: Forwarded to ``finalize_task_tracking``.
        usage_source: Forwarded to ``finalize_task_tracking``.
        api_key_id: Forwarded to ``finalize_task_tracking``.

    Returns:
        On success, a dict with ``status="completed"``, ``summary`` and
        ``pages_analyzed``. On ``PdfAiError``, the payload built by
        ``_build_pdf_ai_error_payload``. On any other exception,
        ``{"status": "failed", "error": "An unexpected error occurred."}``.
    """
    # Identical for every outcome; only ``result`` differs per branch.
    tracking = {
        "user_id": user_id,
        "tool": "summarize-pdf",
        "original_filename": original_filename,
        "usage_source": usage_source,
        "api_key_id": api_key_id,
        "celery_task_id": self.request.id,
    }

    try:
        self.update_state(state="PROCESSING", meta={"step": "Summarizing document..."})
        data = summarize_pdf(input_path, length)
        result = {
            "status": "completed",
            "summary": data["summary"],
            "pages_analyzed": data["pages_analyzed"],
        }
        logger.info(f"Task {task_id}: PDF summarize completed")
        finalize_task_tracking(result=result, **tracking)
        return result
    except PdfAiError as e:
        result = _build_pdf_ai_error_payload(task_id, e, "summarize-pdf")
        finalize_task_tracking(result=result, **tracking)
        return result
    except Exception as e:
        # logger.exception keeps the traceback; the generic message is all
        # that is returned to the caller.
        logger.exception(f"Task {task_id}: Unexpected error — {e}")
        result = {"status": "failed", "error": "An unexpected error occurred."}
        finalize_task_tracking(result=result, **tracking)
        return result
    finally:
        # Runs after tracking on every path, including when tracking itself
        # raises inside a handler, so task files are never left behind.
        _cleanup(task_id)
# ---------------------------------------------------------------------------
# Translate PDF
# ---------------------------------------------------------------------------
@celery.task(bind=True, name="app.tasks.pdf_ai_tasks.translate_pdf_task")
def translate_pdf_task(
    self,
    input_path: str,
    task_id: str,
    original_filename: str,
    target_language: str,
    source_language: str | None = None,
    user_id: int | None = None,
    usage_source: str = "web",
    api_key_id: int | None = None,
):
    """Translate a PDF document to another language.

    A cached translation is looked up first; ``translate_pdf`` is called, and
    its result stored in the cache, only on a miss.

    Args:
        input_path: Path of the PDF to translate; also part of the cache lookup.
        task_id: Application task id used for logging and file cleanup.
        original_filename: Forwarded to ``finalize_task_tracking``.
        target_language: Language to translate into.
        source_language: Source language, or ``None``; the cache calls receive
            ``"auto"`` in place of a falsy value, while ``translate_pdf``
            receives it unchanged.
        user_id: Forwarded to ``finalize_task_tracking``.
        usage_source: Forwarded to ``finalize_task_tracking``.
        api_key_id: Forwarded to ``finalize_task_tracking``.

    Returns:
        On success, a dict with ``status="completed"``, ``translation``,
        ``pages_analyzed``, ``target_language``, ``source_language``,
        ``detected_source_language``, ``provider`` (suffixed with
        ``" (cached)"`` on a cache hit) and ``chunks_translated``. On
        ``PdfAiError``, the payload built by ``_build_pdf_ai_error_payload``.
        On any other exception,
        ``{"status": "failed", "error": "An unexpected error occurred."}``.
    """
    # Identical for every outcome; only ``result`` differs per branch.
    tracking = {
        "user_id": user_id,
        "tool": "translate-pdf",
        "original_filename": original_filename,
        "usage_source": usage_source,
        "api_key_id": api_key_id,
        "celery_task_id": self.request.id,
    }
    cache_source_language = source_language or "auto"

    try:
        self.update_state(
            state="PROCESSING",
            meta={"step": "Translating document with provider fallback..."},
        )

        # Cache lookup: skip the AI call if an identical translation exists.
        cached = get_cached_translation(
            input_path, target_language, cache_source_language
        )
        if cached is not None:
            # Build a new dict rather than mutating the cached object, so a
            # cache that returns a shared reference never accumulates
            # repeated " (cached)" suffixes.
            data = {
                **cached,
                "provider": f"{cached.get('provider', 'unknown')} (cached)",
            }
        else:
            data = translate_pdf(
                input_path, target_language, source_language=source_language
            )
            # Store the successful result for future cache hits.
            store_cached_translation(
                input_path,
                target_language,
                cache_source_language,
                data,
            )

        result = {
            "status": "completed",
            "translation": data["translation"],
            "pages_analyzed": data["pages_analyzed"],
            "target_language": data["target_language"],
            "source_language": data.get("source_language"),
            "detected_source_language": data.get("detected_source_language"),
            "provider": data.get("provider"),
            "chunks_translated": data.get("chunks_translated"),
        }
        logger.info(f"Task {task_id}: PDF translate completed")
        finalize_task_tracking(result=result, **tracking)
        return result
    except PdfAiError as e:
        result = _build_pdf_ai_error_payload(task_id, e, "translate-pdf")
        finalize_task_tracking(result=result, **tracking)
        return result
    except Exception as e:
        # logger.exception keeps the traceback; the generic message is all
        # that is returned to the caller.
        logger.exception(f"Task {task_id}: Unexpected error — {e}")
        result = {"status": "failed", "error": "An unexpected error occurred."}
        finalize_task_tracking(result=result, **tracking)
        return result
    finally:
        # Runs after tracking on every path, including when tracking itself
        # raises inside a handler, so task files are never left behind.
        _cleanup(task_id)
# ---------------------------------------------------------------------------
# Extract Tables
# ---------------------------------------------------------------------------
@celery.task(bind=True, name="app.tasks.pdf_ai_tasks.extract_tables_task")
def extract_tables_task(
    self,
    input_path: str,
    task_id: str,
    original_filename: str,
    user_id: int | None = None,
    usage_source: str = "web",
    api_key_id: int | None = None,
):
    """Extract tables from a PDF document.

    Args:
        input_path: Path of the PDF handed to ``extract_tables``.
        task_id: Application task id used for logging and file cleanup.
        original_filename: Forwarded to ``finalize_task_tracking``.
        user_id: Forwarded to ``finalize_task_tracking``.
        usage_source: Forwarded to ``finalize_task_tracking``.
        api_key_id: Forwarded to ``finalize_task_tracking``.

    Returns:
        On success, a dict with ``status="completed"``, ``tables`` and
        ``tables_found``. On ``PdfAiError``, the payload built by
        ``_build_pdf_ai_error_payload``. On any other exception,
        ``{"status": "failed", "error": "An unexpected error occurred."}``.
    """
    # Identical for every outcome; only ``result`` differs per branch.
    tracking = {
        "user_id": user_id,
        "tool": "extract-tables",
        "original_filename": original_filename,
        "usage_source": usage_source,
        "api_key_id": api_key_id,
        "celery_task_id": self.request.id,
    }

    try:
        self.update_state(state="PROCESSING", meta={"step": "Extracting tables..."})
        data = extract_tables(input_path)
        result = {
            "status": "completed",
            "tables": data["tables"],
            "tables_found": data["tables_found"],
        }
        logger.info(f"Task {task_id}: Table extraction completed")
        finalize_task_tracking(result=result, **tracking)
        return result
    except PdfAiError as e:
        result = _build_pdf_ai_error_payload(task_id, e, "extract-tables")
        finalize_task_tracking(result=result, **tracking)
        return result
    except Exception as e:
        # logger.exception keeps the traceback; the generic message is all
        # that is returned to the caller.
        logger.exception(f"Task {task_id}: Unexpected error — {e}")
        result = {"status": "failed", "error": "An unexpected error occurred."}
        finalize_task_tracking(result=result, **tracking)
        return result
    finally:
        # Runs after tracking on every path, including when tracking itself
        # raises inside a handler, so task files are never left behind.
        _cleanup(task_id)