feat: harden PDF translation workflow
This commit is contained in:
@@ -20,6 +20,11 @@ OPENROUTER_API_KEY=
|
||||
OPENROUTER_MODEL=nvidia/nemotron-3-super-120b-a12b:free
|
||||
OPENROUTER_BASE_URL=https://openrouter.ai/api/v1/chat/completions
|
||||
|
||||
# Premium document translation (recommended for Translate PDF)
|
||||
DEEPL_API_KEY=
|
||||
DEEPL_API_URL=https://api-free.deepl.com/v2/translate
|
||||
DEEPL_TIMEOUT_SECONDS=90
|
||||
|
||||
# AWS S3
|
||||
AWS_ACCESS_KEY_ID=
|
||||
AWS_SECRET_ACCESS_KEY=
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
"""PDF AI tool routes — Chat, Summarize, Translate, Table Extract."""
|
||||
|
||||
from flask import Blueprint, request, jsonify
|
||||
|
||||
from app.extensions import limiter
|
||||
@@ -70,10 +71,12 @@ def chat_pdf_route():
|
||||
)
|
||||
record_accepted_usage(actor, "chat-pdf", task.id)
|
||||
|
||||
return jsonify({
|
||||
"task_id": task.id,
|
||||
"message": "Processing your question. Poll /api/tasks/{task_id}/status for progress.",
|
||||
}), 202
|
||||
return jsonify(
|
||||
{
|
||||
"task_id": task.id,
|
||||
"message": "Processing your question. Poll /api/tasks/{task_id}/status for progress.",
|
||||
}
|
||||
), 202
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -124,10 +127,12 @@ def summarize_pdf_route():
|
||||
)
|
||||
record_accepted_usage(actor, "summarize-pdf", task.id)
|
||||
|
||||
return jsonify({
|
||||
"task_id": task.id,
|
||||
"message": "Summarizing document. Poll /api/tasks/{task_id}/status for progress.",
|
||||
}), 202
|
||||
return jsonify(
|
||||
{
|
||||
"task_id": task.id,
|
||||
"message": "Summarizing document. Poll /api/tasks/{task_id}/status for progress.",
|
||||
}
|
||||
), 202
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -149,6 +154,7 @@ def translate_pdf_route():
|
||||
|
||||
file = request.files["file"]
|
||||
target_language = request.form.get("target_language", "").strip()
|
||||
source_language = request.form.get("source_language", "auto").strip()
|
||||
|
||||
if not target_language:
|
||||
return jsonify({"error": "No target language specified."}), 400
|
||||
@@ -174,14 +180,17 @@ def translate_pdf_route():
|
||||
task_id,
|
||||
original_filename,
|
||||
target_language,
|
||||
source_language,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "translate-pdf", task.id)
|
||||
|
||||
return jsonify({
|
||||
"task_id": task.id,
|
||||
"message": "Translating document. Poll /api/tasks/{task_id}/status for progress.",
|
||||
}), 202
|
||||
return jsonify(
|
||||
{
|
||||
"task_id": task.id,
|
||||
"message": "Translating document. Poll /api/tasks/{task_id}/status for progress.",
|
||||
}
|
||||
), 202
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -226,7 +235,9 @@ def extract_tables_route():
|
||||
)
|
||||
record_accepted_usage(actor, "extract-tables", task.id)
|
||||
|
||||
return jsonify({
|
||||
"task_id": task.id,
|
||||
"message": "Extracting tables. Poll /api/tasks/{task_id}/status for progress.",
|
||||
}), 202
|
||||
return jsonify(
|
||||
{
|
||||
"task_id": task.id,
|
||||
"message": "Extracting tables. Poll /api/tasks/{task_id}/status for progress.",
|
||||
}
|
||||
), 202
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
"""B2B API v1 tool routes — authenticated via X-API-Key, Pro plan only."""
|
||||
|
||||
import os
|
||||
import uuid
|
||||
import logging
|
||||
@@ -37,16 +38,25 @@ from app.tasks.flowchart_tasks import extract_flowchart_task
|
||||
from app.tasks.ocr_tasks import ocr_image_task, ocr_pdf_task
|
||||
from app.tasks.removebg_tasks import remove_bg_task
|
||||
from app.tasks.pdf_ai_tasks import (
|
||||
chat_with_pdf_task, summarize_pdf_task, translate_pdf_task, extract_tables_task,
|
||||
chat_with_pdf_task,
|
||||
summarize_pdf_task,
|
||||
translate_pdf_task,
|
||||
extract_tables_task,
|
||||
)
|
||||
from app.tasks.pdf_to_excel_tasks import pdf_to_excel_task
|
||||
from app.tasks.html_to_pdf_tasks import html_to_pdf_task
|
||||
from app.tasks.qrcode_tasks import generate_qr_task
|
||||
from app.tasks.pdf_convert_tasks import (
|
||||
pdf_to_pptx_task, excel_to_pdf_task, pptx_to_pdf_task, sign_pdf_task,
|
||||
pdf_to_pptx_task,
|
||||
excel_to_pdf_task,
|
||||
pptx_to_pdf_task,
|
||||
sign_pdf_task,
|
||||
)
|
||||
from app.tasks.pdf_extra_tasks import (
|
||||
crop_pdf_task, flatten_pdf_task, repair_pdf_task, edit_metadata_task,
|
||||
crop_pdf_task,
|
||||
flatten_pdf_task,
|
||||
repair_pdf_task,
|
||||
edit_metadata_task,
|
||||
)
|
||||
from app.tasks.image_extra_tasks import crop_image_task, rotate_flip_image_task
|
||||
from app.tasks.barcode_tasks import generate_barcode_task
|
||||
@@ -80,6 +90,7 @@ def _resolve_and_check() -> tuple:
|
||||
# Task status — GET /api/v1/tasks/<task_id>/status
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@v1_bp.route("/tasks/<task_id>/status", methods=["GET"])
|
||||
@limiter.limit("300/minute", override_defaults=True)
|
||||
def get_task_status(task_id: str):
|
||||
@@ -113,6 +124,7 @@ def get_task_status(task_id: str):
|
||||
# Compress — POST /api/v1/compress/pdf
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@v1_bp.route("/compress/pdf", methods=["POST"])
|
||||
@limiter.limit("10/minute")
|
||||
def compress_pdf_route():
|
||||
@@ -130,7 +142,9 @@ def compress_pdf_route():
|
||||
quality = "medium"
|
||||
|
||||
try:
|
||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
||||
original_filename, ext = validate_actor_file(
|
||||
file, allowed_types=["pdf"], actor=actor
|
||||
)
|
||||
except FileValidationError as e:
|
||||
return jsonify({"error": e.message}), e.code
|
||||
|
||||
@@ -138,7 +152,10 @@ def compress_pdf_route():
|
||||
file.save(input_path)
|
||||
|
||||
task = compress_pdf_task.delay(
|
||||
input_path, task_id, original_filename, quality,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
quality,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "compress-pdf", task.id)
|
||||
@@ -150,6 +167,7 @@ def compress_pdf_route():
|
||||
# Convert — POST /api/v1/convert/pdf-to-word & /api/v1/convert/word-to-pdf
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@v1_bp.route("/convert/pdf-to-word", methods=["POST"])
|
||||
@limiter.limit("10/minute")
|
||||
def pdf_to_word_route():
|
||||
@@ -163,7 +181,9 @@ def pdf_to_word_route():
|
||||
|
||||
file = request.files["file"]
|
||||
try:
|
||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
||||
original_filename, ext = validate_actor_file(
|
||||
file, allowed_types=["pdf"], actor=actor
|
||||
)
|
||||
except FileValidationError as e:
|
||||
return jsonify({"error": e.message}), e.code
|
||||
|
||||
@@ -171,7 +191,9 @@ def pdf_to_word_route():
|
||||
file.save(input_path)
|
||||
|
||||
task = convert_pdf_to_word.delay(
|
||||
input_path, task_id, original_filename,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "pdf-to-word", task.id)
|
||||
@@ -201,7 +223,9 @@ def word_to_pdf_route():
|
||||
file.save(input_path)
|
||||
|
||||
task = convert_word_to_pdf.delay(
|
||||
input_path, task_id, original_filename,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "word-to-pdf", task.id)
|
||||
@@ -212,6 +236,7 @@ def word_to_pdf_route():
|
||||
# Image — POST /api/v1/image/convert & /api/v1/image/resize
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@v1_bp.route("/image/convert", methods=["POST"])
|
||||
@limiter.limit("10/minute")
|
||||
def convert_image_route():
|
||||
@@ -226,7 +251,9 @@ def convert_image_route():
|
||||
file = request.files["file"]
|
||||
output_format = request.form.get("format", "").lower()
|
||||
if output_format not in ALLOWED_OUTPUT_FORMATS:
|
||||
return jsonify({"error": f"Invalid format. Supported: {', '.join(ALLOWED_OUTPUT_FORMATS)}"}), 400
|
||||
return jsonify(
|
||||
{"error": f"Invalid format. Supported: {', '.join(ALLOWED_OUTPUT_FORMATS)}"}
|
||||
), 400
|
||||
|
||||
try:
|
||||
quality = max(1, min(100, int(request.form.get("quality", "85"))))
|
||||
@@ -244,7 +271,11 @@ def convert_image_route():
|
||||
file.save(input_path)
|
||||
|
||||
task = convert_image_task.delay(
|
||||
input_path, task_id, original_filename, output_format, quality,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
output_format,
|
||||
quality,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "image-convert", task.id)
|
||||
@@ -292,7 +323,12 @@ def resize_image_route():
|
||||
file.save(input_path)
|
||||
|
||||
task = resize_image_task.delay(
|
||||
input_path, task_id, original_filename, width, height, quality,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
width,
|
||||
height,
|
||||
quality,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "image-resize", task.id)
|
||||
@@ -303,6 +339,7 @@ def resize_image_route():
|
||||
# Video — POST /api/v1/video/to-gif
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@v1_bp.route("/video/to-gif", methods=["POST"])
|
||||
@limiter.limit("5/minute")
|
||||
def video_to_gif_route():
|
||||
@@ -343,7 +380,13 @@ def video_to_gif_route():
|
||||
file.save(input_path)
|
||||
|
||||
task = create_gif_task.delay(
|
||||
input_path, task_id, original_filename, start_time, duration, fps, width,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
start_time,
|
||||
duration,
|
||||
fps,
|
||||
width,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "video-to-gif", task.id)
|
||||
@@ -354,6 +397,7 @@ def video_to_gif_route():
|
||||
# PDF Tools — all single-file and multi-file routes
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@v1_bp.route("/pdf-tools/merge", methods=["POST"])
|
||||
@limiter.limit("10/minute")
|
||||
def merge_pdfs_route():
|
||||
@@ -372,7 +416,9 @@ def merge_pdfs_route():
|
||||
input_paths, original_filenames = [], []
|
||||
for f in files:
|
||||
try:
|
||||
original_filename, ext = validate_actor_file(f, allowed_types=["pdf"], actor=actor)
|
||||
original_filename, ext = validate_actor_file(
|
||||
f, allowed_types=["pdf"], actor=actor
|
||||
)
|
||||
except FileValidationError as e:
|
||||
return jsonify({"error": e.message}), e.code
|
||||
upload_dir = os.path.join(current_app.config["UPLOAD_FOLDER"], task_id)
|
||||
@@ -383,7 +429,9 @@ def merge_pdfs_route():
|
||||
original_filenames.append(original_filename)
|
||||
|
||||
task = merge_pdfs_task.delay(
|
||||
input_paths, task_id, original_filenames,
|
||||
input_paths,
|
||||
task_id,
|
||||
original_filenames,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "merge-pdf", task.id)
|
||||
@@ -410,14 +458,20 @@ def split_pdf_route():
|
||||
return jsonify({"error": "Please specify which pages to extract."}), 400
|
||||
|
||||
try:
|
||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
||||
original_filename, ext = validate_actor_file(
|
||||
file, allowed_types=["pdf"], actor=actor
|
||||
)
|
||||
except FileValidationError as e:
|
||||
return jsonify({"error": e.message}), e.code
|
||||
|
||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||
file.save(input_path)
|
||||
task = split_pdf_task.delay(
|
||||
input_path, task_id, original_filename, mode, pages,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
mode,
|
||||
pages,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "split-pdf", task.id)
|
||||
@@ -445,14 +499,20 @@ def rotate_pdf_route():
|
||||
pages = request.form.get("pages", "all")
|
||||
|
||||
try:
|
||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
||||
original_filename, ext = validate_actor_file(
|
||||
file, allowed_types=["pdf"], actor=actor
|
||||
)
|
||||
except FileValidationError as e:
|
||||
return jsonify({"error": e.message}), e.code
|
||||
|
||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||
file.save(input_path)
|
||||
task = rotate_pdf_task.delay(
|
||||
input_path, task_id, original_filename, rotation, pages,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
rotation,
|
||||
pages,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "rotate-pdf", task.id)
|
||||
@@ -473,8 +533,12 @@ def add_page_numbers_route():
|
||||
file = request.files["file"]
|
||||
position = request.form.get("position", "bottom-center")
|
||||
valid_positions = [
|
||||
"bottom-center", "bottom-right", "bottom-left",
|
||||
"top-center", "top-right", "top-left",
|
||||
"bottom-center",
|
||||
"bottom-right",
|
||||
"bottom-left",
|
||||
"top-center",
|
||||
"top-right",
|
||||
"top-left",
|
||||
]
|
||||
if position not in valid_positions:
|
||||
position = "bottom-center"
|
||||
@@ -484,14 +548,20 @@ def add_page_numbers_route():
|
||||
start_number = 1
|
||||
|
||||
try:
|
||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
||||
original_filename, ext = validate_actor_file(
|
||||
file, allowed_types=["pdf"], actor=actor
|
||||
)
|
||||
except FileValidationError as e:
|
||||
return jsonify({"error": e.message}), e.code
|
||||
|
||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||
file.save(input_path)
|
||||
task = add_page_numbers_task.delay(
|
||||
input_path, task_id, original_filename, position, start_number,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
position,
|
||||
start_number,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "page-numbers", task.id)
|
||||
@@ -519,14 +589,20 @@ def pdf_to_images_route():
|
||||
dpi = 200
|
||||
|
||||
try:
|
||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
||||
original_filename, ext = validate_actor_file(
|
||||
file, allowed_types=["pdf"], actor=actor
|
||||
)
|
||||
except FileValidationError as e:
|
||||
return jsonify({"error": e.message}), e.code
|
||||
|
||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||
file.save(input_path)
|
||||
task = pdf_to_images_task.delay(
|
||||
input_path, task_id, original_filename, output_format, dpi,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
output_format,
|
||||
dpi,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "pdf-to-images", task.id)
|
||||
@@ -564,7 +640,9 @@ def images_to_pdf_route():
|
||||
original_filenames.append(original_filename)
|
||||
|
||||
task = images_to_pdf_task.delay(
|
||||
input_paths, task_id, original_filenames,
|
||||
input_paths,
|
||||
task_id,
|
||||
original_filenames,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "images-to-pdf", task.id)
|
||||
@@ -594,14 +672,20 @@ def watermark_pdf_route():
|
||||
opacity = 0.3
|
||||
|
||||
try:
|
||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
||||
original_filename, ext = validate_actor_file(
|
||||
file, allowed_types=["pdf"], actor=actor
|
||||
)
|
||||
except FileValidationError as e:
|
||||
return jsonify({"error": e.message}), e.code
|
||||
|
||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||
file.save(input_path)
|
||||
task = watermark_pdf_task.delay(
|
||||
input_path, task_id, original_filename, watermark_text, opacity,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
watermark_text,
|
||||
opacity,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "watermark-pdf", task.id)
|
||||
@@ -627,14 +711,19 @@ def protect_pdf_route():
|
||||
return jsonify({"error": "Password must be at least 4 characters."}), 400
|
||||
|
||||
try:
|
||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
||||
original_filename, ext = validate_actor_file(
|
||||
file, allowed_types=["pdf"], actor=actor
|
||||
)
|
||||
except FileValidationError as e:
|
||||
return jsonify({"error": e.message}), e.code
|
||||
|
||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||
file.save(input_path)
|
||||
task = protect_pdf_task.delay(
|
||||
input_path, task_id, original_filename, password,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
password,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "protect-pdf", task.id)
|
||||
@@ -658,14 +747,19 @@ def unlock_pdf_route():
|
||||
return jsonify({"error": "Password is required."}), 400
|
||||
|
||||
try:
|
||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
||||
original_filename, ext = validate_actor_file(
|
||||
file, allowed_types=["pdf"], actor=actor
|
||||
)
|
||||
except FileValidationError as e:
|
||||
return jsonify({"error": e.message}), e.code
|
||||
|
||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||
file.save(input_path)
|
||||
task = unlock_pdf_task.delay(
|
||||
input_path, task_id, original_filename, password,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
password,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "unlock-pdf", task.id)
|
||||
@@ -685,18 +779,24 @@ def extract_flowchart_route():
|
||||
|
||||
file = request.files["file"]
|
||||
try:
|
||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
||||
original_filename, ext = validate_actor_file(
|
||||
file, allowed_types=["pdf"], actor=actor
|
||||
)
|
||||
except FileValidationError as e:
|
||||
return jsonify({"error": e.message}), e.code
|
||||
|
||||
task_id, input_path = generate_safe_path(ext)
|
||||
file.save(input_path)
|
||||
task = extract_flowchart_task.delay(
|
||||
input_path, task_id, original_filename,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "pdf-flowchart", task.id)
|
||||
return jsonify({"task_id": task.id, "message": "Flowchart extraction started."}), 202
|
||||
return jsonify(
|
||||
{"task_id": task.id, "message": "Flowchart extraction started."}
|
||||
), 202
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
@@ -707,6 +807,7 @@ def extract_flowchart_route():
|
||||
# OCR — POST /api/v1/ocr/image & /api/v1/ocr/pdf
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@v1_bp.route("/ocr/image", methods=["POST"])
|
||||
@limiter.limit("10/minute")
|
||||
def ocr_image_route():
|
||||
@@ -731,7 +832,10 @@ def ocr_image_route():
|
||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||
file.save(input_path)
|
||||
task = ocr_image_task.delay(
|
||||
input_path, task_id, original_filename, lang,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
lang,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "ocr-image", task.id)
|
||||
@@ -753,14 +857,19 @@ def ocr_pdf_route():
|
||||
lang = request.form.get("lang", "eng")
|
||||
|
||||
try:
|
||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
||||
original_filename, ext = validate_actor_file(
|
||||
file, allowed_types=["pdf"], actor=actor
|
||||
)
|
||||
except FileValidationError as e:
|
||||
return jsonify({"error": e.message}), e.code
|
||||
|
||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||
file.save(input_path)
|
||||
task = ocr_pdf_task.delay(
|
||||
input_path, task_id, original_filename, lang,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
lang,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "ocr-pdf", task.id)
|
||||
@@ -771,6 +880,7 @@ def ocr_pdf_route():
|
||||
# Remove Background — POST /api/v1/image/remove-bg
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@v1_bp.route("/image/remove-bg", methods=["POST"])
|
||||
@limiter.limit("5/minute")
|
||||
def remove_bg_route():
|
||||
@@ -793,7 +903,9 @@ def remove_bg_route():
|
||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||
file.save(input_path)
|
||||
task = remove_bg_task.delay(
|
||||
input_path, task_id, original_filename,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "remove-bg", task.id)
|
||||
@@ -804,6 +916,7 @@ def remove_bg_route():
|
||||
# PDF AI — POST /api/v1/pdf-ai/chat, summarize, translate, extract-tables
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@v1_bp.route("/pdf-ai/chat", methods=["POST"])
|
||||
@limiter.limit("5/minute")
|
||||
def chat_pdf_route():
|
||||
@@ -821,14 +934,19 @@ def chat_pdf_route():
|
||||
return jsonify({"error": "Question is required."}), 400
|
||||
|
||||
try:
|
||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
||||
original_filename, ext = validate_actor_file(
|
||||
file, allowed_types=["pdf"], actor=actor
|
||||
)
|
||||
except FileValidationError as e:
|
||||
return jsonify({"error": e.message}), e.code
|
||||
|
||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||
file.save(input_path)
|
||||
task = chat_with_pdf_task.delay(
|
||||
input_path, task_id, original_filename, question,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
question,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "chat-pdf", task.id)
|
||||
@@ -852,14 +970,19 @@ def summarize_pdf_route():
|
||||
length = "medium"
|
||||
|
||||
try:
|
||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
||||
original_filename, ext = validate_actor_file(
|
||||
file, allowed_types=["pdf"], actor=actor
|
||||
)
|
||||
except FileValidationError as e:
|
||||
return jsonify({"error": e.message}), e.code
|
||||
|
||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||
file.save(input_path)
|
||||
task = summarize_pdf_task.delay(
|
||||
input_path, task_id, original_filename, length,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
length,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "summarize-pdf", task.id)
|
||||
@@ -879,18 +1002,25 @@ def translate_pdf_route():
|
||||
|
||||
file = request.files["file"]
|
||||
target_language = request.form.get("target_language", "").strip()
|
||||
source_language = request.form.get("source_language", "auto").strip()
|
||||
if not target_language:
|
||||
return jsonify({"error": "Target language is required."}), 400
|
||||
|
||||
try:
|
||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
||||
original_filename, ext = validate_actor_file(
|
||||
file, allowed_types=["pdf"], actor=actor
|
||||
)
|
||||
except FileValidationError as e:
|
||||
return jsonify({"error": e.message}), e.code
|
||||
|
||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||
file.save(input_path)
|
||||
task = translate_pdf_task.delay(
|
||||
input_path, task_id, original_filename, target_language,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
target_language,
|
||||
source_language,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "translate-pdf", task.id)
|
||||
@@ -910,14 +1040,18 @@ def extract_tables_route():
|
||||
|
||||
file = request.files["file"]
|
||||
try:
|
||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
||||
original_filename, ext = validate_actor_file(
|
||||
file, allowed_types=["pdf"], actor=actor
|
||||
)
|
||||
except FileValidationError as e:
|
||||
return jsonify({"error": e.message}), e.code
|
||||
|
||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||
file.save(input_path)
|
||||
task = extract_tables_task.delay(
|
||||
input_path, task_id, original_filename,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "extract-tables", task.id)
|
||||
@@ -928,6 +1062,7 @@ def extract_tables_route():
|
||||
# PDF to Excel — POST /api/v1/convert/pdf-to-excel
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@v1_bp.route("/convert/pdf-to-excel", methods=["POST"])
|
||||
@limiter.limit("10/minute")
|
||||
def pdf_to_excel_route():
|
||||
@@ -941,14 +1076,18 @@ def pdf_to_excel_route():
|
||||
|
||||
file = request.files["file"]
|
||||
try:
|
||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
||||
original_filename, ext = validate_actor_file(
|
||||
file, allowed_types=["pdf"], actor=actor
|
||||
)
|
||||
except FileValidationError as e:
|
||||
return jsonify({"error": e.message}), e.code
|
||||
|
||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||
file.save(input_path)
|
||||
task = pdf_to_excel_task.delay(
|
||||
input_path, task_id, original_filename,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "pdf-to-excel", task.id)
|
||||
@@ -959,6 +1098,7 @@ def pdf_to_excel_route():
|
||||
# HTML to PDF — POST /api/v1/convert/html-to-pdf
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@v1_bp.route("/convert/html-to-pdf", methods=["POST"])
|
||||
@limiter.limit("10/minute")
|
||||
def html_to_pdf_route():
|
||||
@@ -981,7 +1121,9 @@ def html_to_pdf_route():
|
||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||
file.save(input_path)
|
||||
task = html_to_pdf_task.delay(
|
||||
input_path, task_id, original_filename,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "html-to-pdf", task.id)
|
||||
@@ -992,6 +1134,7 @@ def html_to_pdf_route():
|
||||
# QR Code — POST /api/v1/qrcode/generate
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@v1_bp.route("/qrcode/generate", methods=["POST"])
|
||||
@limiter.limit("20/minute")
|
||||
def generate_qr_route():
|
||||
@@ -1018,7 +1161,10 @@ def generate_qr_route():
|
||||
|
||||
task_id = str(uuid.uuid4())
|
||||
task = generate_qr_task.delay(
|
||||
task_id, str(data).strip(), size, "png",
|
||||
task_id,
|
||||
str(data).strip(),
|
||||
size,
|
||||
"png",
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "qr-code", task.id)
|
||||
@@ -1033,6 +1179,7 @@ def generate_qr_route():
|
||||
# PDF to PowerPoint — POST /api/v1/convert/pdf-to-pptx
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@v1_bp.route("/convert/pdf-to-pptx", methods=["POST"])
|
||||
@limiter.limit("10/minute")
|
||||
def v1_pdf_to_pptx_route():
|
||||
@@ -1046,14 +1193,18 @@ def v1_pdf_to_pptx_route():
|
||||
|
||||
file = request.files["file"]
|
||||
try:
|
||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
||||
original_filename, ext = validate_actor_file(
|
||||
file, allowed_types=["pdf"], actor=actor
|
||||
)
|
||||
except FileValidationError as e:
|
||||
return jsonify({"error": e.message}), e.code
|
||||
|
||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||
file.save(input_path)
|
||||
task = pdf_to_pptx_task.delay(
|
||||
input_path, task_id, original_filename,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "pdf-to-pptx", task.id)
|
||||
@@ -1064,6 +1215,7 @@ def v1_pdf_to_pptx_route():
|
||||
# Excel to PDF — POST /api/v1/convert/excel-to-pdf
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@v1_bp.route("/convert/excel-to-pdf", methods=["POST"])
|
||||
@limiter.limit("10/minute")
|
||||
def v1_excel_to_pdf_route():
|
||||
@@ -1086,7 +1238,9 @@ def v1_excel_to_pdf_route():
|
||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||
file.save(input_path)
|
||||
task = excel_to_pdf_task.delay(
|
||||
input_path, task_id, original_filename,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "excel-to-pdf", task.id)
|
||||
@@ -1097,6 +1251,7 @@ def v1_excel_to_pdf_route():
|
||||
# PowerPoint to PDF — POST /api/v1/convert/pptx-to-pdf
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@v1_bp.route("/convert/pptx-to-pdf", methods=["POST"])
|
||||
@limiter.limit("10/minute")
|
||||
def v1_pptx_to_pdf_route():
|
||||
@@ -1119,7 +1274,9 @@ def v1_pptx_to_pdf_route():
|
||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||
file.save(input_path)
|
||||
task = pptx_to_pdf_task.delay(
|
||||
input_path, task_id, original_filename,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "pptx-to-pdf", task.id)
|
||||
@@ -1130,6 +1287,7 @@ def v1_pptx_to_pdf_route():
|
||||
# Sign PDF — POST /api/v1/pdf-tools/sign
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@v1_bp.route("/pdf-tools/sign", methods=["POST"])
|
||||
@limiter.limit("10/minute")
|
||||
def v1_sign_pdf_route():
|
||||
@@ -1147,12 +1305,16 @@ def v1_sign_pdf_route():
|
||||
sig_file = request.files["signature"]
|
||||
|
||||
try:
|
||||
original_filename, ext = validate_actor_file(pdf_file, allowed_types=["pdf"], actor=actor)
|
||||
original_filename, ext = validate_actor_file(
|
||||
pdf_file, allowed_types=["pdf"], actor=actor
|
||||
)
|
||||
except FileValidationError as e:
|
||||
return jsonify({"error": e.message}), e.code
|
||||
|
||||
try:
|
||||
_, sig_ext = validate_actor_file(sig_file, allowed_types=ALLOWED_IMAGE_TYPES, actor=actor)
|
||||
_, sig_ext = validate_actor_file(
|
||||
sig_file, allowed_types=ALLOWED_IMAGE_TYPES, actor=actor
|
||||
)
|
||||
except FileValidationError as e:
|
||||
return jsonify({"error": f"Signature: {e.message}"}), e.code
|
||||
|
||||
@@ -1174,8 +1336,15 @@ def v1_sign_pdf_route():
|
||||
sig_file.save(signature_path)
|
||||
|
||||
task = sign_pdf_task.delay(
|
||||
input_path, signature_path, task_id, original_filename,
|
||||
page, x, y, width, height,
|
||||
input_path,
|
||||
signature_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
page,
|
||||
x,
|
||||
y,
|
||||
width,
|
||||
height,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "sign-pdf", task.id)
|
||||
@@ -1186,6 +1355,7 @@ def v1_sign_pdf_route():
|
||||
# Crop PDF — POST /api/v1/pdf-tools/crop
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@v1_bp.route("/pdf-tools/crop", methods=["POST"])
|
||||
@limiter.limit("10/minute")
|
||||
def v1_crop_pdf_route():
|
||||
@@ -1209,15 +1379,23 @@ def v1_crop_pdf_route():
|
||||
pages = request.form.get("pages", "all")
|
||||
|
||||
try:
|
||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
||||
original_filename, ext = validate_actor_file(
|
||||
file, allowed_types=["pdf"], actor=actor
|
||||
)
|
||||
except FileValidationError as e:
|
||||
return jsonify({"error": e.message}), e.code
|
||||
|
||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||
file.save(input_path)
|
||||
task = crop_pdf_task.delay(
|
||||
input_path, task_id, original_filename,
|
||||
margin_left, margin_right, margin_top, margin_bottom, pages,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
margin_left,
|
||||
margin_right,
|
||||
margin_top,
|
||||
margin_bottom,
|
||||
pages,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "crop-pdf", task.id)
|
||||
@@ -1228,6 +1406,7 @@ def v1_crop_pdf_route():
|
||||
# Flatten PDF — POST /api/v1/pdf-tools/flatten
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@v1_bp.route("/pdf-tools/flatten", methods=["POST"])
|
||||
@limiter.limit("10/minute")
|
||||
def v1_flatten_pdf_route():
|
||||
@@ -1241,14 +1420,18 @@ def v1_flatten_pdf_route():
|
||||
|
||||
file = request.files["file"]
|
||||
try:
|
||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
||||
original_filename, ext = validate_actor_file(
|
||||
file, allowed_types=["pdf"], actor=actor
|
||||
)
|
||||
except FileValidationError as e:
|
||||
return jsonify({"error": e.message}), e.code
|
||||
|
||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||
file.save(input_path)
|
||||
task = flatten_pdf_task.delay(
|
||||
input_path, task_id, original_filename,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "flatten-pdf", task.id)
|
||||
@@ -1259,6 +1442,7 @@ def v1_flatten_pdf_route():
|
||||
# Repair PDF — POST /api/v1/pdf-tools/repair
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@v1_bp.route("/pdf-tools/repair", methods=["POST"])
|
||||
@limiter.limit("10/minute")
|
||||
def v1_repair_pdf_route():
|
||||
@@ -1272,14 +1456,18 @@ def v1_repair_pdf_route():
|
||||
|
||||
file = request.files["file"]
|
||||
try:
|
||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
||||
original_filename, ext = validate_actor_file(
|
||||
file, allowed_types=["pdf"], actor=actor
|
||||
)
|
||||
except FileValidationError as e:
|
||||
return jsonify({"error": e.message}), e.code
|
||||
|
||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||
file.save(input_path)
|
||||
task = repair_pdf_task.delay(
|
||||
input_path, task_id, original_filename,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "repair-pdf", task.id)
|
||||
@@ -1290,6 +1478,7 @@ def v1_repair_pdf_route():
|
||||
# Edit PDF Metadata — POST /api/v1/pdf-tools/metadata
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@v1_bp.route("/pdf-tools/metadata", methods=["POST"])
|
||||
@limiter.limit("10/minute")
|
||||
def v1_edit_metadata_route():
|
||||
@@ -1312,15 +1501,23 @@ def v1_edit_metadata_route():
|
||||
return jsonify({"error": "At least one metadata field required."}), 400
|
||||
|
||||
try:
|
||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
||||
original_filename, ext = validate_actor_file(
|
||||
file, allowed_types=["pdf"], actor=actor
|
||||
)
|
||||
except FileValidationError as e:
|
||||
return jsonify({"error": e.message}), e.code
|
||||
|
||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||
file.save(input_path)
|
||||
task = edit_metadata_task.delay(
|
||||
input_path, task_id, original_filename,
|
||||
title, author, subject, keywords, creator,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
title,
|
||||
author,
|
||||
subject,
|
||||
keywords,
|
||||
creator,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "edit-metadata", task.id)
|
||||
@@ -1331,6 +1528,7 @@ def v1_edit_metadata_route():
|
||||
# Image Crop — POST /api/v1/image/crop
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@v1_bp.route("/image/crop", methods=["POST"])
|
||||
@limiter.limit("10/minute")
|
||||
def v1_crop_image_route():
|
||||
@@ -1364,8 +1562,13 @@ def v1_crop_image_route():
|
||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||
file.save(input_path)
|
||||
task = crop_image_task.delay(
|
||||
input_path, task_id, original_filename,
|
||||
left, top, right, bottom,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
left,
|
||||
top,
|
||||
right,
|
||||
bottom,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "image-crop", task.id)
|
||||
@@ -1376,6 +1579,7 @@ def v1_crop_image_route():
|
||||
# Image Rotate/Flip — POST /api/v1/image/rotate-flip
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@v1_bp.route("/image/rotate-flip", methods=["POST"])
|
||||
@limiter.limit("10/minute")
|
||||
def v1_rotate_flip_image_route():
|
||||
@@ -1408,8 +1612,12 @@ def v1_rotate_flip_image_route():
|
||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||
file.save(input_path)
|
||||
task = rotate_flip_image_task.delay(
|
||||
input_path, task_id, original_filename,
|
||||
rotation, flip_horizontal, flip_vertical,
|
||||
input_path,
|
||||
task_id,
|
||||
original_filename,
|
||||
rotation,
|
||||
flip_horizontal,
|
||||
flip_vertical,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "image-rotate-flip", task.id)
|
||||
@@ -1420,6 +1628,7 @@ def v1_rotate_flip_image_route():
|
||||
# Barcode — POST /api/v1/barcode/generate
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@v1_bp.route("/barcode/generate", methods=["POST"])
|
||||
@limiter.limit("20/minute")
|
||||
def v1_generate_barcode_route():
|
||||
@@ -1442,14 +1651,21 @@ def v1_generate_barcode_route():
|
||||
return jsonify({"error": "Barcode data is required."}), 400
|
||||
|
||||
if barcode_type not in SUPPORTED_BARCODE_TYPES:
|
||||
return jsonify({"error": f"Unsupported type. Supported: {', '.join(SUPPORTED_BARCODE_TYPES)}"}), 400
|
||||
return jsonify(
|
||||
{
|
||||
"error": f"Unsupported type. Supported: {', '.join(SUPPORTED_BARCODE_TYPES)}"
|
||||
}
|
||||
), 400
|
||||
|
||||
if output_format not in ("png", "svg"):
|
||||
output_format = "png"
|
||||
|
||||
task_id = str(uuid.uuid4())
|
||||
task = generate_barcode_task.delay(
|
||||
data, barcode_type, task_id, output_format,
|
||||
data,
|
||||
barcode_type,
|
||||
task_id,
|
||||
output_format,
|
||||
**build_task_tracking_kwargs(actor),
|
||||
)
|
||||
record_accepted_usage(actor, "barcode", task.id)
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
"""PDF AI services — Chat, Summarize, Translate, Table Extract."""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
|
||||
import requests
|
||||
|
||||
@@ -11,9 +16,84 @@ from app.services.openrouter_config_service import (
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# DeepL endpoint used when DEEPL_API_URL is unset or blank.
DEFAULT_DEEPL_API_URL = "https://api-free.deepl.com/v2/translate"
# DeepL request timeout used when DEEPL_TIMEOUT_SECONDS is unset.
DEFAULT_DEEPL_TIMEOUT_SECONDS = 90
# Default chunk-size budget, in characters, for _split_translation_chunks.
MAX_TRANSLATION_CHUNK_CHARS = 3500
# Number of attempts _translate_with_retry makes for one provider call.
TRANSLATION_RETRY_ATTEMPTS = 3
# Base back-off; _translate_with_retry sleeps this value times the attempt number.
TRANSLATION_RETRY_DELAY_SECONDS = 2

# Display names for the lowercase language codes handled by the translator;
# _language_label falls back to the raw code for anything not listed here.
LANGUAGE_LABELS = {
    "auto": "Auto Detect",
    "en": "English",
    "ar": "Arabic",
    "fr": "French",
    "es": "Spanish",
    "de": "German",
    "zh": "Chinese",
    "ja": "Japanese",
    "ko": "Korean",
    "pt": "Portuguese",
    "ru": "Russian",
    "tr": "Turkish",
    "it": "Italian",
}

# Lowercase language code -> code sent to DeepL. "auto" is intentionally
# absent: a missing entry makes _call_deepl_translate omit source_lang.
DEEPL_LANGUAGE_CODES = {
    "ar": "AR",
    "de": "DE",
    "en": "EN",
    "es": "ES",
    "fr": "FR",
    "it": "IT",
    "ja": "JA",
    "ko": "KO",
    "pt": "PT-PT",
    "ru": "RU",
    "tr": "TR",
    "zh": "ZH",
}

# Lowercase language code -> three-letter OCR language code.
# NOTE(review): presumably Tesseract language names consumed by the OCR
# service; not referenced in the visible code — confirm against callers.
OCR_LANGUAGE_CODES = {
    "ar": "ara",
    "en": "eng",
    "fr": "fra",
}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class DeepLSettings:
    """Immutable DeepL connection settings, as built by ``_get_deepl_settings``."""

    # API key sent in the "DeepL-Auth-Key" Authorization header; empty when
    # DeepL is not configured.
    api_key: str
    # URL that translation requests are POSTed to.
    base_url: str
    # Timeout handed to requests.post for each DeepL call.
    timeout_seconds: int
|
||||
|
||||
|
||||
def _normalize_language_code(value: str | None, default: str = "") -> str:
|
||||
normalized = str(value or "").strip().lower()
|
||||
return normalized or default
|
||||
|
||||
|
||||
def _language_label(value: str | None) -> str:
    """Return the display name for a language code.

    Codes missing from ``LANGUAGE_LABELS`` are returned in their normalized
    form; an empty or missing code yields ``"Unknown"``.
    """
    code = _normalize_language_code(value)
    if code in LANGUAGE_LABELS:
        return LANGUAGE_LABELS[code]
    if code:
        return code
    return "Unknown"
|
||||
|
||||
|
||||
def _get_deepl_settings() -> DeepLSettings:
    """Read the DeepL configuration from environment variables.

    ``DEEPL_API_KEY`` defaults to an empty string. ``DEEPL_API_URL`` falls
    back to ``DEFAULT_DEEPL_API_URL`` when unset or blank.
    ``DEEPL_TIMEOUT_SECONDS`` falls back to ``DEFAULT_DEEPL_TIMEOUT_SECONDS``
    when unset, blank, non-numeric, or not a positive number, so that a
    half-filled ``.env`` file cannot crash every translation request.

    Returns:
        A ``DeepLSettings`` instance reflecting the current environment.
    """
    api_key = str(os.getenv("DEEPL_API_KEY", "")).strip()
    base_url = (
        str(os.getenv("DEEPL_API_URL", DEFAULT_DEEPL_API_URL)).strip()
        or DEFAULT_DEEPL_API_URL
    )

    timeout_seconds = DEFAULT_DEEPL_TIMEOUT_SECONDS
    raw_timeout = str(os.getenv("DEEPL_TIMEOUT_SECONDS", "")).strip()
    if raw_timeout:
        try:
            timeout_seconds = int(raw_timeout)
        except ValueError:
            logger.warning(
                "Invalid DEEPL_TIMEOUT_SECONDS value %r; using default of %s seconds.",
                raw_timeout,
                DEFAULT_DEEPL_TIMEOUT_SECONDS,
            )
            timeout_seconds = DEFAULT_DEEPL_TIMEOUT_SECONDS

    # A zero or negative timeout is rejected by the HTTP client, so treat it
    # as a misconfiguration rather than passing it through.
    if timeout_seconds <= 0:
        timeout_seconds = DEFAULT_DEEPL_TIMEOUT_SECONDS

    return DeepLSettings(
        api_key=api_key, base_url=base_url, timeout_seconds=timeout_seconds
    )
|
||||
|
||||
|
||||
class PdfAiError(Exception):
|
||||
"""Custom exception for PDF AI service failures."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
user_message: str,
|
||||
@@ -26,6 +106,42 @@ class PdfAiError(Exception):
|
||||
self.detail = detail
|
||||
|
||||
|
||||
class RetryableTranslationError(PdfAiError):
    """Error wrapper used for provider failures that should be retried.

    Raised by the provider calls for transient problems (timeouts, connection
    errors, HTTP 429 and 5xx responses). ``_translate_with_retry`` catches it
    and re-runs the call; any other ``PdfAiError`` is not retried.
    """
|
||||
|
||||
|
||||
def _translate_with_retry(action, provider_name: str) -> dict:
    """Run one provider call, retrying on ``RetryableTranslationError``.

    Args:
        action: Zero-argument callable that performs a single translation
            request and returns its result dict.
        provider_name: Provider label used in the retry log messages.

    Returns:
        Whatever ``action`` returns on its first successful attempt.

    Raises:
        PdfAiError: When every attempt failed with a retryable error. The
            message, error code and detail of the last failure are copied
            into a plain ``PdfAiError`` (so callers do not retry again) and
            the original error is chained as ``__cause__``. Non-retryable
            ``PdfAiError`` instances raised by ``action`` propagate as-is.
    """
    last_error: PdfAiError | None = None

    for attempt in range(1, TRANSLATION_RETRY_ATTEMPTS + 1):
        try:
            return action()
        except RetryableTranslationError as error:
            last_error = error
            logger.warning(
                "%s translation attempt %s/%s failed with retryable error %s",
                provider_name,
                attempt,
                TRANSLATION_RETRY_ATTEMPTS,
                error.error_code,
            )
            # Linear back-off between attempts; no sleep after the last one.
            if attempt < TRANSLATION_RETRY_ATTEMPTS:
                time.sleep(TRANSLATION_RETRY_DELAY_SECONDS * attempt)

    if last_error is not None:
        raise PdfAiError(
            last_error.user_message,
            error_code=last_error.error_code,
            detail=last_error.detail,
        ) from last_error

    # Only reachable when TRANSLATION_RETRY_ATTEMPTS is configured below 1.
    raise PdfAiError(
        "Translation provider failed unexpectedly.",
        error_code="TRANSLATION_PROVIDER_FAILED",
    )
|
||||
|
||||
|
||||
def _estimate_tokens(text: str) -> int:
|
||||
"""Rough token estimate: ~4 chars per token for English."""
|
||||
return max(1, len(text) // 4)
|
||||
@@ -49,7 +165,30 @@ def _extract_text_from_pdf(input_path: str, max_pages: int = 50) -> str:
|
||||
text = page.extract_text() or ""
|
||||
if text.strip():
|
||||
texts.append(f"[Page {i + 1}]\n{text}")
|
||||
return "\n\n".join(texts)
|
||||
|
||||
extracted = "\n\n".join(texts)
|
||||
if extracted.strip():
|
||||
return extracted
|
||||
|
||||
# Fall back to OCR for scanned/image-only PDFs instead of failing fast.
|
||||
try:
|
||||
from app.services.ocr_service import ocr_pdf
|
||||
|
||||
with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as handle:
|
||||
ocr_output_path = handle.name
|
||||
|
||||
try:
|
||||
data = ocr_pdf(input_path, ocr_output_path, lang="eng")
|
||||
ocr_text = str(data.get("text", "")).strip()
|
||||
if ocr_text:
|
||||
return ocr_text
|
||||
finally:
|
||||
if os.path.exists(ocr_output_path):
|
||||
os.unlink(ocr_output_path)
|
||||
except Exception as ocr_error:
|
||||
logger.warning("OCR fallback for PDF text extraction failed: %s", ocr_error)
|
||||
|
||||
return ""
|
||||
except PdfAiError:
|
||||
raise
|
||||
except Exception as e:
|
||||
@@ -70,14 +209,17 @@ def _call_openrouter(
|
||||
# Budget guard
|
||||
try:
|
||||
from app.services.ai_cost_service import check_ai_budget, AiBudgetExceededError
|
||||
|
||||
check_ai_budget()
|
||||
except AiBudgetExceededError:
|
||||
raise PdfAiError(
|
||||
"Monthly AI processing budget has been reached. Please try again next month.",
|
||||
error_code="AI_BUDGET_EXCEEDED",
|
||||
)
|
||||
except Exception:
|
||||
pass # Don't block if cost service unavailable
|
||||
except ImportError:
|
||||
pass
|
||||
except Exception as error:
|
||||
if error.__class__.__name__ == "AiBudgetExceededError":
|
||||
raise PdfAiError(
|
||||
"Monthly AI processing budget has been reached. Please try again next month.",
|
||||
error_code="AI_BUDGET_EXCEEDED",
|
||||
)
|
||||
pass
|
||||
|
||||
settings = get_openrouter_settings()
|
||||
|
||||
@@ -127,14 +269,14 @@ def _call_openrouter(
|
||||
|
||||
if status_code == 429:
|
||||
logger.warning("OpenRouter rate limit reached (429).")
|
||||
raise PdfAiError(
|
||||
raise RetryableTranslationError(
|
||||
"AI service is experiencing high demand. Please wait a moment and try again.",
|
||||
error_code="OPENROUTER_RATE_LIMIT",
|
||||
)
|
||||
|
||||
if status_code >= 500:
|
||||
logger.error("OpenRouter server error (%s).", status_code)
|
||||
raise PdfAiError(
|
||||
raise RetryableTranslationError(
|
||||
"AI service provider is experiencing issues. Please try again shortly.",
|
||||
error_code="OPENROUTER_SERVER_ERROR",
|
||||
)
|
||||
@@ -144,7 +286,11 @@ def _call_openrouter(
|
||||
|
||||
# Handle model-level errors returned inside a 200 response
|
||||
if data.get("error"):
|
||||
error_msg = data["error"].get("message", "") if isinstance(data["error"], dict) else str(data["error"])
|
||||
error_msg = (
|
||||
data["error"].get("message", "")
|
||||
if isinstance(data["error"], dict)
|
||||
else str(data["error"])
|
||||
)
|
||||
logger.error("OpenRouter returned an error payload: %s", error_msg)
|
||||
raise PdfAiError(
|
||||
"AI service encountered an issue. Please try again.",
|
||||
@@ -163,6 +309,7 @@ def _call_openrouter(
|
||||
# Log usage
|
||||
try:
|
||||
from app.services.ai_cost_service import log_ai_usage
|
||||
|
||||
usage = data.get("usage", {})
|
||||
log_ai_usage(
|
||||
tool=tool_name,
|
||||
@@ -178,13 +325,13 @@ def _call_openrouter(
|
||||
except PdfAiError:
|
||||
raise
|
||||
except requests.exceptions.Timeout:
|
||||
raise PdfAiError(
|
||||
raise RetryableTranslationError(
|
||||
"AI service timed out. Please try again.",
|
||||
error_code="OPENROUTER_TIMEOUT",
|
||||
)
|
||||
except requests.exceptions.ConnectionError:
|
||||
logger.error("Cannot connect to OpenRouter API at %s", settings.base_url)
|
||||
raise PdfAiError(
|
||||
raise RetryableTranslationError(
|
||||
"AI service is unreachable. Please try again shortly.",
|
||||
error_code="OPENROUTER_CONNECTION_ERROR",
|
||||
)
|
||||
@@ -197,6 +344,218 @@ def _call_openrouter(
|
||||
)
|
||||
|
||||
|
||||
def _split_translation_chunks(
    text: str, max_chars: int = MAX_TRANSLATION_CHUNK_CHARS
) -> list[str]:
    """Split extracted PDF text into chunks of at most about ``max_chars``.

    The text is cut at blank lines (``"\\n\\n"``) so page markers stay attached
    to their content, and consecutive blocks are packed into one chunk while
    they fit. A single block that is itself longer than ``max_chars`` is
    broken up further at line boundaries, so one dense page cannot produce a
    chunk far beyond the budget.

    Args:
        text: Full extracted document text.
        max_chars: Character budget per chunk (each block is counted with two
            extra characters for its separator).

    Returns:
        The list of chunks in document order; ``[text]`` when the input holds
        no non-blank block.
    """
    chunks: list[str] = []
    current: list[str] = []
    current_length = 0

    for block in text.split("\n\n"):
        normalized = block.strip()
        if not normalized:
            continue

        for piece in _split_oversized_block(normalized, max_chars):
            # +2 accounts for the "\n\n" separator used when joining.
            piece_length = len(piece) + 2
            if current and current_length + piece_length > max_chars:
                chunks.append("\n\n".join(current))
                current = []
                current_length = 0
            current.append(piece)
            current_length += piece_length

    if current:
        chunks.append("\n\n".join(current))

    return chunks or [text]


def _split_oversized_block(block: str, max_chars: int) -> list[str]:
    """Break a block longer than ``max_chars`` at line boundaries.

    Lines that alone exceed the budget are hard-sliced. Blocks that already
    fit (or a non-positive budget) are returned unchanged as a single piece.
    """
    if max_chars <= 0 or len(block) <= max_chars:
        return [block]

    pieces: list[str] = []
    buffer: list[str] = []
    buffer_length = 0

    for line in block.split("\n"):
        segments = [
            line[start : start + max_chars]
            for start in range(0, len(line), max_chars)
        ] or [line]
        for segment in segments:
            segment_length = len(segment) + 1  # +1 for the joining newline
            if buffer and buffer_length + segment_length > max_chars:
                pieces.append("\n".join(buffer))
                buffer = []
                buffer_length = 0
            buffer.append(segment)
            buffer_length += segment_length

    if buffer:
        pieces.append("\n".join(buffer))

    stripped = (piece.strip() for piece in pieces)
    return [piece for piece in stripped if piece]
|
||||
|
||||
|
||||
def _call_deepl_translate(
    chunk: str, target_language: str, source_language: str | None = None
) -> dict:
    """Translate a chunk with DeepL when premium credentials are configured.

    Args:
        chunk: Text to translate; sent as a single-element ``text`` list.
        target_language: Language code looked up in ``DEEPL_LANGUAGE_CODES``.
        source_language: Optional source language code. When it has no entry
            in ``DEEPL_LANGUAGE_CODES`` (e.g. ``"auto"`` or ``None``) the
            ``source_lang`` field is omitted from the request.

    Returns:
        A dict with ``translation``, ``provider`` (always ``"deepl"``) and
        ``detected_source_language`` (lowercased, possibly empty).

    Raises:
        RetryableTranslationError: On timeouts, connection errors, HTTP 429
            and HTTP 5xx responses.
        PdfAiError: When DeepL is not configured, the target language is not
            supported, the request is rejected, or the response is empty or
            malformed. Every failure is a ``PdfAiError`` so the caller's
            fallback to another provider is always reachable.
    """
    settings = _get_deepl_settings()
    if not settings.api_key:
        raise PdfAiError(
            "DeepL is not configured.",
            error_code="DEEPL_NOT_CONFIGURED",
        )

    target_code = DEEPL_LANGUAGE_CODES.get(_normalize_language_code(target_language))
    if not target_code:
        raise PdfAiError(
            f"Target language '{target_language}' is not supported by the premium translation provider.",
            error_code="DEEPL_UNSUPPORTED_TARGET_LANGUAGE",
        )

    # NOTE(review): tag_handling="xml" asks DeepL to parse the chunk as XML,
    # but the extracted PDF text is not escaped here — confirm that raw "<"
    # or "&" characters in documents are handled as intended.
    payload: dict[str, object] = {
        "text": [chunk],
        "target_lang": target_code,
        "preserve_formatting": True,
        "tag_handling": "xml",
        "split_sentences": "nonewlines",
    }

    source_code = DEEPL_LANGUAGE_CODES.get(_normalize_language_code(source_language))
    if source_code:
        # The table holds target codes; DeepL source languages carry no
        # regional variant, so "PT-PT" must be sent as "PT".
        payload["source_lang"] = source_code.split("-", 1)[0]

    try:
        response = requests.post(
            settings.base_url,
            headers={
                "Authorization": f"DeepL-Auth-Key {settings.api_key}",
                "Content-Type": "application/json",
            },
            json=payload,
            timeout=settings.timeout_seconds,
        )
    except requests.exceptions.Timeout as error:
        raise RetryableTranslationError(
            "Premium translation service timed out. Retrying...",
            error_code="DEEPL_TIMEOUT",
        ) from error
    except requests.exceptions.ConnectionError as error:
        raise RetryableTranslationError(
            "Premium translation service is temporarily unreachable. Retrying...",
            error_code="DEEPL_CONNECTION_ERROR",
        ) from error
    except requests.exceptions.RequestException as error:
        raise PdfAiError(
            "Premium translation service is temporarily unavailable.",
            error_code="DEEPL_REQUEST_ERROR",
            detail=str(error),
        ) from error

    if response.status_code == 429:
        raise RetryableTranslationError(
            "Premium translation service is busy. Retrying...",
            error_code="DEEPL_RATE_LIMIT",
        )

    if response.status_code >= 500:
        raise RetryableTranslationError(
            "Premium translation service is experiencing issues. Retrying...",
            error_code="DEEPL_SERVER_ERROR",
        )

    if response.status_code in {403, 456}:
        raise PdfAiError(
            "Premium translation provider credits or permissions need attention.",
            error_code="DEEPL_CREDITS_OR_PERMISSIONS",
        )

    # Any remaining 4xx is a rejected request. Report it as PdfAiError rather
    # than letting requests.HTTPError escape past the caller's fallback.
    if response.status_code >= 400:
        raise PdfAiError(
            "Premium translation service rejected the request.",
            error_code="DEEPL_HTTP_ERROR",
            detail=f"HTTP {response.status_code}: {response.text[:200]}",
        )

    try:
        data = response.json()
    except ValueError as error:
        raise PdfAiError(
            "Premium translation provider returned an invalid response.",
            error_code="DEEPL_INVALID_RESPONSE",
            detail=str(error),
        ) from error

    translations = data.get("translations") if isinstance(data, dict) else None
    if not isinstance(translations, list) or not translations:
        raise PdfAiError(
            "Premium translation provider returned an empty response.",
            error_code="DEEPL_EMPTY_RESPONSE",
        )

    first = translations[0]
    translated_text = (
        str(first.get("text", "")).strip() if isinstance(first, dict) else ""
    )
    if not translated_text:
        raise PdfAiError(
            "Premium translation provider returned an empty response.",
            error_code="DEEPL_EMPTY_TEXT",
        )

    return {
        "translation": translated_text,
        "provider": "deepl",
        "detected_source_language": str(first.get("detected_source_language", ""))
        .strip()
        .lower(),
    }
|
||||
|
||||
|
||||
def _call_openrouter_translate(
    chunk: str, target_language: str, source_language: str | None = None
) -> dict:
    """Translate a chunk through OpenRouter with a translator system prompt.

    The prompt names the target language and either pins the source language
    or asks the model to detect it when the source is missing or ``"auto"``.

    Returns:
        A dict with ``translation``, ``provider`` (always ``"openrouter"``)
        and ``detected_source_language`` (the normalized requested source
        code, possibly empty).
    """
    normalized_source = _normalize_language_code(source_language)

    if source_language and normalized_source != "auto":
        source_hint = f"treat {_language_label(source_language)} as the source language"
    else:
        source_hint = "auto-detect the source language"

    prompt_parts = [
        "You are a professional document translator. ",
        f"Translate the provided PDF content into {_language_label(target_language)}. ",
        f"Please {source_hint}. Preserve headings, lists, tables, and page markers. ",
        "Return only the translated text.",
    ]
    system_prompt = "".join(prompt_parts)

    result = {
        "translation": _call_openrouter(
            system_prompt,
            chunk,
            max_tokens=2200,
            tool_name="pdf_translate_fallback",
        ),
        "provider": "openrouter",
        "detected_source_language": normalized_source,
    }
    return result
|
||||
|
||||
|
||||
def _translate_document_text(
    text: str, target_language: str, source_language: str | None = None
) -> dict:
    """Translate a whole document chunk by chunk, preferring DeepL.

    Each chunk is sent to DeepL when an API key is configured and falls back
    to OpenRouter when DeepL fails. Once DeepL fails with an error that
    cannot succeed later in the same run (not configured, unsupported target
    language, credits/permissions), the remaining chunks go straight to
    OpenRouter instead of repeating the failing request.

    Args:
        text: Extracted document text.
        target_language: Target language code.
        source_language: Optional source language code; ``"auto"`` or
            ``None`` leaves detection to the provider.

    Returns:
        A dict with ``translation`` (chunks joined by blank lines),
        ``provider`` (comma-separated, sorted provider names used),
        ``detected_source_language`` and ``chunks_translated``.

    Raises:
        PdfAiError: When the OpenRouter fallback fails for a chunk.
    """
    # DeepL errors that will recur for every chunk of this document.
    permanent_deepl_errors = frozenset(
        {
            "DEEPL_NOT_CONFIGURED",
            "DEEPL_UNSUPPORTED_TARGET_LANGUAGE",
            "DEEPL_CREDITS_OR_PERMISSIONS",
        }
    )

    chunks = _split_translation_chunks(text)
    translations: list[str] = []
    providers_used: list[str] = []

    detected_source_language = _normalize_language_code(source_language)
    if detected_source_language == "auto":
        detected_source_language = ""

    # The configuration does not change between chunks; read it once.
    use_deepl = bool(_get_deepl_settings().api_key)

    for chunk in chunks:
        chunk_result: dict | None = None

        if use_deepl:
            try:
                chunk_result = _translate_with_retry(
                    # Bind the chunk as a default so the callable never
                    # depends on the loop variable's later value.
                    lambda chunk=chunk: _call_deepl_translate(
                        chunk, target_language, source_language
                    ),
                    provider_name="DeepL",
                )
            except PdfAiError as deepl_error:
                logger.warning(
                    "DeepL translation failed for chunk; falling back to OpenRouter. code=%s detail=%s",
                    deepl_error.error_code,
                    deepl_error.detail,
                )
                if deepl_error.error_code in permanent_deepl_errors:
                    use_deepl = False

        if chunk_result is None:
            chunk_result = _translate_with_retry(
                lambda chunk=chunk: _call_openrouter_translate(
                    chunk, target_language, source_language
                ),
                provider_name="OpenRouter",
            )

        translations.append(str(chunk_result["translation"]).strip())
        providers_used.append(str(chunk_result["provider"]))

        # Keep the first language any provider reports when none was given.
        if not detected_source_language and chunk_result.get(
            "detected_source_language"
        ):
            detected_source_language = _normalize_language_code(
                chunk_result["detected_source_language"]
            )

    return {
        "translation": "\n\n".join(part for part in translations if part),
        "provider": ", ".join(sorted(set(providers_used))),
        "detected_source_language": detected_source_language,
        "chunks_translated": len(translations),
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 1. Chat with PDF
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -212,11 +571,15 @@ def chat_with_pdf(input_path: str, question: str) -> dict:
|
||||
{"reply": "...", "pages_analyzed": int}
|
||||
"""
|
||||
if not question or not question.strip():
|
||||
raise PdfAiError("Please provide a question.", error_code="PDF_AI_INVALID_INPUT")
|
||||
raise PdfAiError(
|
||||
"Please provide a question.", error_code="PDF_AI_INVALID_INPUT"
|
||||
)
|
||||
|
||||
text = _extract_text_from_pdf(input_path)
|
||||
if not text.strip():
|
||||
raise PdfAiError("Could not extract any text from the PDF.", error_code="PDF_TEXT_EMPTY")
|
||||
raise PdfAiError(
|
||||
"Could not extract any text from the PDF.", error_code="PDF_TEXT_EMPTY"
|
||||
)
|
||||
|
||||
# Truncate to fit context window
|
||||
max_chars = 12000
|
||||
@@ -230,7 +593,9 @@ def chat_with_pdf(input_path: str, question: str) -> dict:
|
||||
)
|
||||
|
||||
user_msg = f"Document content:\n{truncated}\n\nQuestion: {question}"
|
||||
reply = _call_openrouter(system_prompt, user_msg, max_tokens=800, tool_name="pdf_chat")
|
||||
reply = _call_openrouter(
|
||||
system_prompt, user_msg, max_tokens=800, tool_name="pdf_chat"
|
||||
)
|
||||
|
||||
page_count = text.count("[Page ")
|
||||
return {"reply": reply, "pages_analyzed": page_count}
|
||||
@@ -252,7 +617,9 @@ def summarize_pdf(input_path: str, length: str = "medium") -> dict:
|
||||
"""
|
||||
text = _extract_text_from_pdf(input_path)
|
||||
if not text.strip():
|
||||
raise PdfAiError("Could not extract any text from the PDF.", error_code="PDF_TEXT_EMPTY")
|
||||
raise PdfAiError(
|
||||
"Could not extract any text from the PDF.", error_code="PDF_TEXT_EMPTY"
|
||||
)
|
||||
|
||||
length_instruction = {
|
||||
"short": "Provide a brief summary in 2-3 sentences.",
|
||||
@@ -270,7 +637,9 @@ def summarize_pdf(input_path: str, length: str = "medium") -> dict:
|
||||
)
|
||||
|
||||
user_msg = f"{length_instruction}\n\nDocument content:\n{truncated}"
|
||||
summary = _call_openrouter(system_prompt, user_msg, max_tokens=1000, tool_name="pdf_summarize")
|
||||
summary = _call_openrouter(
|
||||
system_prompt, user_msg, max_tokens=1000, tool_name="pdf_summarize"
|
||||
)
|
||||
|
||||
page_count = text.count("[Page ")
|
||||
return {"summary": summary, "pages_analyzed": page_count}
|
||||
@@ -279,7 +648,9 @@ def summarize_pdf(input_path: str, length: str = "medium") -> dict:
|
||||
# ---------------------------------------------------------------------------
|
||||
# 3. Translate PDF
|
||||
# ---------------------------------------------------------------------------
|
||||
def translate_pdf(input_path: str, target_language: str) -> dict:
|
||||
def translate_pdf(
|
||||
input_path: str, target_language: str, source_language: str | None = None
|
||||
) -> dict:
|
||||
"""
|
||||
Translate the text content of a PDF to another language.
|
||||
|
||||
@@ -290,29 +661,46 @@ def translate_pdf(input_path: str, target_language: str) -> dict:
|
||||
Returns:
|
||||
{"translation": "...", "pages_analyzed": int, "target_language": str}
|
||||
"""
|
||||
if not target_language or not target_language.strip():
|
||||
raise PdfAiError("Please specify a target language.", error_code="PDF_AI_INVALID_INPUT")
|
||||
normalized_target_language = _normalize_language_code(target_language)
|
||||
normalized_source_language = _normalize_language_code(
|
||||
source_language, default="auto"
|
||||
)
|
||||
|
||||
if not normalized_target_language:
|
||||
raise PdfAiError(
|
||||
"Please specify a target language.", error_code="PDF_AI_INVALID_INPUT"
|
||||
)
|
||||
|
||||
if (
|
||||
normalized_target_language == normalized_source_language
|
||||
and normalized_source_language != "auto"
|
||||
):
|
||||
raise PdfAiError(
|
||||
"Please choose different source and target languages.",
|
||||
error_code="PDF_AI_INVALID_INPUT",
|
||||
)
|
||||
|
||||
text = _extract_text_from_pdf(input_path)
|
||||
if not text.strip():
|
||||
raise PdfAiError("Could not extract any text from the PDF.", error_code="PDF_TEXT_EMPTY")
|
||||
raise PdfAiError(
|
||||
"Could not extract any text from the PDF.", error_code="PDF_TEXT_EMPTY"
|
||||
)
|
||||
|
||||
max_chars = 10000
|
||||
truncated = text[:max_chars]
|
||||
|
||||
system_prompt = (
|
||||
f"You are a professional translator. Translate the following document "
|
||||
f"content into {target_language}. Preserve the original formatting and "
|
||||
f"structure as much as possible. Only output the translation, nothing else."
|
||||
translated = _translate_document_text(
|
||||
text,
|
||||
target_language=normalized_target_language,
|
||||
source_language=normalized_source_language,
|
||||
)
|
||||
|
||||
translation = _call_openrouter(system_prompt, truncated, max_tokens=2000, tool_name="pdf_translate")
|
||||
|
||||
page_count = text.count("[Page ")
|
||||
return {
|
||||
"translation": translation,
|
||||
"translation": translated["translation"],
|
||||
"pages_analyzed": page_count,
|
||||
"target_language": target_language,
|
||||
"target_language": normalized_target_language,
|
||||
"source_language": normalized_source_language,
|
||||
"detected_source_language": translated["detected_source_language"],
|
||||
"provider": translated["provider"],
|
||||
"chunks_translated": translated["chunks_translated"],
|
||||
}
|
||||
|
||||
|
||||
@@ -361,12 +749,14 @@ def extract_tables(input_path: str) -> dict:
|
||||
cells.append(str(val))
|
||||
rows.append(cells)
|
||||
|
||||
result_tables.append({
|
||||
"page": page_num,
|
||||
"table_index": table_index,
|
||||
"headers": headers,
|
||||
"rows": rows,
|
||||
})
|
||||
result_tables.append(
|
||||
{
|
||||
"page": page_num,
|
||||
"table_index": table_index,
|
||||
"headers": headers,
|
||||
"rows": rows,
|
||||
}
|
||||
)
|
||||
table_index += 1
|
||||
|
||||
if not result_tables:
|
||||
@@ -385,7 +775,9 @@ def extract_tables(input_path: str) -> dict:
|
||||
except PdfAiError:
|
||||
raise
|
||||
except ImportError:
|
||||
raise PdfAiError("tabula-py library is not installed.", error_code="TABULA_NOT_INSTALLED")
|
||||
raise PdfAiError(
|
||||
"tabula-py library is not installed.", error_code="TABULA_NOT_INSTALLED"
|
||||
)
|
||||
except Exception as e:
|
||||
raise PdfAiError(
|
||||
"Failed to extract tables.",
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
"""Celery tasks for PDF AI tools — Chat, Summarize, Translate, Table Extract."""
|
||||
|
||||
import os
|
||||
import logging
|
||||
import json
|
||||
@@ -28,7 +29,8 @@ def _build_pdf_ai_error_payload(task_id: str, error: PdfAiError, tool: str) -> d
|
||||
payload = {
|
||||
"status": "failed",
|
||||
"error_code": getattr(error, "error_code", "PDF_AI_ERROR"),
|
||||
"user_message": getattr(error, "user_message", str(error)) or "AI processing failed.",
|
||||
"user_message": getattr(error, "user_message", str(error))
|
||||
or "AI processing failed.",
|
||||
"task_id": task_id,
|
||||
}
|
||||
|
||||
@@ -80,9 +82,12 @@ def chat_with_pdf_task(
|
||||
|
||||
logger.info(f"Task {task_id}: Chat with PDF completed")
|
||||
finalize_task_tracking(
|
||||
user_id=user_id, tool="chat-pdf",
|
||||
original_filename=original_filename, result=result,
|
||||
usage_source=usage_source, api_key_id=api_key_id,
|
||||
user_id=user_id,
|
||||
tool="chat-pdf",
|
||||
original_filename=original_filename,
|
||||
result=result,
|
||||
usage_source=usage_source,
|
||||
api_key_id=api_key_id,
|
||||
celery_task_id=self.request.id,
|
||||
)
|
||||
_cleanup(task_id)
|
||||
@@ -91,9 +96,12 @@ def chat_with_pdf_task(
|
||||
except PdfAiError as e:
|
||||
result = _build_pdf_ai_error_payload(task_id, e, "chat-pdf")
|
||||
finalize_task_tracking(
|
||||
user_id=user_id, tool="chat-pdf",
|
||||
original_filename=original_filename, result=result,
|
||||
usage_source=usage_source, api_key_id=api_key_id,
|
||||
user_id=user_id,
|
||||
tool="chat-pdf",
|
||||
original_filename=original_filename,
|
||||
result=result,
|
||||
usage_source=usage_source,
|
||||
api_key_id=api_key_id,
|
||||
celery_task_id=self.request.id,
|
||||
)
|
||||
_cleanup(task_id)
|
||||
@@ -103,9 +111,12 @@ def chat_with_pdf_task(
|
||||
logger.error(f"Task {task_id}: Unexpected error — {e}")
|
||||
result = {"status": "failed", "error": "An unexpected error occurred."}
|
||||
finalize_task_tracking(
|
||||
user_id=user_id, tool="chat-pdf",
|
||||
original_filename=original_filename, result=result,
|
||||
usage_source=usage_source, api_key_id=api_key_id,
|
||||
user_id=user_id,
|
||||
tool="chat-pdf",
|
||||
original_filename=original_filename,
|
||||
result=result,
|
||||
usage_source=usage_source,
|
||||
api_key_id=api_key_id,
|
||||
celery_task_id=self.request.id,
|
||||
)
|
||||
_cleanup(task_id)
|
||||
@@ -140,9 +151,12 @@ def summarize_pdf_task(
|
||||
|
||||
logger.info(f"Task {task_id}: PDF summarize completed")
|
||||
finalize_task_tracking(
|
||||
user_id=user_id, tool="summarize-pdf",
|
||||
original_filename=original_filename, result=result,
|
||||
usage_source=usage_source, api_key_id=api_key_id,
|
||||
user_id=user_id,
|
||||
tool="summarize-pdf",
|
||||
original_filename=original_filename,
|
||||
result=result,
|
||||
usage_source=usage_source,
|
||||
api_key_id=api_key_id,
|
||||
celery_task_id=self.request.id,
|
||||
)
|
||||
_cleanup(task_id)
|
||||
@@ -151,9 +165,12 @@ def summarize_pdf_task(
|
||||
except PdfAiError as e:
|
||||
result = _build_pdf_ai_error_payload(task_id, e, "summarize-pdf")
|
||||
finalize_task_tracking(
|
||||
user_id=user_id, tool="summarize-pdf",
|
||||
original_filename=original_filename, result=result,
|
||||
usage_source=usage_source, api_key_id=api_key_id,
|
||||
user_id=user_id,
|
||||
tool="summarize-pdf",
|
||||
original_filename=original_filename,
|
||||
result=result,
|
||||
usage_source=usage_source,
|
||||
api_key_id=api_key_id,
|
||||
celery_task_id=self.request.id,
|
||||
)
|
||||
_cleanup(task_id)
|
||||
@@ -163,9 +180,12 @@ def summarize_pdf_task(
|
||||
logger.error(f"Task {task_id}: Unexpected error — {e}")
|
||||
result = {"status": "failed", "error": "An unexpected error occurred."}
|
||||
finalize_task_tracking(
|
||||
user_id=user_id, tool="summarize-pdf",
|
||||
original_filename=original_filename, result=result,
|
||||
usage_source=usage_source, api_key_id=api_key_id,
|
||||
user_id=user_id,
|
||||
tool="summarize-pdf",
|
||||
original_filename=original_filename,
|
||||
result=result,
|
||||
usage_source=usage_source,
|
||||
api_key_id=api_key_id,
|
||||
celery_task_id=self.request.id,
|
||||
)
|
||||
_cleanup(task_id)
|
||||
@@ -182,28 +202,41 @@ def translate_pdf_task(
|
||||
task_id: str,
|
||||
original_filename: str,
|
||||
target_language: str,
|
||||
source_language: str | None = None,
|
||||
user_id: int | None = None,
|
||||
usage_source: str = "web",
|
||||
api_key_id: int | None = None,
|
||||
):
|
||||
"""Translate a PDF document to another language."""
|
||||
try:
|
||||
self.update_state(state="PROCESSING", meta={"step": "Translating document..."})
|
||||
self.update_state(
|
||||
state="PROCESSING",
|
||||
meta={"step": "Translating document with provider fallback..."},
|
||||
)
|
||||
|
||||
data = translate_pdf(input_path, target_language)
|
||||
data = translate_pdf(
|
||||
input_path, target_language, source_language=source_language
|
||||
)
|
||||
|
||||
result = {
|
||||
"status": "completed",
|
||||
"translation": data["translation"],
|
||||
"pages_analyzed": data["pages_analyzed"],
|
||||
"target_language": data["target_language"],
|
||||
"source_language": data.get("source_language"),
|
||||
"detected_source_language": data.get("detected_source_language"),
|
||||
"provider": data.get("provider"),
|
||||
"chunks_translated": data.get("chunks_translated"),
|
||||
}
|
||||
|
||||
logger.info(f"Task {task_id}: PDF translate completed")
|
||||
finalize_task_tracking(
|
||||
user_id=user_id, tool="translate-pdf",
|
||||
original_filename=original_filename, result=result,
|
||||
usage_source=usage_source, api_key_id=api_key_id,
|
||||
user_id=user_id,
|
||||
tool="translate-pdf",
|
||||
original_filename=original_filename,
|
||||
result=result,
|
||||
usage_source=usage_source,
|
||||
api_key_id=api_key_id,
|
||||
celery_task_id=self.request.id,
|
||||
)
|
||||
_cleanup(task_id)
|
||||
@@ -212,9 +245,12 @@ def translate_pdf_task(
|
||||
except PdfAiError as e:
|
||||
result = _build_pdf_ai_error_payload(task_id, e, "translate-pdf")
|
||||
finalize_task_tracking(
|
||||
user_id=user_id, tool="translate-pdf",
|
||||
original_filename=original_filename, result=result,
|
||||
usage_source=usage_source, api_key_id=api_key_id,
|
||||
user_id=user_id,
|
||||
tool="translate-pdf",
|
||||
original_filename=original_filename,
|
||||
result=result,
|
||||
usage_source=usage_source,
|
||||
api_key_id=api_key_id,
|
||||
celery_task_id=self.request.id,
|
||||
)
|
||||
_cleanup(task_id)
|
||||
@@ -224,9 +260,12 @@ def translate_pdf_task(
|
||||
logger.error(f"Task {task_id}: Unexpected error — {e}")
|
||||
result = {"status": "failed", "error": "An unexpected error occurred."}
|
||||
finalize_task_tracking(
|
||||
user_id=user_id, tool="translate-pdf",
|
||||
original_filename=original_filename, result=result,
|
||||
usage_source=usage_source, api_key_id=api_key_id,
|
||||
user_id=user_id,
|
||||
tool="translate-pdf",
|
||||
original_filename=original_filename,
|
||||
result=result,
|
||||
usage_source=usage_source,
|
||||
api_key_id=api_key_id,
|
||||
celery_task_id=self.request.id,
|
||||
)
|
||||
_cleanup(task_id)
|
||||
@@ -260,9 +299,12 @@ def extract_tables_task(
|
||||
|
||||
logger.info(f"Task {task_id}: Table extraction completed")
|
||||
finalize_task_tracking(
|
||||
user_id=user_id, tool="extract-tables",
|
||||
original_filename=original_filename, result=result,
|
||||
usage_source=usage_source, api_key_id=api_key_id,
|
||||
user_id=user_id,
|
||||
tool="extract-tables",
|
||||
original_filename=original_filename,
|
||||
result=result,
|
||||
usage_source=usage_source,
|
||||
api_key_id=api_key_id,
|
||||
celery_task_id=self.request.id,
|
||||
)
|
||||
_cleanup(task_id)
|
||||
@@ -271,9 +313,12 @@ def extract_tables_task(
|
||||
except PdfAiError as e:
|
||||
result = _build_pdf_ai_error_payload(task_id, e, "extract-tables")
|
||||
finalize_task_tracking(
|
||||
user_id=user_id, tool="extract-tables",
|
||||
original_filename=original_filename, result=result,
|
||||
usage_source=usage_source, api_key_id=api_key_id,
|
||||
user_id=user_id,
|
||||
tool="extract-tables",
|
||||
original_filename=original_filename,
|
||||
result=result,
|
||||
usage_source=usage_source,
|
||||
api_key_id=api_key_id,
|
||||
celery_task_id=self.request.id,
|
||||
)
|
||||
_cleanup(task_id)
|
||||
@@ -283,9 +328,12 @@ def extract_tables_task(
|
||||
logger.error(f"Task {task_id}: Unexpected error — {e}")
|
||||
result = {"status": "failed", "error": "An unexpected error occurred."}
|
||||
finalize_task_tracking(
|
||||
user_id=user_id, tool="extract-tables",
|
||||
original_filename=original_filename, result=result,
|
||||
usage_source=usage_source, api_key_id=api_key_id,
|
||||
user_id=user_id,
|
||||
tool="extract-tables",
|
||||
original_filename=original_filename,
|
||||
result=result,
|
||||
usage_source=usage_source,
|
||||
api_key_id=api_key_id,
|
||||
celery_task_id=self.request.id,
|
||||
)
|
||||
_cleanup(task_id)
|
||||
|
||||
@@ -26,20 +26,21 @@ def _env_or_default(name: str, default: str) -> str:
|
||||
|
||||
class BaseConfig:
|
||||
"""Base configuration."""
|
||||
|
||||
SECRET_KEY = os.getenv("SECRET_KEY", "change-me-in-production")
|
||||
INTERNAL_ADMIN_SECRET = os.getenv("INTERNAL_ADMIN_SECRET", "")
|
||||
INTERNAL_ADMIN_EMAILS = _parse_csv_env("INTERNAL_ADMIN_EMAILS")
|
||||
|
||||
# File upload settings
|
||||
MAX_CONTENT_LENGTH = int(
|
||||
os.getenv("ABSOLUTE_MAX_CONTENT_LENGTH_MB", 100)
|
||||
) * 1024 * 1024
|
||||
MAX_CONTENT_LENGTH = (
|
||||
int(os.getenv("ABSOLUTE_MAX_CONTENT_LENGTH_MB", 100)) * 1024 * 1024
|
||||
)
|
||||
UPLOAD_FOLDER = _env_or_default("UPLOAD_FOLDER", "/tmp/uploads")
|
||||
OUTPUT_FOLDER = _env_or_default("OUTPUT_FOLDER", "/tmp/outputs")
|
||||
FILE_EXPIRY_SECONDS = int(os.getenv("FILE_EXPIRY_SECONDS", 1800))
|
||||
STORAGE_ALLOW_LOCAL_FALLBACK = os.getenv(
|
||||
"STORAGE_ALLOW_LOCAL_FALLBACK", "true"
|
||||
).lower() == "true"
|
||||
STORAGE_ALLOW_LOCAL_FALLBACK = (
|
||||
os.getenv("STORAGE_ALLOW_LOCAL_FALLBACK", "true").lower() == "true"
|
||||
)
|
||||
DATABASE_PATH = _env_or_default(
|
||||
"DATABASE_PATH", os.path.join(BASE_DIR, "data", "dociva.db")
|
||||
)
|
||||
@@ -69,31 +70,29 @@ class BaseConfig:
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.presentation"
|
||||
],
|
||||
"ppt": ["application/vnd.ms-powerpoint"],
|
||||
"xlsx": [
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
||||
],
|
||||
"xlsx": ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"],
|
||||
"xls": ["application/vnd.ms-excel"],
|
||||
}
|
||||
|
||||
# File size limits per type (bytes)
|
||||
FILE_SIZE_LIMITS = {
|
||||
"pdf": 20 * 1024 * 1024, # 20MB
|
||||
"doc": 15 * 1024 * 1024, # 15MB
|
||||
"docx": 15 * 1024 * 1024, # 15MB
|
||||
"html": 10 * 1024 * 1024, # 10MB
|
||||
"htm": 10 * 1024 * 1024, # 10MB
|
||||
"png": 10 * 1024 * 1024, # 10MB
|
||||
"jpg": 10 * 1024 * 1024, # 10MB
|
||||
"jpeg": 10 * 1024 * 1024, # 10MB
|
||||
"webp": 10 * 1024 * 1024, # 10MB
|
||||
"tiff": 15 * 1024 * 1024, # 15MB
|
||||
"bmp": 15 * 1024 * 1024, # 15MB
|
||||
"mp4": 50 * 1024 * 1024, # 50MB
|
||||
"webm": 50 * 1024 * 1024, # 50MB
|
||||
"pptx": 20 * 1024 * 1024, # 20MB
|
||||
"ppt": 20 * 1024 * 1024, # 20MB
|
||||
"xlsx": 15 * 1024 * 1024, # 15MB
|
||||
"xls": 15 * 1024 * 1024, # 15MB
|
||||
"pdf": 20 * 1024 * 1024, # 20MB
|
||||
"doc": 15 * 1024 * 1024, # 15MB
|
||||
"docx": 15 * 1024 * 1024, # 15MB
|
||||
"html": 10 * 1024 * 1024, # 10MB
|
||||
"htm": 10 * 1024 * 1024, # 10MB
|
||||
"png": 10 * 1024 * 1024, # 10MB
|
||||
"jpg": 10 * 1024 * 1024, # 10MB
|
||||
"jpeg": 10 * 1024 * 1024, # 10MB
|
||||
"webp": 10 * 1024 * 1024, # 10MB
|
||||
"tiff": 15 * 1024 * 1024, # 15MB
|
||||
"bmp": 15 * 1024 * 1024, # 15MB
|
||||
"mp4": 50 * 1024 * 1024, # 50MB
|
||||
"webm": 50 * 1024 * 1024, # 50MB
|
||||
"pptx": 20 * 1024 * 1024, # 20MB
|
||||
"ppt": 20 * 1024 * 1024, # 20MB
|
||||
"xlsx": 15 * 1024 * 1024, # 15MB
|
||||
"xls": 15 * 1024 * 1024, # 15MB
|
||||
}
|
||||
|
||||
# Redis
|
||||
@@ -118,11 +117,20 @@ class BaseConfig:
|
||||
|
||||
# OpenRouter AI
|
||||
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
|
||||
OPENROUTER_MODEL = os.getenv("OPENROUTER_MODEL", "nvidia/nemotron-3-super-120b-a12b:free")
|
||||
OPENROUTER_MODEL = os.getenv(
|
||||
"OPENROUTER_MODEL", "nvidia/nemotron-3-super-120b-a12b:free"
|
||||
)
|
||||
OPENROUTER_BASE_URL = os.getenv(
|
||||
"OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1/chat/completions"
|
||||
)
|
||||
|
||||
# Premium translation provider (recommended for Translate PDF)
|
||||
DEEPL_API_KEY = os.getenv("DEEPL_API_KEY", "")
|
||||
DEEPL_API_URL = os.getenv(
|
||||
"DEEPL_API_URL", "https://api-free.deepl.com/v2/translate"
|
||||
)
|
||||
DEEPL_TIMEOUT_SECONDS = int(os.getenv("DEEPL_TIMEOUT_SECONDS", 90))
|
||||
|
||||
# SMTP (for password reset emails)
|
||||
SMTP_HOST = os.getenv("SMTP_HOST", "")
|
||||
SMTP_PORT = int(os.getenv("SMTP_PORT", 587))
|
||||
@@ -156,12 +164,14 @@ class BaseConfig:
|
||||
|
||||
class DevelopmentConfig(BaseConfig):
|
||||
"""Development configuration."""
|
||||
|
||||
DEBUG = True
|
||||
TESTING = False
|
||||
|
||||
|
||||
class ProductionConfig(BaseConfig):
|
||||
"""Production configuration."""
|
||||
|
||||
DEBUG = False
|
||||
TESTING = False
|
||||
SESSION_COOKIE_SECURE = True
|
||||
@@ -172,6 +182,7 @@ class ProductionConfig(BaseConfig):
|
||||
|
||||
class TestingConfig(BaseConfig):
|
||||
"""Testing configuration."""
|
||||
|
||||
DEBUG = True
|
||||
TESTING = True
|
||||
UPLOAD_FOLDER = "/tmp/test_uploads"
|
||||
|
||||
93
backend/tests/test_pdf_translate_service.py
Normal file
93
backend/tests/test_pdf_translate_service.py
Normal file
@@ -0,0 +1,93 @@
|
||||
"""Tests for the resilient PDF translation workflow."""
|
||||
|
||||
from app.services.pdf_ai_service import DeepLSettings, PdfAiError, translate_pdf
|
||||
|
||||
|
||||
def test_translate_pdf_prefers_premium_provider(monkeypatch):
    """DeepL output is returned when the premium provider is configured and succeeds."""

    def stub_extract_text(_path):
        # Two-page document so the service has more than one page marker to handle.
        return "[Page 1]\nHello world\n\n[Page 2]\nSecond page"

    def stub_deepl_settings():
        return DeepLSettings(
            api_key="key",
            base_url="https://api-free.deepl.com/v2/translate",
            timeout_seconds=90,
        )

    def stub_retry(action, provider_name):
        # Stand-in for the retry wrapper: invoke the provider call exactly once.
        return action()

    def stub_deepl_translate(chunk, target_language, source_language=None):
        return {
            "translation": f"translated::{chunk}",
            "provider": "deepl",
            "detected_source_language": "en",
        }

    monkeypatch.setattr(
        "app.services.pdf_ai_service._extract_text_from_pdf", stub_extract_text
    )
    monkeypatch.setattr(
        "app.services.pdf_ai_service._get_deepl_settings", stub_deepl_settings
    )
    monkeypatch.setattr(
        "app.services.pdf_ai_service._translate_with_retry", stub_retry
    )
    monkeypatch.setattr(
        "app.services.pdf_ai_service._call_deepl_translate", stub_deepl_translate
    )

    outcome = translate_pdf("/tmp/demo.pdf", "fr", source_language="en")

    assert outcome["provider"] == "deepl"
    assert outcome["target_language"] == "fr"
    assert outcome["detected_source_language"] == "en"
    assert "translated::" in outcome["translation"]
|
||||
|
||||
|
||||
def test_translate_pdf_falls_back_when_premium_provider_fails(monkeypatch):
    """OpenRouter output is returned when the DeepL call raises PdfAiError."""

    def stub_extract_text(_path):
        return "[Page 1]\nHello world"

    def stub_deepl_settings():
        return DeepLSettings(
            api_key="key",
            base_url="https://api-free.deepl.com/v2/translate",
            timeout_seconds=90,
        )

    def stub_retry(action, provider_name):
        # Stand-in for the retry wrapper: invoke the provider call exactly once.
        return action()

    def fail_deepl(*_args, **_kwargs):
        # Simulate the premium provider being down for every call.
        raise PdfAiError("DeepL unavailable", error_code="DEEPL_SERVER_ERROR")

    def stub_openrouter_translate(chunk, target_language, source_language=None):
        return {
            "translation": f"fallback::{chunk}",
            "provider": "openrouter",
            "detected_source_language": "en",
        }

    monkeypatch.setattr(
        "app.services.pdf_ai_service._extract_text_from_pdf", stub_extract_text
    )
    monkeypatch.setattr(
        "app.services.pdf_ai_service._get_deepl_settings", stub_deepl_settings
    )
    monkeypatch.setattr(
        "app.services.pdf_ai_service._translate_with_retry", stub_retry
    )
    monkeypatch.setattr("app.services.pdf_ai_service._call_deepl_translate", fail_deepl)
    monkeypatch.setattr(
        "app.services.pdf_ai_service._call_openrouter_translate",
        stub_openrouter_translate,
    )

    outcome = translate_pdf("/tmp/demo.pdf", "de", source_language="auto")

    assert outcome["provider"] == "openrouter"
    assert outcome["detected_source_language"] == "en"
    assert outcome["translation"].startswith("fallback::")
|
||||
|
||||
|
||||
def test_translate_pdf_rejects_identical_languages(monkeypatch):
    """Requesting the same source and target language raises PdfAiError."""

    def stub_extract_text(_path):
        return "[Page 1]\nHello world"

    monkeypatch.setattr(
        "app.services.pdf_ai_service._extract_text_from_pdf", stub_extract_text
    )

    raised = None
    try:
        translate_pdf("/tmp/demo.pdf", "fr", source_language="fr")
    except PdfAiError as error:
        raised = error

    # A call that returns normally means the validation was skipped.
    if raised is None:
        raise AssertionError("Expected identical language validation to fail")

    assert raised.error_code == "PDF_AI_INVALID_INPUT"
    assert "different source and target languages" in raised.user_message
|
||||
File diff suppressed because it is too large
Load Diff
@@ -2,31 +2,31 @@
|
||||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||
<url>
|
||||
<loc>https://dociva.io/blog/how-to-compress-pdf-online</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>monthly</changefreq>
|
||||
<priority>0.6</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/blog/convert-images-without-losing-quality</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>monthly</changefreq>
|
||||
<priority>0.6</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/blog/ocr-extract-text-from-images</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>monthly</changefreq>
|
||||
<priority>0.6</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/blog/merge-split-pdf-files</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>monthly</changefreq>
|
||||
<priority>0.6</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/blog/ai-chat-with-pdf-documents</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>monthly</changefreq>
|
||||
<priority>0.6</priority>
|
||||
</url>
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -2,55 +2,55 @@
|
||||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||
<url>
|
||||
<loc>https://dociva.io/</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
<priority>1.0</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/about</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>monthly</changefreq>
|
||||
<priority>0.4</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/contact</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>monthly</changefreq>
|
||||
<priority>0.4</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/privacy</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>yearly</changefreq>
|
||||
<priority>0.3</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/terms</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>yearly</changefreq>
|
||||
<priority>0.3</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/pricing</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>monthly</changefreq>
|
||||
<priority>0.7</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/blog</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.6</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/developers</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>monthly</changefreq>
|
||||
<priority>0.5</priority>
|
||||
</url>
|
||||
|
||||
@@ -2,265 +2,265 @@
|
||||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/pdf-to-word</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.9</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/word-to-pdf</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.9</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/compress-pdf</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.9</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/merge-pdf</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.9</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/split-pdf</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/rotate-pdf</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.7</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/pdf-to-images</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/images-to-pdf</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/watermark-pdf</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.7</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/protect-pdf</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/unlock-pdf</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/page-numbers</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.7</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/pdf-editor</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/pdf-flowchart</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.7</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/pdf-to-excel</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/remove-watermark-pdf</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.7</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/reorder-pdf</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.7</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/extract-pages</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.7</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/image-converter</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/image-resize</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/compress-image</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/ocr</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/remove-background</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/image-to-svg</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/html-to-pdf</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.7</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/chat-pdf</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/summarize-pdf</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/translate-pdf</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/extract-tables</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/qr-code</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.7</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/video-to-gif</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.7</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/word-counter</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.6</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/text-cleaner</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.6</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/pdf-to-pptx</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/excel-to-pdf</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/pptx-to-pdf</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/sign-pdf</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/crop-pdf</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.7</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/flatten-pdf</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.7</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/repair-pdf</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.7</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/pdf-metadata</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.6</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/image-crop</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.7</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/image-rotate-flip</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.7</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://dociva.io/tools/barcode-generator</loc>
|
||||
<lastmod>2026-03-29</lastmod>
|
||||
<lastmod>2026-03-30</lastmod>
|
||||
<changefreq>weekly</changefreq>
|
||||
<priority>0.7</priority>
|
||||
</url>
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import { useState, useEffect } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { Helmet } from 'react-helmet-async';
|
||||
import { Languages } from 'lucide-react';
|
||||
import { Languages, ShieldCheck, Sparkles } from 'lucide-react';
|
||||
import FileUploader from '@/components/shared/FileUploader';
|
||||
import ProgressBar from '@/components/shared/ProgressBar';
|
||||
import AdSlot from '@/components/layout/AdSlot';
|
||||
@@ -26,11 +26,22 @@ const LANGUAGES = [
|
||||
{ value: 'it', label: 'Italiano' },
|
||||
];
|
||||
|
||||
// Resolve a language code to its display label from LANGUAGES.
// Returns null for an empty value or the 'auto' sentinel; falls back to the
// raw code when no entry in LANGUAGES matches.
const getLanguageLabel = (value: string) => {
  const hasExplicitLanguage = Boolean(value) && value !== 'auto';
  if (!hasExplicitLanguage) {
    return null;
  }

  const match = LANGUAGES.find(({ value: code }) => code === value);
  return match?.label ?? value;
};
|
||||
|
||||
export default function TranslatePdf() {
|
||||
const { t } = useTranslation();
|
||||
const [phase, setPhase] = useState<'upload' | 'processing' | 'done'>('upload');
|
||||
const [sourceLang, setSourceLang] = useState('auto');
|
||||
const [targetLang, setTargetLang] = useState('en');
|
||||
const [translation, setTranslation] = useState('');
|
||||
const [provider, setProvider] = useState('');
|
||||
const [detectedSourceLanguage, setDetectedSourceLanguage] = useState('');
|
||||
|
||||
const {
|
||||
file, uploadProgress, isUploading, taskId,
|
||||
@@ -39,7 +50,7 @@ export default function TranslatePdf() {
|
||||
endpoint: '/pdf-ai/translate',
|
||||
maxSizeMB: 20,
|
||||
acceptedTypes: ['pdf'],
|
||||
extraData: { target_language: targetLang },
|
||||
extraData: { target_language: targetLang, source_language: sourceLang },
|
||||
});
|
||||
|
||||
const { status, result, error: taskError } = useTaskPolling({
|
||||
@@ -47,6 +58,8 @@ export default function TranslatePdf() {
|
||||
onComplete: (r) => {
|
||||
setPhase('done');
|
||||
setTranslation(r.translation || '');
|
||||
setProvider(r.provider || '');
|
||||
setDetectedSourceLanguage(r.detected_source_language || '');
|
||||
dispatchRatingPrompt('translate-pdf');
|
||||
},
|
||||
onError: () => setPhase('done'),
|
||||
@@ -63,7 +76,17 @@ export default function TranslatePdf() {
|
||||
if (id) setPhase('processing');
|
||||
};
|
||||
|
||||
const handleReset = () => { reset(); setPhase('upload'); setTargetLang('en'); setTranslation(''); };
|
||||
const handleReset = () => {
|
||||
reset();
|
||||
setPhase('upload');
|
||||
setSourceLang('auto');
|
||||
setTargetLang('en');
|
||||
setTranslation('');
|
||||
setProvider('');
|
||||
setDetectedSourceLanguage('');
|
||||
};
|
||||
|
||||
const resolvedDetectedLanguage = getLanguageLabel(detectedSourceLanguage) || getLanguageLabel(sourceLang);
|
||||
|
||||
const schema = generateToolSchema({
|
||||
name: t('tools.translatePdf.title'),
|
||||
@@ -103,15 +126,44 @@ export default function TranslatePdf() {
|
||||
{file && !isUploading && (
|
||||
<>
|
||||
<div className="rounded-2xl bg-white p-5 ring-1 ring-slate-200 dark:bg-slate-800 dark:ring-slate-700">
|
||||
<label className="mb-2 block text-sm font-medium text-slate-700 dark:text-slate-300">
|
||||
{t('tools.translatePdf.targetLang')}
|
||||
</label>
|
||||
<select value={targetLang} onChange={(e) => setTargetLang(e.target.value)}
|
||||
className="w-full rounded-lg border border-slate-300 px-3 py-2 text-sm dark:border-slate-600 dark:bg-slate-700 dark:text-slate-200">
|
||||
{LANGUAGES.map((lang) => (
|
||||
<option key={lang.value} value={lang.value}>{lang.label}</option>
|
||||
))}
|
||||
</select>
|
||||
<div className="mb-4 flex items-start gap-3 rounded-xl bg-slate-50 p-4 dark:bg-slate-900/60">
|
||||
<ShieldCheck className="mt-0.5 h-5 w-5 text-emerald-600 dark:text-emerald-400" />
|
||||
<div>
|
||||
<p className="text-sm font-semibold text-slate-900 dark:text-slate-100">
|
||||
{t('tools.translatePdf.engineTitle')}
|
||||
</p>
|
||||
<p className="mt-1 text-sm text-slate-600 dark:text-slate-400">
|
||||
{t('tools.translatePdf.engineDescription')}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="grid gap-4 md:grid-cols-2">
|
||||
<div>
|
||||
<label className="mb-2 block text-sm font-medium text-slate-700 dark:text-slate-300">
|
||||
{t('tools.translatePdf.sourceLang')}
|
||||
</label>
|
||||
<select value={sourceLang} onChange={(e) => setSourceLang(e.target.value)}
|
||||
className="w-full rounded-lg border border-slate-300 px-3 py-2 text-sm dark:border-slate-600 dark:bg-slate-700 dark:text-slate-200">
|
||||
<option value="auto">{t('tools.translatePdf.autoDetect')}</option>
|
||||
{LANGUAGES.map((lang) => (
|
||||
<option key={`source-${lang.value}`} value={lang.value}>{lang.label}</option>
|
||||
))}
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<label className="mb-2 block text-sm font-medium text-slate-700 dark:text-slate-300">
|
||||
{t('tools.translatePdf.targetLang')}
|
||||
</label>
|
||||
<select value={targetLang} onChange={(e) => setTargetLang(e.target.value)}
|
||||
className="w-full rounded-lg border border-slate-300 px-3 py-2 text-sm dark:border-slate-600 dark:bg-slate-700 dark:text-slate-200">
|
||||
{LANGUAGES.map((lang) => (
|
||||
<option key={lang.value} value={lang.value}>{lang.label}</option>
|
||||
))}
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<button onClick={handleUpload} className="btn-primary w-full">
|
||||
{t('tools.translatePdf.shortDesc')}
|
||||
@@ -122,11 +174,39 @@ export default function TranslatePdf() {
|
||||
)}
|
||||
|
||||
{phase === 'processing' && !result && (
|
||||
<ProgressBar state={status?.state || 'PENDING'} message={status?.progress} />
|
||||
<div className="space-y-4">
|
||||
<ProgressBar state={status?.state || 'PENDING'} message={status?.progress} />
|
||||
<div className="rounded-xl bg-white p-4 ring-1 ring-slate-200 dark:bg-slate-800 dark:ring-slate-700">
|
||||
<div className="flex items-start gap-3">
|
||||
<Sparkles className="mt-0.5 h-5 w-5 text-purple-600 dark:text-purple-400" />
|
||||
<p className="text-sm text-slate-600 dark:text-slate-400">
|
||||
{t('tools.translatePdf.processingHint')}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{phase === 'done' && translation && (
|
||||
<div className="space-y-4">
|
||||
<div className="grid gap-3 sm:grid-cols-2">
|
||||
<div className="rounded-xl bg-white p-4 ring-1 ring-slate-200 dark:bg-slate-800 dark:ring-slate-700">
|
||||
<p className="text-xs font-semibold uppercase tracking-wide text-slate-500 dark:text-slate-400">
|
||||
{t('tools.translatePdf.sourceDetected')}
|
||||
</p>
|
||||
<p className="mt-1 text-sm font-medium text-slate-900 dark:text-slate-100">
|
||||
{resolvedDetectedLanguage || t('tools.translatePdf.autoDetect')}
|
||||
</p>
|
||||
</div>
|
||||
<div className="rounded-xl bg-white p-4 ring-1 ring-slate-200 dark:bg-slate-800 dark:ring-slate-700">
|
||||
<p className="text-xs font-semibold uppercase tracking-wide text-slate-500 dark:text-slate-400">
|
||||
{t('tools.translatePdf.translationEngine')}
|
||||
</p>
|
||||
<p className="mt-1 text-sm font-medium text-slate-900 dark:text-slate-100">
|
||||
{provider || 'auto'}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<div className="rounded-2xl bg-white p-6 ring-1 ring-slate-200 dark:bg-slate-800 dark:ring-slate-700">
|
||||
<h3 className="mb-3 text-sm font-semibold text-slate-700 dark:text-slate-300">
|
||||
{t('tools.translatePdf.resultTitle')}
|
||||
|
||||
@@ -827,9 +827,16 @@
|
||||
},
|
||||
"translatePdf": {
|
||||
"title": "ترجمة PDF",
|
||||
"description": "ترجم محتوى مستند PDF إلى أي لغة باستخدام الذكاء الاصطناعي.",
|
||||
"description": "ترجم ملفات PDF عبر مسار ترجمة احترافي مع fallback تلقائي وتعامل أفضل مع الملفات الممسوحة ضوئياً.",
|
||||
"shortDesc": "ترجمة PDF",
|
||||
"sourceLang": "لغة المصدر",
|
||||
"targetLang": "اللغة المستهدفة",
|
||||
"autoDetect": "اكتشاف تلقائي",
|
||||
"engineTitle": "ترجمة مستندات بجاهزية إنتاجية",
|
||||
"engineDescription": "يتم إرسال الملف أولاً إلى مزود ترجمة احترافي، ثم يتم التحويل تلقائياً إلى مسار AI فقط عند الحاجة. هذا يقلل مشاكل الضغط ويحسن ثبات النتيجة.",
|
||||
"processingHint": "قد تتم ترجمة المستندات الكبيرة على عدة أجزاء مع retries وfallback بين المزودات. اترك الصفحة مفتوحة حتى يكتمل الطلب.",
|
||||
"sourceDetected": "لغة المصدر المكتشفة",
|
||||
"translationEngine": "محرك الترجمة",
|
||||
"resultTitle": "الترجمة"
|
||||
},
|
||||
"tableExtractor": {
|
||||
|
||||
@@ -827,9 +827,16 @@
|
||||
},
|
||||
"translatePdf": {
|
||||
"title": "Translate PDF",
|
||||
"description": "Translate your PDF document content to any language using AI.",
|
||||
"description": "Translate PDF documents with a premium translation pipeline, automatic fallback, and better handling for scanned files.",
|
||||
"shortDesc": "Translate PDF",
|
||||
"sourceLang": "Source Language",
|
||||
"targetLang": "Target Language",
|
||||
"autoDetect": "Auto detect",
|
||||
"engineTitle": "Production-grade document translation",
|
||||
"engineDescription": "Your file is translated with a premium translation provider first, then automatically falls back to AI only if needed. This reduces high-demand failures and improves consistency.",
|
||||
"processingHint": "Large documents may be translated in multiple chunks with retries and provider fallback. Keep this page open until the job completes.",
|
||||
"sourceDetected": "Detected source",
|
||||
"translationEngine": "Translation engine",
|
||||
"resultTitle": "Translation"
|
||||
},
|
||||
"tableExtractor": {
|
||||
|
||||
@@ -827,9 +827,16 @@
|
||||
},
|
||||
"translatePdf": {
|
||||
"title": "Traduire un PDF",
|
||||
"description": "Traduisez le contenu de votre document PDF dans n'importe quelle langue grâce à l'IA.",
|
||||
"description": "Traduisez vos PDF avec un pipeline premium, un fallback automatique et une meilleure prise en charge des fichiers scannés.",
|
||||
"shortDesc": "Traduire le PDF",
|
||||
"sourceLang": "Langue source",
|
||||
"targetLang": "Langue cible",
|
||||
"autoDetect": "Détection automatique",
|
||||
"engineTitle": "Traduction documentaire de niveau production",
|
||||
"engineDescription": "Votre fichier passe d'abord par un fournisseur de traduction premium, puis bascule automatiquement vers l'IA seulement si nécessaire. Cela réduit les erreurs de forte demande et améliore la stabilité.",
|
||||
"processingHint": "Les documents volumineux peuvent être traduits en plusieurs segments avec retries et fallback entre fournisseurs. Laissez cette page ouverte jusqu'à la fin du traitement.",
|
||||
"sourceDetected": "Source détectée",
|
||||
"translationEngine": "Moteur de traduction",
|
||||
"resultTitle": "Traduction"
|
||||
},
|
||||
"tableExtractor": {
|
||||
|
||||
@@ -237,6 +237,10 @@ export interface TaskResult {
|
||||
summary?: string;
|
||||
translation?: string;
|
||||
target_language?: string;
|
||||
source_language?: string;
|
||||
detected_source_language?: string;
|
||||
provider?: string;
|
||||
chunks_translated?: number;
|
||||
pages_analyzed?: number;
|
||||
// Table extraction fields
|
||||
tables?: Array<{ page: number; table_index: number; headers: string[]; rows: string[][] }>;
|
||||
|
||||
Reference in New Issue
Block a user