feat: harden PDF translation workflow
This commit is contained in:
@@ -20,6 +20,11 @@ OPENROUTER_API_KEY=
|
|||||||
OPENROUTER_MODEL=nvidia/nemotron-3-super-120b-a12b:free
|
OPENROUTER_MODEL=nvidia/nemotron-3-super-120b-a12b:free
|
||||||
OPENROUTER_BASE_URL=https://openrouter.ai/api/v1/chat/completions
|
OPENROUTER_BASE_URL=https://openrouter.ai/api/v1/chat/completions
|
||||||
|
|
||||||
|
# Premium document translation (recommended for Translate PDF)
|
||||||
|
DEEPL_API_KEY=
|
||||||
|
DEEPL_API_URL=https://api-free.deepl.com/v2/translate
|
||||||
|
DEEPL_TIMEOUT_SECONDS=90
|
||||||
|
|
||||||
# AWS S3
|
# AWS S3
|
||||||
AWS_ACCESS_KEY_ID=
|
AWS_ACCESS_KEY_ID=
|
||||||
AWS_SECRET_ACCESS_KEY=
|
AWS_SECRET_ACCESS_KEY=
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
"""PDF AI tool routes — Chat, Summarize, Translate, Table Extract."""
|
"""PDF AI tool routes — Chat, Summarize, Translate, Table Extract."""
|
||||||
|
|
||||||
from flask import Blueprint, request, jsonify
|
from flask import Blueprint, request, jsonify
|
||||||
|
|
||||||
from app.extensions import limiter
|
from app.extensions import limiter
|
||||||
@@ -70,10 +71,12 @@ def chat_pdf_route():
|
|||||||
)
|
)
|
||||||
record_accepted_usage(actor, "chat-pdf", task.id)
|
record_accepted_usage(actor, "chat-pdf", task.id)
|
||||||
|
|
||||||
return jsonify({
|
return jsonify(
|
||||||
"task_id": task.id,
|
{
|
||||||
"message": "Processing your question. Poll /api/tasks/{task_id}/status for progress.",
|
"task_id": task.id,
|
||||||
}), 202
|
"message": "Processing your question. Poll /api/tasks/{task_id}/status for progress.",
|
||||||
|
}
|
||||||
|
), 202
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -124,10 +127,12 @@ def summarize_pdf_route():
|
|||||||
)
|
)
|
||||||
record_accepted_usage(actor, "summarize-pdf", task.id)
|
record_accepted_usage(actor, "summarize-pdf", task.id)
|
||||||
|
|
||||||
return jsonify({
|
return jsonify(
|
||||||
"task_id": task.id,
|
{
|
||||||
"message": "Summarizing document. Poll /api/tasks/{task_id}/status for progress.",
|
"task_id": task.id,
|
||||||
}), 202
|
"message": "Summarizing document. Poll /api/tasks/{task_id}/status for progress.",
|
||||||
|
}
|
||||||
|
), 202
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -149,6 +154,7 @@ def translate_pdf_route():
|
|||||||
|
|
||||||
file = request.files["file"]
|
file = request.files["file"]
|
||||||
target_language = request.form.get("target_language", "").strip()
|
target_language = request.form.get("target_language", "").strip()
|
||||||
|
source_language = request.form.get("source_language", "auto").strip()
|
||||||
|
|
||||||
if not target_language:
|
if not target_language:
|
||||||
return jsonify({"error": "No target language specified."}), 400
|
return jsonify({"error": "No target language specified."}), 400
|
||||||
@@ -174,14 +180,17 @@ def translate_pdf_route():
|
|||||||
task_id,
|
task_id,
|
||||||
original_filename,
|
original_filename,
|
||||||
target_language,
|
target_language,
|
||||||
|
source_language,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "translate-pdf", task.id)
|
record_accepted_usage(actor, "translate-pdf", task.id)
|
||||||
|
|
||||||
return jsonify({
|
return jsonify(
|
||||||
"task_id": task.id,
|
{
|
||||||
"message": "Translating document. Poll /api/tasks/{task_id}/status for progress.",
|
"task_id": task.id,
|
||||||
}), 202
|
"message": "Translating document. Poll /api/tasks/{task_id}/status for progress.",
|
||||||
|
}
|
||||||
|
), 202
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -226,7 +235,9 @@ def extract_tables_route():
|
|||||||
)
|
)
|
||||||
record_accepted_usage(actor, "extract-tables", task.id)
|
record_accepted_usage(actor, "extract-tables", task.id)
|
||||||
|
|
||||||
return jsonify({
|
return jsonify(
|
||||||
"task_id": task.id,
|
{
|
||||||
"message": "Extracting tables. Poll /api/tasks/{task_id}/status for progress.",
|
"task_id": task.id,
|
||||||
}), 202
|
"message": "Extracting tables. Poll /api/tasks/{task_id}/status for progress.",
|
||||||
|
}
|
||||||
|
), 202
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
"""B2B API v1 tool routes — authenticated via X-API-Key, Pro plan only."""
|
"""B2B API v1 tool routes — authenticated via X-API-Key, Pro plan only."""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import uuid
|
import uuid
|
||||||
import logging
|
import logging
|
||||||
@@ -37,16 +38,25 @@ from app.tasks.flowchart_tasks import extract_flowchart_task
|
|||||||
from app.tasks.ocr_tasks import ocr_image_task, ocr_pdf_task
|
from app.tasks.ocr_tasks import ocr_image_task, ocr_pdf_task
|
||||||
from app.tasks.removebg_tasks import remove_bg_task
|
from app.tasks.removebg_tasks import remove_bg_task
|
||||||
from app.tasks.pdf_ai_tasks import (
|
from app.tasks.pdf_ai_tasks import (
|
||||||
chat_with_pdf_task, summarize_pdf_task, translate_pdf_task, extract_tables_task,
|
chat_with_pdf_task,
|
||||||
|
summarize_pdf_task,
|
||||||
|
translate_pdf_task,
|
||||||
|
extract_tables_task,
|
||||||
)
|
)
|
||||||
from app.tasks.pdf_to_excel_tasks import pdf_to_excel_task
|
from app.tasks.pdf_to_excel_tasks import pdf_to_excel_task
|
||||||
from app.tasks.html_to_pdf_tasks import html_to_pdf_task
|
from app.tasks.html_to_pdf_tasks import html_to_pdf_task
|
||||||
from app.tasks.qrcode_tasks import generate_qr_task
|
from app.tasks.qrcode_tasks import generate_qr_task
|
||||||
from app.tasks.pdf_convert_tasks import (
|
from app.tasks.pdf_convert_tasks import (
|
||||||
pdf_to_pptx_task, excel_to_pdf_task, pptx_to_pdf_task, sign_pdf_task,
|
pdf_to_pptx_task,
|
||||||
|
excel_to_pdf_task,
|
||||||
|
pptx_to_pdf_task,
|
||||||
|
sign_pdf_task,
|
||||||
)
|
)
|
||||||
from app.tasks.pdf_extra_tasks import (
|
from app.tasks.pdf_extra_tasks import (
|
||||||
crop_pdf_task, flatten_pdf_task, repair_pdf_task, edit_metadata_task,
|
crop_pdf_task,
|
||||||
|
flatten_pdf_task,
|
||||||
|
repair_pdf_task,
|
||||||
|
edit_metadata_task,
|
||||||
)
|
)
|
||||||
from app.tasks.image_extra_tasks import crop_image_task, rotate_flip_image_task
|
from app.tasks.image_extra_tasks import crop_image_task, rotate_flip_image_task
|
||||||
from app.tasks.barcode_tasks import generate_barcode_task
|
from app.tasks.barcode_tasks import generate_barcode_task
|
||||||
@@ -80,6 +90,7 @@ def _resolve_and_check() -> tuple:
|
|||||||
# Task status — GET /api/v1/tasks/<task_id>/status
|
# Task status — GET /api/v1/tasks/<task_id>/status
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
@v1_bp.route("/tasks/<task_id>/status", methods=["GET"])
|
@v1_bp.route("/tasks/<task_id>/status", methods=["GET"])
|
||||||
@limiter.limit("300/minute", override_defaults=True)
|
@limiter.limit("300/minute", override_defaults=True)
|
||||||
def get_task_status(task_id: str):
|
def get_task_status(task_id: str):
|
||||||
@@ -113,6 +124,7 @@ def get_task_status(task_id: str):
|
|||||||
# Compress — POST /api/v1/compress/pdf
|
# Compress — POST /api/v1/compress/pdf
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
@v1_bp.route("/compress/pdf", methods=["POST"])
|
@v1_bp.route("/compress/pdf", methods=["POST"])
|
||||||
@limiter.limit("10/minute")
|
@limiter.limit("10/minute")
|
||||||
def compress_pdf_route():
|
def compress_pdf_route():
|
||||||
@@ -130,7 +142,9 @@ def compress_pdf_route():
|
|||||||
quality = "medium"
|
quality = "medium"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
original_filename, ext = validate_actor_file(
|
||||||
|
file, allowed_types=["pdf"], actor=actor
|
||||||
|
)
|
||||||
except FileValidationError as e:
|
except FileValidationError as e:
|
||||||
return jsonify({"error": e.message}), e.code
|
return jsonify({"error": e.message}), e.code
|
||||||
|
|
||||||
@@ -138,7 +152,10 @@ def compress_pdf_route():
|
|||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
|
|
||||||
task = compress_pdf_task.delay(
|
task = compress_pdf_task.delay(
|
||||||
input_path, task_id, original_filename, quality,
|
input_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
|
quality,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "compress-pdf", task.id)
|
record_accepted_usage(actor, "compress-pdf", task.id)
|
||||||
@@ -150,6 +167,7 @@ def compress_pdf_route():
|
|||||||
# Convert — POST /api/v1/convert/pdf-to-word & /api/v1/convert/word-to-pdf
|
# Convert — POST /api/v1/convert/pdf-to-word & /api/v1/convert/word-to-pdf
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
@v1_bp.route("/convert/pdf-to-word", methods=["POST"])
|
@v1_bp.route("/convert/pdf-to-word", methods=["POST"])
|
||||||
@limiter.limit("10/minute")
|
@limiter.limit("10/minute")
|
||||||
def pdf_to_word_route():
|
def pdf_to_word_route():
|
||||||
@@ -163,7 +181,9 @@ def pdf_to_word_route():
|
|||||||
|
|
||||||
file = request.files["file"]
|
file = request.files["file"]
|
||||||
try:
|
try:
|
||||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
original_filename, ext = validate_actor_file(
|
||||||
|
file, allowed_types=["pdf"], actor=actor
|
||||||
|
)
|
||||||
except FileValidationError as e:
|
except FileValidationError as e:
|
||||||
return jsonify({"error": e.message}), e.code
|
return jsonify({"error": e.message}), e.code
|
||||||
|
|
||||||
@@ -171,7 +191,9 @@ def pdf_to_word_route():
|
|||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
|
|
||||||
task = convert_pdf_to_word.delay(
|
task = convert_pdf_to_word.delay(
|
||||||
input_path, task_id, original_filename,
|
input_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "pdf-to-word", task.id)
|
record_accepted_usage(actor, "pdf-to-word", task.id)
|
||||||
@@ -201,7 +223,9 @@ def word_to_pdf_route():
|
|||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
|
|
||||||
task = convert_word_to_pdf.delay(
|
task = convert_word_to_pdf.delay(
|
||||||
input_path, task_id, original_filename,
|
input_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "word-to-pdf", task.id)
|
record_accepted_usage(actor, "word-to-pdf", task.id)
|
||||||
@@ -212,6 +236,7 @@ def word_to_pdf_route():
|
|||||||
# Image — POST /api/v1/image/convert & /api/v1/image/resize
|
# Image — POST /api/v1/image/convert & /api/v1/image/resize
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
@v1_bp.route("/image/convert", methods=["POST"])
|
@v1_bp.route("/image/convert", methods=["POST"])
|
||||||
@limiter.limit("10/minute")
|
@limiter.limit("10/minute")
|
||||||
def convert_image_route():
|
def convert_image_route():
|
||||||
@@ -226,7 +251,9 @@ def convert_image_route():
|
|||||||
file = request.files["file"]
|
file = request.files["file"]
|
||||||
output_format = request.form.get("format", "").lower()
|
output_format = request.form.get("format", "").lower()
|
||||||
if output_format not in ALLOWED_OUTPUT_FORMATS:
|
if output_format not in ALLOWED_OUTPUT_FORMATS:
|
||||||
return jsonify({"error": f"Invalid format. Supported: {', '.join(ALLOWED_OUTPUT_FORMATS)}"}), 400
|
return jsonify(
|
||||||
|
{"error": f"Invalid format. Supported: {', '.join(ALLOWED_OUTPUT_FORMATS)}"}
|
||||||
|
), 400
|
||||||
|
|
||||||
try:
|
try:
|
||||||
quality = max(1, min(100, int(request.form.get("quality", "85"))))
|
quality = max(1, min(100, int(request.form.get("quality", "85"))))
|
||||||
@@ -244,7 +271,11 @@ def convert_image_route():
|
|||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
|
|
||||||
task = convert_image_task.delay(
|
task = convert_image_task.delay(
|
||||||
input_path, task_id, original_filename, output_format, quality,
|
input_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
|
output_format,
|
||||||
|
quality,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "image-convert", task.id)
|
record_accepted_usage(actor, "image-convert", task.id)
|
||||||
@@ -292,7 +323,12 @@ def resize_image_route():
|
|||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
|
|
||||||
task = resize_image_task.delay(
|
task = resize_image_task.delay(
|
||||||
input_path, task_id, original_filename, width, height, quality,
|
input_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
|
width,
|
||||||
|
height,
|
||||||
|
quality,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "image-resize", task.id)
|
record_accepted_usage(actor, "image-resize", task.id)
|
||||||
@@ -303,6 +339,7 @@ def resize_image_route():
|
|||||||
# Video — POST /api/v1/video/to-gif
|
# Video — POST /api/v1/video/to-gif
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
@v1_bp.route("/video/to-gif", methods=["POST"])
|
@v1_bp.route("/video/to-gif", methods=["POST"])
|
||||||
@limiter.limit("5/minute")
|
@limiter.limit("5/minute")
|
||||||
def video_to_gif_route():
|
def video_to_gif_route():
|
||||||
@@ -343,7 +380,13 @@ def video_to_gif_route():
|
|||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
|
|
||||||
task = create_gif_task.delay(
|
task = create_gif_task.delay(
|
||||||
input_path, task_id, original_filename, start_time, duration, fps, width,
|
input_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
|
start_time,
|
||||||
|
duration,
|
||||||
|
fps,
|
||||||
|
width,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "video-to-gif", task.id)
|
record_accepted_usage(actor, "video-to-gif", task.id)
|
||||||
@@ -354,6 +397,7 @@ def video_to_gif_route():
|
|||||||
# PDF Tools — all single-file and multi-file routes
|
# PDF Tools — all single-file and multi-file routes
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
@v1_bp.route("/pdf-tools/merge", methods=["POST"])
|
@v1_bp.route("/pdf-tools/merge", methods=["POST"])
|
||||||
@limiter.limit("10/minute")
|
@limiter.limit("10/minute")
|
||||||
def merge_pdfs_route():
|
def merge_pdfs_route():
|
||||||
@@ -372,7 +416,9 @@ def merge_pdfs_route():
|
|||||||
input_paths, original_filenames = [], []
|
input_paths, original_filenames = [], []
|
||||||
for f in files:
|
for f in files:
|
||||||
try:
|
try:
|
||||||
original_filename, ext = validate_actor_file(f, allowed_types=["pdf"], actor=actor)
|
original_filename, ext = validate_actor_file(
|
||||||
|
f, allowed_types=["pdf"], actor=actor
|
||||||
|
)
|
||||||
except FileValidationError as e:
|
except FileValidationError as e:
|
||||||
return jsonify({"error": e.message}), e.code
|
return jsonify({"error": e.message}), e.code
|
||||||
upload_dir = os.path.join(current_app.config["UPLOAD_FOLDER"], task_id)
|
upload_dir = os.path.join(current_app.config["UPLOAD_FOLDER"], task_id)
|
||||||
@@ -383,7 +429,9 @@ def merge_pdfs_route():
|
|||||||
original_filenames.append(original_filename)
|
original_filenames.append(original_filename)
|
||||||
|
|
||||||
task = merge_pdfs_task.delay(
|
task = merge_pdfs_task.delay(
|
||||||
input_paths, task_id, original_filenames,
|
input_paths,
|
||||||
|
task_id,
|
||||||
|
original_filenames,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "merge-pdf", task.id)
|
record_accepted_usage(actor, "merge-pdf", task.id)
|
||||||
@@ -410,14 +458,20 @@ def split_pdf_route():
|
|||||||
return jsonify({"error": "Please specify which pages to extract."}), 400
|
return jsonify({"error": "Please specify which pages to extract."}), 400
|
||||||
|
|
||||||
try:
|
try:
|
||||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
original_filename, ext = validate_actor_file(
|
||||||
|
file, allowed_types=["pdf"], actor=actor
|
||||||
|
)
|
||||||
except FileValidationError as e:
|
except FileValidationError as e:
|
||||||
return jsonify({"error": e.message}), e.code
|
return jsonify({"error": e.message}), e.code
|
||||||
|
|
||||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
task = split_pdf_task.delay(
|
task = split_pdf_task.delay(
|
||||||
input_path, task_id, original_filename, mode, pages,
|
input_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
|
mode,
|
||||||
|
pages,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "split-pdf", task.id)
|
record_accepted_usage(actor, "split-pdf", task.id)
|
||||||
@@ -445,14 +499,20 @@ def rotate_pdf_route():
|
|||||||
pages = request.form.get("pages", "all")
|
pages = request.form.get("pages", "all")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
original_filename, ext = validate_actor_file(
|
||||||
|
file, allowed_types=["pdf"], actor=actor
|
||||||
|
)
|
||||||
except FileValidationError as e:
|
except FileValidationError as e:
|
||||||
return jsonify({"error": e.message}), e.code
|
return jsonify({"error": e.message}), e.code
|
||||||
|
|
||||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
task = rotate_pdf_task.delay(
|
task = rotate_pdf_task.delay(
|
||||||
input_path, task_id, original_filename, rotation, pages,
|
input_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
|
rotation,
|
||||||
|
pages,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "rotate-pdf", task.id)
|
record_accepted_usage(actor, "rotate-pdf", task.id)
|
||||||
@@ -473,8 +533,12 @@ def add_page_numbers_route():
|
|||||||
file = request.files["file"]
|
file = request.files["file"]
|
||||||
position = request.form.get("position", "bottom-center")
|
position = request.form.get("position", "bottom-center")
|
||||||
valid_positions = [
|
valid_positions = [
|
||||||
"bottom-center", "bottom-right", "bottom-left",
|
"bottom-center",
|
||||||
"top-center", "top-right", "top-left",
|
"bottom-right",
|
||||||
|
"bottom-left",
|
||||||
|
"top-center",
|
||||||
|
"top-right",
|
||||||
|
"top-left",
|
||||||
]
|
]
|
||||||
if position not in valid_positions:
|
if position not in valid_positions:
|
||||||
position = "bottom-center"
|
position = "bottom-center"
|
||||||
@@ -484,14 +548,20 @@ def add_page_numbers_route():
|
|||||||
start_number = 1
|
start_number = 1
|
||||||
|
|
||||||
try:
|
try:
|
||||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
original_filename, ext = validate_actor_file(
|
||||||
|
file, allowed_types=["pdf"], actor=actor
|
||||||
|
)
|
||||||
except FileValidationError as e:
|
except FileValidationError as e:
|
||||||
return jsonify({"error": e.message}), e.code
|
return jsonify({"error": e.message}), e.code
|
||||||
|
|
||||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
task = add_page_numbers_task.delay(
|
task = add_page_numbers_task.delay(
|
||||||
input_path, task_id, original_filename, position, start_number,
|
input_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
|
position,
|
||||||
|
start_number,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "page-numbers", task.id)
|
record_accepted_usage(actor, "page-numbers", task.id)
|
||||||
@@ -519,14 +589,20 @@ def pdf_to_images_route():
|
|||||||
dpi = 200
|
dpi = 200
|
||||||
|
|
||||||
try:
|
try:
|
||||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
original_filename, ext = validate_actor_file(
|
||||||
|
file, allowed_types=["pdf"], actor=actor
|
||||||
|
)
|
||||||
except FileValidationError as e:
|
except FileValidationError as e:
|
||||||
return jsonify({"error": e.message}), e.code
|
return jsonify({"error": e.message}), e.code
|
||||||
|
|
||||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
task = pdf_to_images_task.delay(
|
task = pdf_to_images_task.delay(
|
||||||
input_path, task_id, original_filename, output_format, dpi,
|
input_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
|
output_format,
|
||||||
|
dpi,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "pdf-to-images", task.id)
|
record_accepted_usage(actor, "pdf-to-images", task.id)
|
||||||
@@ -564,7 +640,9 @@ def images_to_pdf_route():
|
|||||||
original_filenames.append(original_filename)
|
original_filenames.append(original_filename)
|
||||||
|
|
||||||
task = images_to_pdf_task.delay(
|
task = images_to_pdf_task.delay(
|
||||||
input_paths, task_id, original_filenames,
|
input_paths,
|
||||||
|
task_id,
|
||||||
|
original_filenames,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "images-to-pdf", task.id)
|
record_accepted_usage(actor, "images-to-pdf", task.id)
|
||||||
@@ -594,14 +672,20 @@ def watermark_pdf_route():
|
|||||||
opacity = 0.3
|
opacity = 0.3
|
||||||
|
|
||||||
try:
|
try:
|
||||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
original_filename, ext = validate_actor_file(
|
||||||
|
file, allowed_types=["pdf"], actor=actor
|
||||||
|
)
|
||||||
except FileValidationError as e:
|
except FileValidationError as e:
|
||||||
return jsonify({"error": e.message}), e.code
|
return jsonify({"error": e.message}), e.code
|
||||||
|
|
||||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
task = watermark_pdf_task.delay(
|
task = watermark_pdf_task.delay(
|
||||||
input_path, task_id, original_filename, watermark_text, opacity,
|
input_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
|
watermark_text,
|
||||||
|
opacity,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "watermark-pdf", task.id)
|
record_accepted_usage(actor, "watermark-pdf", task.id)
|
||||||
@@ -627,14 +711,19 @@ def protect_pdf_route():
|
|||||||
return jsonify({"error": "Password must be at least 4 characters."}), 400
|
return jsonify({"error": "Password must be at least 4 characters."}), 400
|
||||||
|
|
||||||
try:
|
try:
|
||||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
original_filename, ext = validate_actor_file(
|
||||||
|
file, allowed_types=["pdf"], actor=actor
|
||||||
|
)
|
||||||
except FileValidationError as e:
|
except FileValidationError as e:
|
||||||
return jsonify({"error": e.message}), e.code
|
return jsonify({"error": e.message}), e.code
|
||||||
|
|
||||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
task = protect_pdf_task.delay(
|
task = protect_pdf_task.delay(
|
||||||
input_path, task_id, original_filename, password,
|
input_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
|
password,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "protect-pdf", task.id)
|
record_accepted_usage(actor, "protect-pdf", task.id)
|
||||||
@@ -658,14 +747,19 @@ def unlock_pdf_route():
|
|||||||
return jsonify({"error": "Password is required."}), 400
|
return jsonify({"error": "Password is required."}), 400
|
||||||
|
|
||||||
try:
|
try:
|
||||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
original_filename, ext = validate_actor_file(
|
||||||
|
file, allowed_types=["pdf"], actor=actor
|
||||||
|
)
|
||||||
except FileValidationError as e:
|
except FileValidationError as e:
|
||||||
return jsonify({"error": e.message}), e.code
|
return jsonify({"error": e.message}), e.code
|
||||||
|
|
||||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
task = unlock_pdf_task.delay(
|
task = unlock_pdf_task.delay(
|
||||||
input_path, task_id, original_filename, password,
|
input_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
|
password,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "unlock-pdf", task.id)
|
record_accepted_usage(actor, "unlock-pdf", task.id)
|
||||||
@@ -685,18 +779,24 @@ def extract_flowchart_route():
|
|||||||
|
|
||||||
file = request.files["file"]
|
file = request.files["file"]
|
||||||
try:
|
try:
|
||||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
original_filename, ext = validate_actor_file(
|
||||||
|
file, allowed_types=["pdf"], actor=actor
|
||||||
|
)
|
||||||
except FileValidationError as e:
|
except FileValidationError as e:
|
||||||
return jsonify({"error": e.message}), e.code
|
return jsonify({"error": e.message}), e.code
|
||||||
|
|
||||||
task_id, input_path = generate_safe_path(ext)
|
task_id, input_path = generate_safe_path(ext)
|
||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
task = extract_flowchart_task.delay(
|
task = extract_flowchart_task.delay(
|
||||||
input_path, task_id, original_filename,
|
input_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "pdf-flowchart", task.id)
|
record_accepted_usage(actor, "pdf-flowchart", task.id)
|
||||||
return jsonify({"task_id": task.id, "message": "Flowchart extraction started."}), 202
|
return jsonify(
|
||||||
|
{"task_id": task.id, "message": "Flowchart extraction started."}
|
||||||
|
), 202
|
||||||
|
|
||||||
|
|
||||||
# ===========================================================================
|
# ===========================================================================
|
||||||
@@ -707,6 +807,7 @@ def extract_flowchart_route():
|
|||||||
# OCR — POST /api/v1/ocr/image & /api/v1/ocr/pdf
|
# OCR — POST /api/v1/ocr/image & /api/v1/ocr/pdf
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
@v1_bp.route("/ocr/image", methods=["POST"])
|
@v1_bp.route("/ocr/image", methods=["POST"])
|
||||||
@limiter.limit("10/minute")
|
@limiter.limit("10/minute")
|
||||||
def ocr_image_route():
|
def ocr_image_route():
|
||||||
@@ -731,7 +832,10 @@ def ocr_image_route():
|
|||||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
task = ocr_image_task.delay(
|
task = ocr_image_task.delay(
|
||||||
input_path, task_id, original_filename, lang,
|
input_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
|
lang,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "ocr-image", task.id)
|
record_accepted_usage(actor, "ocr-image", task.id)
|
||||||
@@ -753,14 +857,19 @@ def ocr_pdf_route():
|
|||||||
lang = request.form.get("lang", "eng")
|
lang = request.form.get("lang", "eng")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
original_filename, ext = validate_actor_file(
|
||||||
|
file, allowed_types=["pdf"], actor=actor
|
||||||
|
)
|
||||||
except FileValidationError as e:
|
except FileValidationError as e:
|
||||||
return jsonify({"error": e.message}), e.code
|
return jsonify({"error": e.message}), e.code
|
||||||
|
|
||||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
task = ocr_pdf_task.delay(
|
task = ocr_pdf_task.delay(
|
||||||
input_path, task_id, original_filename, lang,
|
input_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
|
lang,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "ocr-pdf", task.id)
|
record_accepted_usage(actor, "ocr-pdf", task.id)
|
||||||
@@ -771,6 +880,7 @@ def ocr_pdf_route():
|
|||||||
# Remove Background — POST /api/v1/image/remove-bg
|
# Remove Background — POST /api/v1/image/remove-bg
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
@v1_bp.route("/image/remove-bg", methods=["POST"])
|
@v1_bp.route("/image/remove-bg", methods=["POST"])
|
||||||
@limiter.limit("5/minute")
|
@limiter.limit("5/minute")
|
||||||
def remove_bg_route():
|
def remove_bg_route():
|
||||||
@@ -793,7 +903,9 @@ def remove_bg_route():
|
|||||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
task = remove_bg_task.delay(
|
task = remove_bg_task.delay(
|
||||||
input_path, task_id, original_filename,
|
input_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "remove-bg", task.id)
|
record_accepted_usage(actor, "remove-bg", task.id)
|
||||||
@@ -804,6 +916,7 @@ def remove_bg_route():
|
|||||||
# PDF AI — POST /api/v1/pdf-ai/chat, summarize, translate, extract-tables
|
# PDF AI — POST /api/v1/pdf-ai/chat, summarize, translate, extract-tables
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
@v1_bp.route("/pdf-ai/chat", methods=["POST"])
|
@v1_bp.route("/pdf-ai/chat", methods=["POST"])
|
||||||
@limiter.limit("5/minute")
|
@limiter.limit("5/minute")
|
||||||
def chat_pdf_route():
|
def chat_pdf_route():
|
||||||
@@ -821,14 +934,19 @@ def chat_pdf_route():
|
|||||||
return jsonify({"error": "Question is required."}), 400
|
return jsonify({"error": "Question is required."}), 400
|
||||||
|
|
||||||
try:
|
try:
|
||||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
original_filename, ext = validate_actor_file(
|
||||||
|
file, allowed_types=["pdf"], actor=actor
|
||||||
|
)
|
||||||
except FileValidationError as e:
|
except FileValidationError as e:
|
||||||
return jsonify({"error": e.message}), e.code
|
return jsonify({"error": e.message}), e.code
|
||||||
|
|
||||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
task = chat_with_pdf_task.delay(
|
task = chat_with_pdf_task.delay(
|
||||||
input_path, task_id, original_filename, question,
|
input_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
|
question,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "chat-pdf", task.id)
|
record_accepted_usage(actor, "chat-pdf", task.id)
|
||||||
@@ -852,14 +970,19 @@ def summarize_pdf_route():
|
|||||||
length = "medium"
|
length = "medium"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
original_filename, ext = validate_actor_file(
|
||||||
|
file, allowed_types=["pdf"], actor=actor
|
||||||
|
)
|
||||||
except FileValidationError as e:
|
except FileValidationError as e:
|
||||||
return jsonify({"error": e.message}), e.code
|
return jsonify({"error": e.message}), e.code
|
||||||
|
|
||||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
task = summarize_pdf_task.delay(
|
task = summarize_pdf_task.delay(
|
||||||
input_path, task_id, original_filename, length,
|
input_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
|
length,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "summarize-pdf", task.id)
|
record_accepted_usage(actor, "summarize-pdf", task.id)
|
||||||
@@ -879,18 +1002,25 @@ def translate_pdf_route():
|
|||||||
|
|
||||||
file = request.files["file"]
|
file = request.files["file"]
|
||||||
target_language = request.form.get("target_language", "").strip()
|
target_language = request.form.get("target_language", "").strip()
|
||||||
|
source_language = request.form.get("source_language", "auto").strip()
|
||||||
if not target_language:
|
if not target_language:
|
||||||
return jsonify({"error": "Target language is required."}), 400
|
return jsonify({"error": "Target language is required."}), 400
|
||||||
|
|
||||||
try:
|
try:
|
||||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
original_filename, ext = validate_actor_file(
|
||||||
|
file, allowed_types=["pdf"], actor=actor
|
||||||
|
)
|
||||||
except FileValidationError as e:
|
except FileValidationError as e:
|
||||||
return jsonify({"error": e.message}), e.code
|
return jsonify({"error": e.message}), e.code
|
||||||
|
|
||||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
task = translate_pdf_task.delay(
|
task = translate_pdf_task.delay(
|
||||||
input_path, task_id, original_filename, target_language,
|
input_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
|
target_language,
|
||||||
|
source_language,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "translate-pdf", task.id)
|
record_accepted_usage(actor, "translate-pdf", task.id)
|
||||||
@@ -910,14 +1040,18 @@ def extract_tables_route():
|
|||||||
|
|
||||||
file = request.files["file"]
|
file = request.files["file"]
|
||||||
try:
|
try:
|
||||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
original_filename, ext = validate_actor_file(
|
||||||
|
file, allowed_types=["pdf"], actor=actor
|
||||||
|
)
|
||||||
except FileValidationError as e:
|
except FileValidationError as e:
|
||||||
return jsonify({"error": e.message}), e.code
|
return jsonify({"error": e.message}), e.code
|
||||||
|
|
||||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
task = extract_tables_task.delay(
|
task = extract_tables_task.delay(
|
||||||
input_path, task_id, original_filename,
|
input_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "extract-tables", task.id)
|
record_accepted_usage(actor, "extract-tables", task.id)
|
||||||
@@ -928,6 +1062,7 @@ def extract_tables_route():
|
|||||||
# PDF to Excel — POST /api/v1/convert/pdf-to-excel
|
# PDF to Excel — POST /api/v1/convert/pdf-to-excel
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
@v1_bp.route("/convert/pdf-to-excel", methods=["POST"])
|
@v1_bp.route("/convert/pdf-to-excel", methods=["POST"])
|
||||||
@limiter.limit("10/minute")
|
@limiter.limit("10/minute")
|
||||||
def pdf_to_excel_route():
|
def pdf_to_excel_route():
|
||||||
@@ -941,14 +1076,18 @@ def pdf_to_excel_route():
|
|||||||
|
|
||||||
file = request.files["file"]
|
file = request.files["file"]
|
||||||
try:
|
try:
|
||||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
original_filename, ext = validate_actor_file(
|
||||||
|
file, allowed_types=["pdf"], actor=actor
|
||||||
|
)
|
||||||
except FileValidationError as e:
|
except FileValidationError as e:
|
||||||
return jsonify({"error": e.message}), e.code
|
return jsonify({"error": e.message}), e.code
|
||||||
|
|
||||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
task = pdf_to_excel_task.delay(
|
task = pdf_to_excel_task.delay(
|
||||||
input_path, task_id, original_filename,
|
input_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "pdf-to-excel", task.id)
|
record_accepted_usage(actor, "pdf-to-excel", task.id)
|
||||||
@@ -959,6 +1098,7 @@ def pdf_to_excel_route():
|
|||||||
# HTML to PDF — POST /api/v1/convert/html-to-pdf
|
# HTML to PDF — POST /api/v1/convert/html-to-pdf
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
@v1_bp.route("/convert/html-to-pdf", methods=["POST"])
|
@v1_bp.route("/convert/html-to-pdf", methods=["POST"])
|
||||||
@limiter.limit("10/minute")
|
@limiter.limit("10/minute")
|
||||||
def html_to_pdf_route():
|
def html_to_pdf_route():
|
||||||
@@ -981,7 +1121,9 @@ def html_to_pdf_route():
|
|||||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
task = html_to_pdf_task.delay(
|
task = html_to_pdf_task.delay(
|
||||||
input_path, task_id, original_filename,
|
input_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "html-to-pdf", task.id)
|
record_accepted_usage(actor, "html-to-pdf", task.id)
|
||||||
@@ -992,6 +1134,7 @@ def html_to_pdf_route():
|
|||||||
# QR Code — POST /api/v1/qrcode/generate
|
# QR Code — POST /api/v1/qrcode/generate
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
@v1_bp.route("/qrcode/generate", methods=["POST"])
|
@v1_bp.route("/qrcode/generate", methods=["POST"])
|
||||||
@limiter.limit("20/minute")
|
@limiter.limit("20/minute")
|
||||||
def generate_qr_route():
|
def generate_qr_route():
|
||||||
@@ -1018,7 +1161,10 @@ def generate_qr_route():
|
|||||||
|
|
||||||
task_id = str(uuid.uuid4())
|
task_id = str(uuid.uuid4())
|
||||||
task = generate_qr_task.delay(
|
task = generate_qr_task.delay(
|
||||||
task_id, str(data).strip(), size, "png",
|
task_id,
|
||||||
|
str(data).strip(),
|
||||||
|
size,
|
||||||
|
"png",
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "qr-code", task.id)
|
record_accepted_usage(actor, "qr-code", task.id)
|
||||||
@@ -1033,6 +1179,7 @@ def generate_qr_route():
|
|||||||
# PDF to PowerPoint — POST /api/v1/convert/pdf-to-pptx
|
# PDF to PowerPoint — POST /api/v1/convert/pdf-to-pptx
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
@v1_bp.route("/convert/pdf-to-pptx", methods=["POST"])
|
@v1_bp.route("/convert/pdf-to-pptx", methods=["POST"])
|
||||||
@limiter.limit("10/minute")
|
@limiter.limit("10/minute")
|
||||||
def v1_pdf_to_pptx_route():
|
def v1_pdf_to_pptx_route():
|
||||||
@@ -1046,14 +1193,18 @@ def v1_pdf_to_pptx_route():
|
|||||||
|
|
||||||
file = request.files["file"]
|
file = request.files["file"]
|
||||||
try:
|
try:
|
||||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
original_filename, ext = validate_actor_file(
|
||||||
|
file, allowed_types=["pdf"], actor=actor
|
||||||
|
)
|
||||||
except FileValidationError as e:
|
except FileValidationError as e:
|
||||||
return jsonify({"error": e.message}), e.code
|
return jsonify({"error": e.message}), e.code
|
||||||
|
|
||||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
task = pdf_to_pptx_task.delay(
|
task = pdf_to_pptx_task.delay(
|
||||||
input_path, task_id, original_filename,
|
input_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "pdf-to-pptx", task.id)
|
record_accepted_usage(actor, "pdf-to-pptx", task.id)
|
||||||
@@ -1064,6 +1215,7 @@ def v1_pdf_to_pptx_route():
|
|||||||
# Excel to PDF — POST /api/v1/convert/excel-to-pdf
|
# Excel to PDF — POST /api/v1/convert/excel-to-pdf
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
@v1_bp.route("/convert/excel-to-pdf", methods=["POST"])
|
@v1_bp.route("/convert/excel-to-pdf", methods=["POST"])
|
||||||
@limiter.limit("10/minute")
|
@limiter.limit("10/minute")
|
||||||
def v1_excel_to_pdf_route():
|
def v1_excel_to_pdf_route():
|
||||||
@@ -1086,7 +1238,9 @@ def v1_excel_to_pdf_route():
|
|||||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
task = excel_to_pdf_task.delay(
|
task = excel_to_pdf_task.delay(
|
||||||
input_path, task_id, original_filename,
|
input_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "excel-to-pdf", task.id)
|
record_accepted_usage(actor, "excel-to-pdf", task.id)
|
||||||
@@ -1097,6 +1251,7 @@ def v1_excel_to_pdf_route():
|
|||||||
# PowerPoint to PDF — POST /api/v1/convert/pptx-to-pdf
|
# PowerPoint to PDF — POST /api/v1/convert/pptx-to-pdf
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
@v1_bp.route("/convert/pptx-to-pdf", methods=["POST"])
|
@v1_bp.route("/convert/pptx-to-pdf", methods=["POST"])
|
||||||
@limiter.limit("10/minute")
|
@limiter.limit("10/minute")
|
||||||
def v1_pptx_to_pdf_route():
|
def v1_pptx_to_pdf_route():
|
||||||
@@ -1119,7 +1274,9 @@ def v1_pptx_to_pdf_route():
|
|||||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
task = pptx_to_pdf_task.delay(
|
task = pptx_to_pdf_task.delay(
|
||||||
input_path, task_id, original_filename,
|
input_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "pptx-to-pdf", task.id)
|
record_accepted_usage(actor, "pptx-to-pdf", task.id)
|
||||||
@@ -1130,6 +1287,7 @@ def v1_pptx_to_pdf_route():
|
|||||||
# Sign PDF — POST /api/v1/pdf-tools/sign
|
# Sign PDF — POST /api/v1/pdf-tools/sign
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
@v1_bp.route("/pdf-tools/sign", methods=["POST"])
|
@v1_bp.route("/pdf-tools/sign", methods=["POST"])
|
||||||
@limiter.limit("10/minute")
|
@limiter.limit("10/minute")
|
||||||
def v1_sign_pdf_route():
|
def v1_sign_pdf_route():
|
||||||
@@ -1147,12 +1305,16 @@ def v1_sign_pdf_route():
|
|||||||
sig_file = request.files["signature"]
|
sig_file = request.files["signature"]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
original_filename, ext = validate_actor_file(pdf_file, allowed_types=["pdf"], actor=actor)
|
original_filename, ext = validate_actor_file(
|
||||||
|
pdf_file, allowed_types=["pdf"], actor=actor
|
||||||
|
)
|
||||||
except FileValidationError as e:
|
except FileValidationError as e:
|
||||||
return jsonify({"error": e.message}), e.code
|
return jsonify({"error": e.message}), e.code
|
||||||
|
|
||||||
try:
|
try:
|
||||||
_, sig_ext = validate_actor_file(sig_file, allowed_types=ALLOWED_IMAGE_TYPES, actor=actor)
|
_, sig_ext = validate_actor_file(
|
||||||
|
sig_file, allowed_types=ALLOWED_IMAGE_TYPES, actor=actor
|
||||||
|
)
|
||||||
except FileValidationError as e:
|
except FileValidationError as e:
|
||||||
return jsonify({"error": f"Signature: {e.message}"}), e.code
|
return jsonify({"error": f"Signature: {e.message}"}), e.code
|
||||||
|
|
||||||
@@ -1174,8 +1336,15 @@ def v1_sign_pdf_route():
|
|||||||
sig_file.save(signature_path)
|
sig_file.save(signature_path)
|
||||||
|
|
||||||
task = sign_pdf_task.delay(
|
task = sign_pdf_task.delay(
|
||||||
input_path, signature_path, task_id, original_filename,
|
input_path,
|
||||||
page, x, y, width, height,
|
signature_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
|
page,
|
||||||
|
x,
|
||||||
|
y,
|
||||||
|
width,
|
||||||
|
height,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "sign-pdf", task.id)
|
record_accepted_usage(actor, "sign-pdf", task.id)
|
||||||
@@ -1186,6 +1355,7 @@ def v1_sign_pdf_route():
|
|||||||
# Crop PDF — POST /api/v1/pdf-tools/crop
|
# Crop PDF — POST /api/v1/pdf-tools/crop
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
@v1_bp.route("/pdf-tools/crop", methods=["POST"])
|
@v1_bp.route("/pdf-tools/crop", methods=["POST"])
|
||||||
@limiter.limit("10/minute")
|
@limiter.limit("10/minute")
|
||||||
def v1_crop_pdf_route():
|
def v1_crop_pdf_route():
|
||||||
@@ -1209,15 +1379,23 @@ def v1_crop_pdf_route():
|
|||||||
pages = request.form.get("pages", "all")
|
pages = request.form.get("pages", "all")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
original_filename, ext = validate_actor_file(
|
||||||
|
file, allowed_types=["pdf"], actor=actor
|
||||||
|
)
|
||||||
except FileValidationError as e:
|
except FileValidationError as e:
|
||||||
return jsonify({"error": e.message}), e.code
|
return jsonify({"error": e.message}), e.code
|
||||||
|
|
||||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
task = crop_pdf_task.delay(
|
task = crop_pdf_task.delay(
|
||||||
input_path, task_id, original_filename,
|
input_path,
|
||||||
margin_left, margin_right, margin_top, margin_bottom, pages,
|
task_id,
|
||||||
|
original_filename,
|
||||||
|
margin_left,
|
||||||
|
margin_right,
|
||||||
|
margin_top,
|
||||||
|
margin_bottom,
|
||||||
|
pages,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "crop-pdf", task.id)
|
record_accepted_usage(actor, "crop-pdf", task.id)
|
||||||
@@ -1228,6 +1406,7 @@ def v1_crop_pdf_route():
|
|||||||
# Flatten PDF — POST /api/v1/pdf-tools/flatten
|
# Flatten PDF — POST /api/v1/pdf-tools/flatten
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
@v1_bp.route("/pdf-tools/flatten", methods=["POST"])
|
@v1_bp.route("/pdf-tools/flatten", methods=["POST"])
|
||||||
@limiter.limit("10/minute")
|
@limiter.limit("10/minute")
|
||||||
def v1_flatten_pdf_route():
|
def v1_flatten_pdf_route():
|
||||||
@@ -1241,14 +1420,18 @@ def v1_flatten_pdf_route():
|
|||||||
|
|
||||||
file = request.files["file"]
|
file = request.files["file"]
|
||||||
try:
|
try:
|
||||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
original_filename, ext = validate_actor_file(
|
||||||
|
file, allowed_types=["pdf"], actor=actor
|
||||||
|
)
|
||||||
except FileValidationError as e:
|
except FileValidationError as e:
|
||||||
return jsonify({"error": e.message}), e.code
|
return jsonify({"error": e.message}), e.code
|
||||||
|
|
||||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
task = flatten_pdf_task.delay(
|
task = flatten_pdf_task.delay(
|
||||||
input_path, task_id, original_filename,
|
input_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "flatten-pdf", task.id)
|
record_accepted_usage(actor, "flatten-pdf", task.id)
|
||||||
@@ -1259,6 +1442,7 @@ def v1_flatten_pdf_route():
|
|||||||
# Repair PDF — POST /api/v1/pdf-tools/repair
|
# Repair PDF — POST /api/v1/pdf-tools/repair
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
@v1_bp.route("/pdf-tools/repair", methods=["POST"])
|
@v1_bp.route("/pdf-tools/repair", methods=["POST"])
|
||||||
@limiter.limit("10/minute")
|
@limiter.limit("10/minute")
|
||||||
def v1_repair_pdf_route():
|
def v1_repair_pdf_route():
|
||||||
@@ -1272,14 +1456,18 @@ def v1_repair_pdf_route():
|
|||||||
|
|
||||||
file = request.files["file"]
|
file = request.files["file"]
|
||||||
try:
|
try:
|
||||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
original_filename, ext = validate_actor_file(
|
||||||
|
file, allowed_types=["pdf"], actor=actor
|
||||||
|
)
|
||||||
except FileValidationError as e:
|
except FileValidationError as e:
|
||||||
return jsonify({"error": e.message}), e.code
|
return jsonify({"error": e.message}), e.code
|
||||||
|
|
||||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
task = repair_pdf_task.delay(
|
task = repair_pdf_task.delay(
|
||||||
input_path, task_id, original_filename,
|
input_path,
|
||||||
|
task_id,
|
||||||
|
original_filename,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "repair-pdf", task.id)
|
record_accepted_usage(actor, "repair-pdf", task.id)
|
||||||
@@ -1290,6 +1478,7 @@ def v1_repair_pdf_route():
|
|||||||
# Edit PDF Metadata — POST /api/v1/pdf-tools/metadata
|
# Edit PDF Metadata — POST /api/v1/pdf-tools/metadata
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
@v1_bp.route("/pdf-tools/metadata", methods=["POST"])
|
@v1_bp.route("/pdf-tools/metadata", methods=["POST"])
|
||||||
@limiter.limit("10/minute")
|
@limiter.limit("10/minute")
|
||||||
def v1_edit_metadata_route():
|
def v1_edit_metadata_route():
|
||||||
@@ -1312,15 +1501,23 @@ def v1_edit_metadata_route():
|
|||||||
return jsonify({"error": "At least one metadata field required."}), 400
|
return jsonify({"error": "At least one metadata field required."}), 400
|
||||||
|
|
||||||
try:
|
try:
|
||||||
original_filename, ext = validate_actor_file(file, allowed_types=["pdf"], actor=actor)
|
original_filename, ext = validate_actor_file(
|
||||||
|
file, allowed_types=["pdf"], actor=actor
|
||||||
|
)
|
||||||
except FileValidationError as e:
|
except FileValidationError as e:
|
||||||
return jsonify({"error": e.message}), e.code
|
return jsonify({"error": e.message}), e.code
|
||||||
|
|
||||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
task = edit_metadata_task.delay(
|
task = edit_metadata_task.delay(
|
||||||
input_path, task_id, original_filename,
|
input_path,
|
||||||
title, author, subject, keywords, creator,
|
task_id,
|
||||||
|
original_filename,
|
||||||
|
title,
|
||||||
|
author,
|
||||||
|
subject,
|
||||||
|
keywords,
|
||||||
|
creator,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "edit-metadata", task.id)
|
record_accepted_usage(actor, "edit-metadata", task.id)
|
||||||
@@ -1331,6 +1528,7 @@ def v1_edit_metadata_route():
|
|||||||
# Image Crop — POST /api/v1/image/crop
|
# Image Crop — POST /api/v1/image/crop
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
@v1_bp.route("/image/crop", methods=["POST"])
|
@v1_bp.route("/image/crop", methods=["POST"])
|
||||||
@limiter.limit("10/minute")
|
@limiter.limit("10/minute")
|
||||||
def v1_crop_image_route():
|
def v1_crop_image_route():
|
||||||
@@ -1364,8 +1562,13 @@ def v1_crop_image_route():
|
|||||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
task = crop_image_task.delay(
|
task = crop_image_task.delay(
|
||||||
input_path, task_id, original_filename,
|
input_path,
|
||||||
left, top, right, bottom,
|
task_id,
|
||||||
|
original_filename,
|
||||||
|
left,
|
||||||
|
top,
|
||||||
|
right,
|
||||||
|
bottom,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "image-crop", task.id)
|
record_accepted_usage(actor, "image-crop", task.id)
|
||||||
@@ -1376,6 +1579,7 @@ def v1_crop_image_route():
|
|||||||
# Image Rotate/Flip — POST /api/v1/image/rotate-flip
|
# Image Rotate/Flip — POST /api/v1/image/rotate-flip
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
@v1_bp.route("/image/rotate-flip", methods=["POST"])
|
@v1_bp.route("/image/rotate-flip", methods=["POST"])
|
||||||
@limiter.limit("10/minute")
|
@limiter.limit("10/minute")
|
||||||
def v1_rotate_flip_image_route():
|
def v1_rotate_flip_image_route():
|
||||||
@@ -1408,8 +1612,12 @@ def v1_rotate_flip_image_route():
|
|||||||
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
task_id, input_path = generate_safe_path(ext, folder_type="upload")
|
||||||
file.save(input_path)
|
file.save(input_path)
|
||||||
task = rotate_flip_image_task.delay(
|
task = rotate_flip_image_task.delay(
|
||||||
input_path, task_id, original_filename,
|
input_path,
|
||||||
rotation, flip_horizontal, flip_vertical,
|
task_id,
|
||||||
|
original_filename,
|
||||||
|
rotation,
|
||||||
|
flip_horizontal,
|
||||||
|
flip_vertical,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "image-rotate-flip", task.id)
|
record_accepted_usage(actor, "image-rotate-flip", task.id)
|
||||||
@@ -1420,6 +1628,7 @@ def v1_rotate_flip_image_route():
|
|||||||
# Barcode — POST /api/v1/barcode/generate
|
# Barcode — POST /api/v1/barcode/generate
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
@v1_bp.route("/barcode/generate", methods=["POST"])
|
@v1_bp.route("/barcode/generate", methods=["POST"])
|
||||||
@limiter.limit("20/minute")
|
@limiter.limit("20/minute")
|
||||||
def v1_generate_barcode_route():
|
def v1_generate_barcode_route():
|
||||||
@@ -1442,14 +1651,21 @@ def v1_generate_barcode_route():
|
|||||||
return jsonify({"error": "Barcode data is required."}), 400
|
return jsonify({"error": "Barcode data is required."}), 400
|
||||||
|
|
||||||
if barcode_type not in SUPPORTED_BARCODE_TYPES:
|
if barcode_type not in SUPPORTED_BARCODE_TYPES:
|
||||||
return jsonify({"error": f"Unsupported type. Supported: {', '.join(SUPPORTED_BARCODE_TYPES)}"}), 400
|
return jsonify(
|
||||||
|
{
|
||||||
|
"error": f"Unsupported type. Supported: {', '.join(SUPPORTED_BARCODE_TYPES)}"
|
||||||
|
}
|
||||||
|
), 400
|
||||||
|
|
||||||
if output_format not in ("png", "svg"):
|
if output_format not in ("png", "svg"):
|
||||||
output_format = "png"
|
output_format = "png"
|
||||||
|
|
||||||
task_id = str(uuid.uuid4())
|
task_id = str(uuid.uuid4())
|
||||||
task = generate_barcode_task.delay(
|
task = generate_barcode_task.delay(
|
||||||
data, barcode_type, task_id, output_format,
|
data,
|
||||||
|
barcode_type,
|
||||||
|
task_id,
|
||||||
|
output_format,
|
||||||
**build_task_tracking_kwargs(actor),
|
**build_task_tracking_kwargs(actor),
|
||||||
)
|
)
|
||||||
record_accepted_usage(actor, "barcode", task.id)
|
record_accepted_usage(actor, "barcode", task.id)
|
||||||
|
|||||||
@@ -1,6 +1,11 @@
|
|||||||
"""PDF AI services — Chat, Summarize, Translate, Table Extract."""
|
"""PDF AI services — Chat, Summarize, Translate, Table Extract."""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
@@ -11,9 +16,84 @@ from app.services.openrouter_config_service import (
|
|||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Endpoint used when DEEPL_API_URL is unset or blank.
DEFAULT_DEEPL_API_URL = "https://api-free.deepl.com/v2/translate"
# Timeout (seconds) used when DEEPL_TIMEOUT_SECONDS is unset.
DEFAULT_DEEPL_TIMEOUT_SECONDS = 90
# NOTE(review): presumably the per-request character budget when splitting
# text for translation; the consumer is outside this view, so confirm.
MAX_TRANSLATION_CHUNK_CHARS = 3500
# Retry policy for retryable provider failures: total attempts, and the base
# delay in seconds that is multiplied by the attempt number between tries.
TRANSLATION_RETRY_ATTEMPTS = 3
TRANSLATION_RETRY_DELAY_SECONDS = 2

# Human-readable names keyed by lowercase language code; "auto" is the
# auto-detect pseudo-language.
LANGUAGE_LABELS = {
    "auto": "Auto Detect",
    "en": "English",
    "ar": "Arabic",
    "fr": "French",
    "es": "Spanish",
    "de": "German",
    "zh": "Chinese",
    "ja": "Japanese",
    "ko": "Korean",
    "pt": "Portuguese",
    "ru": "Russian",
    "tr": "Turkish",
    "it": "Italian",
}

# Lowercase language code -> code in the form sent to DeepL. There is no
# entry for "auto".
# NOTE(review): whether these are used as source or target codes is not
# visible here; "PT-PT" is a regional variant, so verify it is accepted
# wherever this mapping is consumed.
DEEPL_LANGUAGE_CODES = {
    "ar": "AR",
    "de": "DE",
    "en": "EN",
    "es": "ES",
    "fr": "FR",
    "it": "IT",
    "ja": "JA",
    "ko": "KO",
    "pt": "PT-PT",
    "ru": "RU",
    "tr": "TR",
    "zh": "ZH",
}

# Lowercase language code -> three-letter OCR language code.
# NOTE(review): values look like Tesseract language names; confirm against
# the OCR call site. Only Arabic, English and French are mapped.
OCR_LANGUAGE_CODES = {
    "ar": "ara",
    "en": "eng",
    "fr": "fra",
}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class DeepLSettings:
    """Immutable connection settings for the DeepL translation endpoint."""

    # Credential read from DEEPL_API_KEY; empty string when not configured.
    api_key: str
    # Full URL of the translate endpoint.
    base_url: str
    # Request timeout, in seconds.
    timeout_seconds: int
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_language_code(value: str | None, default: str = "") -> str:
|
||||||
|
normalized = str(value or "").strip().lower()
|
||||||
|
return normalized or default
|
||||||
|
|
||||||
|
|
||||||
|
def _language_label(value: str | None) -> str:
    """Return the display name for a language code.

    The code is normalized first. Codes missing from ``LANGUAGE_LABELS`` are
    returned in their normalized form, and an empty code yields ``"Unknown"``.
    """
    code = _normalize_language_code(value)
    if code in LANGUAGE_LABELS:
        return LANGUAGE_LABELS[code]
    return code if code else "Unknown"
|
||||||
|
|
||||||
|
|
||||||
|
def _get_deepl_settings() -> DeepLSettings:
    """Build the DeepL settings from environment variables.

    Reads ``DEEPL_API_KEY``, ``DEEPL_API_URL`` and ``DEEPL_TIMEOUT_SECONDS``.
    A missing or blank URL falls back to ``DEFAULT_DEEPL_API_URL``. A missing,
    blank, non-numeric or non-positive timeout falls back to
    ``DEFAULT_DEEPL_TIMEOUT_SECONDS`` instead of raising, so a malformed
    environment entry cannot abort a translation before any provider is tried.

    Returns:
        A ``DeepLSettings`` instance; ``api_key`` is empty when DeepL is not
        configured.
    """
    api_key = str(os.getenv("DEEPL_API_KEY", "")).strip()
    base_url = (
        str(os.getenv("DEEPL_API_URL", DEFAULT_DEEPL_API_URL)).strip()
        or DEFAULT_DEEPL_API_URL
    )

    timeout_seconds = DEFAULT_DEEPL_TIMEOUT_SECONDS
    raw_timeout = str(os.getenv("DEEPL_TIMEOUT_SECONDS", "")).strip()
    if raw_timeout:
        try:
            parsed_timeout = int(raw_timeout)
        except ValueError:
            parsed_timeout = 0
        if parsed_timeout > 0:
            timeout_seconds = parsed_timeout
        else:
            # Zero/negative timeouts are rejected by the HTTP layer, so treat
            # them the same way as unparsable values.
            logger.warning(
                "Ignoring invalid DEEPL_TIMEOUT_SECONDS value %r; using %s seconds.",
                raw_timeout,
                DEFAULT_DEEPL_TIMEOUT_SECONDS,
            )

    return DeepLSettings(
        api_key=api_key, base_url=base_url, timeout_seconds=timeout_seconds
    )
|
||||||
|
|
||||||
|
|
||||||
class PdfAiError(Exception):
|
class PdfAiError(Exception):
|
||||||
"""Custom exception for PDF AI service failures."""
|
"""Custom exception for PDF AI service failures."""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
user_message: str,
|
user_message: str,
|
||||||
@@ -26,6 +106,42 @@ class PdfAiError(Exception):
|
|||||||
self.detail = detail
|
self.detail = detail
|
||||||
|
|
||||||
|
|
||||||
|
class RetryableTranslationError(PdfAiError):
    """Signals a transient provider failure that the retry helper should attempt again."""
|
||||||
|
|
||||||
|
|
||||||
|
def _translate_with_retry(action, provider_name: str) -> dict:
    """Run *action*, retrying while it raises ``RetryableTranslationError``.

    Up to ``TRANSLATION_RETRY_ATTEMPTS`` attempts are made. Between attempts
    the helper sleeps ``TRANSLATION_RETRY_DELAY_SECONDS * attempt`` seconds
    (linear back-off). Any other exception raised by *action* propagates
    immediately.

    Args:
        action: Zero-argument callable performing one translation request.
        provider_name: Provider label used only in log messages.

    Returns:
        Whatever *action* returns on its first successful call.

    Raises:
        PdfAiError: When every attempt failed with a retryable error. The
            message, error code and detail are copied from the last failure,
            which is also attached as ``__cause__``.
    """
    last_error: RetryableTranslationError | None = None

    for attempt in range(1, TRANSLATION_RETRY_ATTEMPTS + 1):
        try:
            return action()
        except RetryableTranslationError as error:
            last_error = error
            logger.warning(
                "%s translation attempt %s/%s failed with retryable error %s",
                provider_name,
                attempt,
                TRANSLATION_RETRY_ATTEMPTS,
                error.error_code,
            )
            # No point sleeping after the final attempt.
            if attempt < TRANSLATION_RETRY_ATTEMPTS:
                time.sleep(TRANSLATION_RETRY_DELAY_SECONDS * attempt)

    if last_error is not None:
        # Re-raise as a plain PdfAiError so callers do not mistake it for
        # something still worth retrying; keep the original as the cause.
        raise PdfAiError(
            last_error.user_message,
            error_code=last_error.error_code,
            detail=last_error.detail,
        ) from last_error

    # Only reachable when TRANSLATION_RETRY_ATTEMPTS is configured as <= 0.
    raise PdfAiError(
        "Translation provider failed unexpectedly.",
        error_code="TRANSLATION_PROVIDER_FAILED",
    )
|
||||||
|
|
||||||
|
|
||||||
def _estimate_tokens(text: str) -> int:
|
def _estimate_tokens(text: str) -> int:
|
||||||
"""Rough token estimate: ~4 chars per token for English."""
|
"""Rough token estimate: ~4 chars per token for English."""
|
||||||
return max(1, len(text) // 4)
|
return max(1, len(text) // 4)
|
||||||
@@ -49,7 +165,30 @@ def _extract_text_from_pdf(input_path: str, max_pages: int = 50) -> str:
|
|||||||
text = page.extract_text() or ""
|
text = page.extract_text() or ""
|
||||||
if text.strip():
|
if text.strip():
|
||||||
texts.append(f"[Page {i + 1}]\n{text}")
|
texts.append(f"[Page {i + 1}]\n{text}")
|
||||||
return "\n\n".join(texts)
|
|
||||||
|
extracted = "\n\n".join(texts)
|
||||||
|
if extracted.strip():
|
||||||
|
return extracted
|
||||||
|
|
||||||
|
# Fall back to OCR for scanned/image-only PDFs instead of failing fast.
|
||||||
|
try:
|
||||||
|
from app.services.ocr_service import ocr_pdf
|
||||||
|
|
||||||
|
with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as handle:
|
||||||
|
ocr_output_path = handle.name
|
||||||
|
|
||||||
|
try:
|
||||||
|
data = ocr_pdf(input_path, ocr_output_path, lang="eng")
|
||||||
|
ocr_text = str(data.get("text", "")).strip()
|
||||||
|
if ocr_text:
|
||||||
|
return ocr_text
|
||||||
|
finally:
|
||||||
|
if os.path.exists(ocr_output_path):
|
||||||
|
os.unlink(ocr_output_path)
|
||||||
|
except Exception as ocr_error:
|
||||||
|
logger.warning("OCR fallback for PDF text extraction failed: %s", ocr_error)
|
||||||
|
|
||||||
|
return ""
|
||||||
except PdfAiError:
|
except PdfAiError:
|
||||||
raise
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -70,14 +209,17 @@ def _call_openrouter(
|
|||||||
# Budget guard
|
# Budget guard
|
||||||
try:
|
try:
|
||||||
from app.services.ai_cost_service import check_ai_budget, AiBudgetExceededError
|
from app.services.ai_cost_service import check_ai_budget, AiBudgetExceededError
|
||||||
|
|
||||||
check_ai_budget()
|
check_ai_budget()
|
||||||
except AiBudgetExceededError:
|
except ImportError:
|
||||||
raise PdfAiError(
|
pass
|
||||||
"Monthly AI processing budget has been reached. Please try again next month.",
|
except Exception as error:
|
||||||
error_code="AI_BUDGET_EXCEEDED",
|
if error.__class__.__name__ == "AiBudgetExceededError":
|
||||||
)
|
raise PdfAiError(
|
||||||
except Exception:
|
"Monthly AI processing budget has been reached. Please try again next month.",
|
||||||
pass # Don't block if cost service unavailable
|
error_code="AI_BUDGET_EXCEEDED",
|
||||||
|
)
|
||||||
|
pass
|
||||||
|
|
||||||
settings = get_openrouter_settings()
|
settings = get_openrouter_settings()
|
||||||
|
|
||||||
@@ -127,14 +269,14 @@ def _call_openrouter(
|
|||||||
|
|
||||||
if status_code == 429:
|
if status_code == 429:
|
||||||
logger.warning("OpenRouter rate limit reached (429).")
|
logger.warning("OpenRouter rate limit reached (429).")
|
||||||
raise PdfAiError(
|
raise RetryableTranslationError(
|
||||||
"AI service is experiencing high demand. Please wait a moment and try again.",
|
"AI service is experiencing high demand. Please wait a moment and try again.",
|
||||||
error_code="OPENROUTER_RATE_LIMIT",
|
error_code="OPENROUTER_RATE_LIMIT",
|
||||||
)
|
)
|
||||||
|
|
||||||
if status_code >= 500:
|
if status_code >= 500:
|
||||||
logger.error("OpenRouter server error (%s).", status_code)
|
logger.error("OpenRouter server error (%s).", status_code)
|
||||||
raise PdfAiError(
|
raise RetryableTranslationError(
|
||||||
"AI service provider is experiencing issues. Please try again shortly.",
|
"AI service provider is experiencing issues. Please try again shortly.",
|
||||||
error_code="OPENROUTER_SERVER_ERROR",
|
error_code="OPENROUTER_SERVER_ERROR",
|
||||||
)
|
)
|
||||||
@@ -144,7 +286,11 @@ def _call_openrouter(
|
|||||||
|
|
||||||
# Handle model-level errors returned inside a 200 response
|
# Handle model-level errors returned inside a 200 response
|
||||||
if data.get("error"):
|
if data.get("error"):
|
||||||
error_msg = data["error"].get("message", "") if isinstance(data["error"], dict) else str(data["error"])
|
error_msg = (
|
||||||
|
data["error"].get("message", "")
|
||||||
|
if isinstance(data["error"], dict)
|
||||||
|
else str(data["error"])
|
||||||
|
)
|
||||||
logger.error("OpenRouter returned an error payload: %s", error_msg)
|
logger.error("OpenRouter returned an error payload: %s", error_msg)
|
||||||
raise PdfAiError(
|
raise PdfAiError(
|
||||||
"AI service encountered an issue. Please try again.",
|
"AI service encountered an issue. Please try again.",
|
||||||
@@ -163,6 +309,7 @@ def _call_openrouter(
|
|||||||
# Log usage
|
# Log usage
|
||||||
try:
|
try:
|
||||||
from app.services.ai_cost_service import log_ai_usage
|
from app.services.ai_cost_service import log_ai_usage
|
||||||
|
|
||||||
usage = data.get("usage", {})
|
usage = data.get("usage", {})
|
||||||
log_ai_usage(
|
log_ai_usage(
|
||||||
tool=tool_name,
|
tool=tool_name,
|
||||||
@@ -178,13 +325,13 @@ def _call_openrouter(
|
|||||||
except PdfAiError:
|
except PdfAiError:
|
||||||
raise
|
raise
|
||||||
except requests.exceptions.Timeout:
|
except requests.exceptions.Timeout:
|
||||||
raise PdfAiError(
|
raise RetryableTranslationError(
|
||||||
"AI service timed out. Please try again.",
|
"AI service timed out. Please try again.",
|
||||||
error_code="OPENROUTER_TIMEOUT",
|
error_code="OPENROUTER_TIMEOUT",
|
||||||
)
|
)
|
||||||
except requests.exceptions.ConnectionError:
|
except requests.exceptions.ConnectionError:
|
||||||
logger.error("Cannot connect to OpenRouter API at %s", settings.base_url)
|
logger.error("Cannot connect to OpenRouter API at %s", settings.base_url)
|
||||||
raise PdfAiError(
|
raise RetryableTranslationError(
|
||||||
"AI service is unreachable. Please try again shortly.",
|
"AI service is unreachable. Please try again shortly.",
|
||||||
error_code="OPENROUTER_CONNECTION_ERROR",
|
error_code="OPENROUTER_CONNECTION_ERROR",
|
||||||
)
|
)
|
||||||
@@ -197,6 +344,218 @@ def _call_openrouter(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _split_translation_chunks(
    text: str, max_chars: int = MAX_TRANSLATION_CHUNK_CHARS
) -> list[str]:
    """Split extracted PDF text into chunks of roughly *max_chars* characters.

    The text is first divided on blank lines (``"\\n\\n"``), which keeps each
    ``[Page N]`` marker attached to the text that follows it. Blocks are then
    packed greedily into chunks. A block that is itself longer than
    *max_chars* (for example a dense page or OCR output without blank lines)
    is broken up further on line boundaries, and as a last resort by length,
    so that no chunk grows without bound.

    Args:
        text: The full extracted document text.
        max_chars: Soft upper bound on the size of each chunk.

    Returns:
        The list of chunks in document order. If *text* contains no
        non-blank content, a single-element list holding *text* is returned.
    """
    limit = max(1, max_chars)
    chunks: list[str] = []
    current: list[str] = []
    current_length = 0

    for block in text.split("\n\n"):
        normalized = block.strip()
        if not normalized:
            continue

        if len(normalized) > limit:
            parts = _split_oversized_block(normalized, limit)
        else:
            parts = [normalized]

        for part in parts:
            # +2 accounts for the "\n\n" separator used when joining.
            part_length = len(part) + 2
            if current and current_length + part_length > limit:
                chunks.append("\n\n".join(current))
                current = []
                current_length = 0
            current.append(part)
            current_length += part_length

    if current:
        chunks.append("\n\n".join(current))

    return chunks or [text]


def _split_oversized_block(block: str, max_chars: int) -> list[str]:
    """Break one block longer than *max_chars* into pieces that fit the limit."""
    pieces: list[str] = []
    current: list[str] = []
    current_length = 0

    for line in block.split("\n"):
        # A single line may still exceed the limit; slice it by length then.
        segments = [
            line[start : start + max_chars]
            for start in range(0, len(line), max_chars)
        ] or [line]
        for segment in segments:
            segment_length = len(segment) + 1  # +1 for the "\n" separator
            if current and current_length + segment_length > max_chars:
                pieces.append("\n".join(current))
                current = []
                current_length = 0
            current.append(segment)
            current_length += segment_length

    if current:
        pieces.append("\n".join(current))

    return [piece.strip() for piece in pieces if piece.strip()]
|
||||||
|
|
||||||
|
|
||||||
|
def _call_deepl_translate(
    chunk: str, target_language: str, source_language: str | None = None
) -> dict:
    """Translate one text chunk through the DeepL HTTP API.

    Args:
        chunk: Text to translate.
        target_language: Application language code (key of
            ``DEEPL_LANGUAGE_CODES``) to translate into.
        source_language: Optional application language code of the source
            text. Codes without a DeepL mapping (including ``"auto"``) are
            omitted from the request so DeepL detects the language itself.

    Returns:
        A dict with ``translation``, ``provider`` (always ``"deepl"``) and
        ``detected_source_language`` (lower-cased, possibly empty).

    Raises:
        RetryableTranslationError: On timeouts, connection errors, HTTP 429
            and HTTP 5xx responses.
        PdfAiError: When DeepL is not configured, the target language is
            unsupported, the request is rejected, or the response is empty
            or malformed. Every failure is reported as a ``PdfAiError`` so
            callers can fall back to another provider.
    """
    settings = _get_deepl_settings()
    if not settings.api_key:
        raise PdfAiError(
            "DeepL is not configured.",
            error_code="DEEPL_NOT_CONFIGURED",
        )

    target_code = DEEPL_LANGUAGE_CODES.get(_normalize_language_code(target_language))
    if not target_code:
        raise PdfAiError(
            f"Target language '{target_language}' is not supported by the premium translation provider.",
            error_code="DEEPL_UNSUPPORTED_TARGET_LANGUAGE",
        )

    # NOTE(review): tag_handling="xml" asks DeepL to parse the text as XML.
    # The input here is plain extracted PDF text, so literal "<" or "&"
    # characters may be affected -- confirm this setting is intended.
    payload: dict[str, object] = {
        "text": [chunk],
        "target_lang": target_code,
        "preserve_formatting": True,
        "tag_handling": "xml",
        "split_sentences": "nonewlines",
    }

    source_code = DEEPL_LANGUAGE_CODES.get(_normalize_language_code(source_language))
    if source_code:
        # Regional variants such as "PT-PT" are only valid as target codes;
        # the source parameter takes the base language code.
        payload["source_lang"] = source_code.split("-", 1)[0]

    try:
        response = requests.post(
            settings.base_url,
            headers={
                "Authorization": f"DeepL-Auth-Key {settings.api_key}",
                "Content-Type": "application/json",
            },
            json=payload,
            timeout=settings.timeout_seconds,
        )
    except requests.exceptions.Timeout as error:
        raise RetryableTranslationError(
            "Premium translation service timed out. Retrying...",
            error_code="DEEPL_TIMEOUT",
        ) from error
    except requests.exceptions.ConnectionError as error:
        raise RetryableTranslationError(
            "Premium translation service is temporarily unreachable. Retrying...",
            error_code="DEEPL_CONNECTION_ERROR",
        ) from error
    except requests.exceptions.RequestException as error:
        raise PdfAiError(
            "Premium translation service is temporarily unavailable.",
            error_code="DEEPL_REQUEST_ERROR",
            detail=str(error),
        ) from error

    if response.status_code == 429:
        raise RetryableTranslationError(
            "Premium translation service is busy. Retrying...",
            error_code="DEEPL_RATE_LIMIT",
        )

    if response.status_code >= 500:
        raise RetryableTranslationError(
            "Premium translation service is experiencing issues. Retrying...",
            error_code="DEEPL_SERVER_ERROR",
        )

    if response.status_code in {403, 456}:
        raise PdfAiError(
            "Premium translation provider credits or permissions need attention.",
            error_code="DEEPL_CREDITS_OR_PERMISSIONS",
        )

    # Any remaining 4xx must surface as PdfAiError; a raw HTTPError would
    # bypass the caller's provider fallback.
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError as error:
        raise PdfAiError(
            "Premium translation service is temporarily unavailable.",
            error_code="DEEPL_HTTP_ERROR",
            detail=str(error),
        ) from error

    # A non-JSON body raises a ValueError subclass; convert it as well.
    try:
        data = response.json()
    except ValueError as error:
        raise PdfAiError(
            "Premium translation provider returned an invalid response.",
            error_code="DEEPL_INVALID_RESPONSE",
            detail=str(error),
        ) from error

    translations = data.get("translations") if isinstance(data, dict) else None
    if not translations or not isinstance(translations, list):
        raise PdfAiError(
            "Premium translation provider returned an empty response.",
            error_code="DEEPL_EMPTY_RESPONSE",
        )

    first = translations[0]
    if not isinstance(first, dict):
        raise PdfAiError(
            "Premium translation provider returned an invalid response.",
            error_code="DEEPL_INVALID_RESPONSE",
        )

    translated_text = str(first.get("text", "")).strip()
    if not translated_text:
        raise PdfAiError(
            "Premium translation provider returned an empty response.",
            error_code="DEEPL_EMPTY_TEXT",
        )

    return {
        "translation": translated_text,
        "provider": "deepl",
        "detected_source_language": str(first.get("detected_source_language", ""))
        .strip()
        .lower(),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _call_openrouter_translate(
    chunk: str, target_language: str, source_language: str | None = None
) -> dict:
    """Translate one text chunk with the OpenRouter chat model.

    Args:
        chunk: Text to translate.
        target_language: Language code to translate into; its display label
            is inserted into the prompt.
        source_language: Optional source language code. ``None``, blank and
            ``"auto"`` all mean the model should detect the language itself.

    Returns:
        A dict with ``translation``, ``provider`` (always ``"openrouter"``)
        and ``detected_source_language``. The latter echoes the explicit
        source code, or is empty when the source was left to auto-detection,
        because this provider does not report what it detected.
    """
    normalized_source = _normalize_language_code(source_language)
    # Blank input and the "auto" sentinel are both "no explicit source".
    explicit_source = normalized_source if normalized_source != "auto" else ""

    source_hint = "auto-detect the source language"
    if explicit_source:
        source_hint = f"treat {_language_label(explicit_source)} as the source language"

    system_prompt = (
        "You are a professional document translator. "
        f"Translate the provided PDF content into {_language_label(target_language)}. "
        f"Please {source_hint}. Preserve headings, lists, tables, and page markers. "
        "Return only the translated text."
    )
    translation = _call_openrouter(
        system_prompt,
        chunk,
        max_tokens=2200,
        tool_name="pdf_translate_fallback",
    )
    return {
        "translation": translation,
        "provider": "openrouter",
        # Never report the "auto" sentinel as a detected language.
        "detected_source_language": explicit_source,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _translate_document_text(
    text: str, target_language: str, source_language: str | None = None
) -> dict:
    """Translate a whole document chunk by chunk, with provider fallback.

    Each chunk is sent to DeepL first when an API key is configured. If
    DeepL fails for a chunk (after its retries), the failure is logged and
    that chunk is translated through OpenRouter instead. When DeepL is not
    configured, OpenRouter is used directly.

    Args:
        text: Full extracted document text.
        target_language: Language code to translate into.
        source_language: Optional source language code; ``"auto"`` or blank
            leaves detection to the provider.

    Returns:
        A dict with the joined ``translation``, a comma-separated
        ``provider`` list, the ``detected_source_language`` (explicit source
        code, else the first language a provider reported, else empty) and
        ``chunks_translated``.

    Raises:
        PdfAiError: When the OpenRouter fallback also fails for a chunk.
    """
    chunks = _split_translation_chunks(text)
    translations: list[str] = []
    detected_source_language = _normalize_language_code(source_language)
    if detected_source_language == "auto":
        detected_source_language = ""
    providers_used: list[str] = []

    # The environment-backed configuration does not change between chunks,
    # so read it once instead of on every iteration.
    deepl_enabled = bool(_get_deepl_settings().api_key)

    for chunk in chunks:
        chunk_result: dict | None = None

        if deepl_enabled:
            try:
                chunk_result = _translate_with_retry(
                    lambda chunk=chunk: _call_deepl_translate(
                        chunk, target_language, source_language
                    ),
                    provider_name="DeepL",
                )
            except PdfAiError as deepl_error:
                logger.warning(
                    "DeepL translation failed for chunk; falling back to OpenRouter. code=%s detail=%s",
                    deepl_error.error_code,
                    deepl_error.detail,
                )

        if chunk_result is None:
            chunk_result = _translate_with_retry(
                lambda chunk=chunk: _call_openrouter_translate(
                    chunk, target_language, source_language
                ),
                provider_name="OpenRouter",
            )

        translations.append(str(chunk_result["translation"]).strip())
        providers_used.append(str(chunk_result["provider"]))

        if not detected_source_language:
            reported_language = _normalize_language_code(
                chunk_result.get("detected_source_language")
            )
            # "auto" is a request sentinel, not a detected language.
            if reported_language and reported_language != "auto":
                detected_source_language = reported_language

    return {
        "translation": "\n\n".join(part for part in translations if part),
        "provider": ", ".join(sorted(set(providers_used))),
        "detected_source_language": detected_source_language,
        "chunks_translated": len(translations),
    }
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# 1. Chat with PDF
|
# 1. Chat with PDF
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -212,11 +571,15 @@ def chat_with_pdf(input_path: str, question: str) -> dict:
|
|||||||
{"reply": "...", "pages_analyzed": int}
|
{"reply": "...", "pages_analyzed": int}
|
||||||
"""
|
"""
|
||||||
if not question or not question.strip():
|
if not question or not question.strip():
|
||||||
raise PdfAiError("Please provide a question.", error_code="PDF_AI_INVALID_INPUT")
|
raise PdfAiError(
|
||||||
|
"Please provide a question.", error_code="PDF_AI_INVALID_INPUT"
|
||||||
|
)
|
||||||
|
|
||||||
text = _extract_text_from_pdf(input_path)
|
text = _extract_text_from_pdf(input_path)
|
||||||
if not text.strip():
|
if not text.strip():
|
||||||
raise PdfAiError("Could not extract any text from the PDF.", error_code="PDF_TEXT_EMPTY")
|
raise PdfAiError(
|
||||||
|
"Could not extract any text from the PDF.", error_code="PDF_TEXT_EMPTY"
|
||||||
|
)
|
||||||
|
|
||||||
# Truncate to fit context window
|
# Truncate to fit context window
|
||||||
max_chars = 12000
|
max_chars = 12000
|
||||||
@@ -230,7 +593,9 @@ def chat_with_pdf(input_path: str, question: str) -> dict:
|
|||||||
)
|
)
|
||||||
|
|
||||||
user_msg = f"Document content:\n{truncated}\n\nQuestion: {question}"
|
user_msg = f"Document content:\n{truncated}\n\nQuestion: {question}"
|
||||||
reply = _call_openrouter(system_prompt, user_msg, max_tokens=800, tool_name="pdf_chat")
|
reply = _call_openrouter(
|
||||||
|
system_prompt, user_msg, max_tokens=800, tool_name="pdf_chat"
|
||||||
|
)
|
||||||
|
|
||||||
page_count = text.count("[Page ")
|
page_count = text.count("[Page ")
|
||||||
return {"reply": reply, "pages_analyzed": page_count}
|
return {"reply": reply, "pages_analyzed": page_count}
|
||||||
@@ -252,7 +617,9 @@ def summarize_pdf(input_path: str, length: str = "medium") -> dict:
|
|||||||
"""
|
"""
|
||||||
text = _extract_text_from_pdf(input_path)
|
text = _extract_text_from_pdf(input_path)
|
||||||
if not text.strip():
|
if not text.strip():
|
||||||
raise PdfAiError("Could not extract any text from the PDF.", error_code="PDF_TEXT_EMPTY")
|
raise PdfAiError(
|
||||||
|
"Could not extract any text from the PDF.", error_code="PDF_TEXT_EMPTY"
|
||||||
|
)
|
||||||
|
|
||||||
length_instruction = {
|
length_instruction = {
|
||||||
"short": "Provide a brief summary in 2-3 sentences.",
|
"short": "Provide a brief summary in 2-3 sentences.",
|
||||||
@@ -270,7 +637,9 @@ def summarize_pdf(input_path: str, length: str = "medium") -> dict:
|
|||||||
)
|
)
|
||||||
|
|
||||||
user_msg = f"{length_instruction}\n\nDocument content:\n{truncated}"
|
user_msg = f"{length_instruction}\n\nDocument content:\n{truncated}"
|
||||||
summary = _call_openrouter(system_prompt, user_msg, max_tokens=1000, tool_name="pdf_summarize")
|
summary = _call_openrouter(
|
||||||
|
system_prompt, user_msg, max_tokens=1000, tool_name="pdf_summarize"
|
||||||
|
)
|
||||||
|
|
||||||
page_count = text.count("[Page ")
|
page_count = text.count("[Page ")
|
||||||
return {"summary": summary, "pages_analyzed": page_count}
|
return {"summary": summary, "pages_analyzed": page_count}
|
||||||
@@ -279,7 +648,9 @@ def summarize_pdf(input_path: str, length: str = "medium") -> dict:
|
|||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# 3. Translate PDF
|
# 3. Translate PDF
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
def translate_pdf(input_path: str, target_language: str) -> dict:
|
def translate_pdf(
|
||||||
|
input_path: str, target_language: str, source_language: str | None = None
|
||||||
|
) -> dict:
|
||||||
"""
|
"""
|
||||||
Translate the text content of a PDF to another language.
|
Translate the text content of a PDF to another language.
|
||||||
|
|
||||||
@@ -290,29 +661,46 @@ def translate_pdf(input_path: str, target_language: str) -> dict:
|
|||||||
Returns:
|
Returns:
|
||||||
{"translation": "...", "pages_analyzed": int, "target_language": str}
|
{"translation": "...", "pages_analyzed": int, "target_language": str}
|
||||||
"""
|
"""
|
||||||
if not target_language or not target_language.strip():
|
normalized_target_language = _normalize_language_code(target_language)
|
||||||
raise PdfAiError("Please specify a target language.", error_code="PDF_AI_INVALID_INPUT")
|
normalized_source_language = _normalize_language_code(
|
||||||
|
source_language, default="auto"
|
||||||
|
)
|
||||||
|
|
||||||
|
if not normalized_target_language:
|
||||||
|
raise PdfAiError(
|
||||||
|
"Please specify a target language.", error_code="PDF_AI_INVALID_INPUT"
|
||||||
|
)
|
||||||
|
|
||||||
|
if (
|
||||||
|
normalized_target_language == normalized_source_language
|
||||||
|
and normalized_source_language != "auto"
|
||||||
|
):
|
||||||
|
raise PdfAiError(
|
||||||
|
"Please choose different source and target languages.",
|
||||||
|
error_code="PDF_AI_INVALID_INPUT",
|
||||||
|
)
|
||||||
|
|
||||||
text = _extract_text_from_pdf(input_path)
|
text = _extract_text_from_pdf(input_path)
|
||||||
if not text.strip():
|
if not text.strip():
|
||||||
raise PdfAiError("Could not extract any text from the PDF.", error_code="PDF_TEXT_EMPTY")
|
raise PdfAiError(
|
||||||
|
"Could not extract any text from the PDF.", error_code="PDF_TEXT_EMPTY"
|
||||||
|
)
|
||||||
|
|
||||||
max_chars = 10000
|
translated = _translate_document_text(
|
||||||
truncated = text[:max_chars]
|
text,
|
||||||
|
target_language=normalized_target_language,
|
||||||
system_prompt = (
|
source_language=normalized_source_language,
|
||||||
f"You are a professional translator. Translate the following document "
|
|
||||||
f"content into {target_language}. Preserve the original formatting and "
|
|
||||||
f"structure as much as possible. Only output the translation, nothing else."
|
|
||||||
)
|
)
|
||||||
|
|
||||||
translation = _call_openrouter(system_prompt, truncated, max_tokens=2000, tool_name="pdf_translate")
|
|
||||||
|
|
||||||
page_count = text.count("[Page ")
|
page_count = text.count("[Page ")
|
||||||
return {
|
return {
|
||||||
"translation": translation,
|
"translation": translated["translation"],
|
||||||
"pages_analyzed": page_count,
|
"pages_analyzed": page_count,
|
||||||
"target_language": target_language,
|
"target_language": normalized_target_language,
|
||||||
|
"source_language": normalized_source_language,
|
||||||
|
"detected_source_language": translated["detected_source_language"],
|
||||||
|
"provider": translated["provider"],
|
||||||
|
"chunks_translated": translated["chunks_translated"],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -361,12 +749,14 @@ def extract_tables(input_path: str) -> dict:
|
|||||||
cells.append(str(val))
|
cells.append(str(val))
|
||||||
rows.append(cells)
|
rows.append(cells)
|
||||||
|
|
||||||
result_tables.append({
|
result_tables.append(
|
||||||
"page": page_num,
|
{
|
||||||
"table_index": table_index,
|
"page": page_num,
|
||||||
"headers": headers,
|
"table_index": table_index,
|
||||||
"rows": rows,
|
"headers": headers,
|
||||||
})
|
"rows": rows,
|
||||||
|
}
|
||||||
|
)
|
||||||
table_index += 1
|
table_index += 1
|
||||||
|
|
||||||
if not result_tables:
|
if not result_tables:
|
||||||
@@ -385,7 +775,9 @@ def extract_tables(input_path: str) -> dict:
|
|||||||
except PdfAiError:
|
except PdfAiError:
|
||||||
raise
|
raise
|
||||||
except ImportError:
|
except ImportError:
|
||||||
raise PdfAiError("tabula-py library is not installed.", error_code="TABULA_NOT_INSTALLED")
|
raise PdfAiError(
|
||||||
|
"tabula-py library is not installed.", error_code="TABULA_NOT_INSTALLED"
|
||||||
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise PdfAiError(
|
raise PdfAiError(
|
||||||
"Failed to extract tables.",
|
"Failed to extract tables.",
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
"""Celery tasks for PDF AI tools — Chat, Summarize, Translate, Table Extract."""
|
"""Celery tasks for PDF AI tools — Chat, Summarize, Translate, Table Extract."""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
import json
|
import json
|
||||||
@@ -28,7 +29,8 @@ def _build_pdf_ai_error_payload(task_id: str, error: PdfAiError, tool: str) -> d
|
|||||||
payload = {
|
payload = {
|
||||||
"status": "failed",
|
"status": "failed",
|
||||||
"error_code": getattr(error, "error_code", "PDF_AI_ERROR"),
|
"error_code": getattr(error, "error_code", "PDF_AI_ERROR"),
|
||||||
"user_message": getattr(error, "user_message", str(error)) or "AI processing failed.",
|
"user_message": getattr(error, "user_message", str(error))
|
||||||
|
or "AI processing failed.",
|
||||||
"task_id": task_id,
|
"task_id": task_id,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -80,9 +82,12 @@ def chat_with_pdf_task(
|
|||||||
|
|
||||||
logger.info(f"Task {task_id}: Chat with PDF completed")
|
logger.info(f"Task {task_id}: Chat with PDF completed")
|
||||||
finalize_task_tracking(
|
finalize_task_tracking(
|
||||||
user_id=user_id, tool="chat-pdf",
|
user_id=user_id,
|
||||||
original_filename=original_filename, result=result,
|
tool="chat-pdf",
|
||||||
usage_source=usage_source, api_key_id=api_key_id,
|
original_filename=original_filename,
|
||||||
|
result=result,
|
||||||
|
usage_source=usage_source,
|
||||||
|
api_key_id=api_key_id,
|
||||||
celery_task_id=self.request.id,
|
celery_task_id=self.request.id,
|
||||||
)
|
)
|
||||||
_cleanup(task_id)
|
_cleanup(task_id)
|
||||||
@@ -91,9 +96,12 @@ def chat_with_pdf_task(
|
|||||||
except PdfAiError as e:
|
except PdfAiError as e:
|
||||||
result = _build_pdf_ai_error_payload(task_id, e, "chat-pdf")
|
result = _build_pdf_ai_error_payload(task_id, e, "chat-pdf")
|
||||||
finalize_task_tracking(
|
finalize_task_tracking(
|
||||||
user_id=user_id, tool="chat-pdf",
|
user_id=user_id,
|
||||||
original_filename=original_filename, result=result,
|
tool="chat-pdf",
|
||||||
usage_source=usage_source, api_key_id=api_key_id,
|
original_filename=original_filename,
|
||||||
|
result=result,
|
||||||
|
usage_source=usage_source,
|
||||||
|
api_key_id=api_key_id,
|
||||||
celery_task_id=self.request.id,
|
celery_task_id=self.request.id,
|
||||||
)
|
)
|
||||||
_cleanup(task_id)
|
_cleanup(task_id)
|
||||||
@@ -103,9 +111,12 @@ def chat_with_pdf_task(
|
|||||||
logger.error(f"Task {task_id}: Unexpected error — {e}")
|
logger.error(f"Task {task_id}: Unexpected error — {e}")
|
||||||
result = {"status": "failed", "error": "An unexpected error occurred."}
|
result = {"status": "failed", "error": "An unexpected error occurred."}
|
||||||
finalize_task_tracking(
|
finalize_task_tracking(
|
||||||
user_id=user_id, tool="chat-pdf",
|
user_id=user_id,
|
||||||
original_filename=original_filename, result=result,
|
tool="chat-pdf",
|
||||||
usage_source=usage_source, api_key_id=api_key_id,
|
original_filename=original_filename,
|
||||||
|
result=result,
|
||||||
|
usage_source=usage_source,
|
||||||
|
api_key_id=api_key_id,
|
||||||
celery_task_id=self.request.id,
|
celery_task_id=self.request.id,
|
||||||
)
|
)
|
||||||
_cleanup(task_id)
|
_cleanup(task_id)
|
||||||
@@ -140,9 +151,12 @@ def summarize_pdf_task(
|
|||||||
|
|
||||||
logger.info(f"Task {task_id}: PDF summarize completed")
|
logger.info(f"Task {task_id}: PDF summarize completed")
|
||||||
finalize_task_tracking(
|
finalize_task_tracking(
|
||||||
user_id=user_id, tool="summarize-pdf",
|
user_id=user_id,
|
||||||
original_filename=original_filename, result=result,
|
tool="summarize-pdf",
|
||||||
usage_source=usage_source, api_key_id=api_key_id,
|
original_filename=original_filename,
|
||||||
|
result=result,
|
||||||
|
usage_source=usage_source,
|
||||||
|
api_key_id=api_key_id,
|
||||||
celery_task_id=self.request.id,
|
celery_task_id=self.request.id,
|
||||||
)
|
)
|
||||||
_cleanup(task_id)
|
_cleanup(task_id)
|
||||||
@@ -151,9 +165,12 @@ def summarize_pdf_task(
|
|||||||
except PdfAiError as e:
|
except PdfAiError as e:
|
||||||
result = _build_pdf_ai_error_payload(task_id, e, "summarize-pdf")
|
result = _build_pdf_ai_error_payload(task_id, e, "summarize-pdf")
|
||||||
finalize_task_tracking(
|
finalize_task_tracking(
|
||||||
user_id=user_id, tool="summarize-pdf",
|
user_id=user_id,
|
||||||
original_filename=original_filename, result=result,
|
tool="summarize-pdf",
|
||||||
usage_source=usage_source, api_key_id=api_key_id,
|
original_filename=original_filename,
|
||||||
|
result=result,
|
||||||
|
usage_source=usage_source,
|
||||||
|
api_key_id=api_key_id,
|
||||||
celery_task_id=self.request.id,
|
celery_task_id=self.request.id,
|
||||||
)
|
)
|
||||||
_cleanup(task_id)
|
_cleanup(task_id)
|
||||||
@@ -163,9 +180,12 @@ def summarize_pdf_task(
|
|||||||
logger.error(f"Task {task_id}: Unexpected error — {e}")
|
logger.error(f"Task {task_id}: Unexpected error — {e}")
|
||||||
result = {"status": "failed", "error": "An unexpected error occurred."}
|
result = {"status": "failed", "error": "An unexpected error occurred."}
|
||||||
finalize_task_tracking(
|
finalize_task_tracking(
|
||||||
user_id=user_id, tool="summarize-pdf",
|
user_id=user_id,
|
||||||
original_filename=original_filename, result=result,
|
tool="summarize-pdf",
|
||||||
usage_source=usage_source, api_key_id=api_key_id,
|
original_filename=original_filename,
|
||||||
|
result=result,
|
||||||
|
usage_source=usage_source,
|
||||||
|
api_key_id=api_key_id,
|
||||||
celery_task_id=self.request.id,
|
celery_task_id=self.request.id,
|
||||||
)
|
)
|
||||||
_cleanup(task_id)
|
_cleanup(task_id)
|
||||||
@@ -182,28 +202,41 @@ def translate_pdf_task(
|
|||||||
task_id: str,
|
task_id: str,
|
||||||
original_filename: str,
|
original_filename: str,
|
||||||
target_language: str,
|
target_language: str,
|
||||||
|
source_language: str | None = None,
|
||||||
user_id: int | None = None,
|
user_id: int | None = None,
|
||||||
usage_source: str = "web",
|
usage_source: str = "web",
|
||||||
api_key_id: int | None = None,
|
api_key_id: int | None = None,
|
||||||
):
|
):
|
||||||
"""Translate a PDF document to another language."""
|
"""Translate a PDF document to another language."""
|
||||||
try:
|
try:
|
||||||
self.update_state(state="PROCESSING", meta={"step": "Translating document..."})
|
self.update_state(
|
||||||
|
state="PROCESSING",
|
||||||
|
meta={"step": "Translating document with provider fallback..."},
|
||||||
|
)
|
||||||
|
|
||||||
data = translate_pdf(input_path, target_language)
|
data = translate_pdf(
|
||||||
|
input_path, target_language, source_language=source_language
|
||||||
|
)
|
||||||
|
|
||||||
result = {
|
result = {
|
||||||
"status": "completed",
|
"status": "completed",
|
||||||
"translation": data["translation"],
|
"translation": data["translation"],
|
||||||
"pages_analyzed": data["pages_analyzed"],
|
"pages_analyzed": data["pages_analyzed"],
|
||||||
"target_language": data["target_language"],
|
"target_language": data["target_language"],
|
||||||
|
"source_language": data.get("source_language"),
|
||||||
|
"detected_source_language": data.get("detected_source_language"),
|
||||||
|
"provider": data.get("provider"),
|
||||||
|
"chunks_translated": data.get("chunks_translated"),
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.info(f"Task {task_id}: PDF translate completed")
|
logger.info(f"Task {task_id}: PDF translate completed")
|
||||||
finalize_task_tracking(
|
finalize_task_tracking(
|
||||||
user_id=user_id, tool="translate-pdf",
|
user_id=user_id,
|
||||||
original_filename=original_filename, result=result,
|
tool="translate-pdf",
|
||||||
usage_source=usage_source, api_key_id=api_key_id,
|
original_filename=original_filename,
|
||||||
|
result=result,
|
||||||
|
usage_source=usage_source,
|
||||||
|
api_key_id=api_key_id,
|
||||||
celery_task_id=self.request.id,
|
celery_task_id=self.request.id,
|
||||||
)
|
)
|
||||||
_cleanup(task_id)
|
_cleanup(task_id)
|
||||||
@@ -212,9 +245,12 @@ def translate_pdf_task(
|
|||||||
except PdfAiError as e:
|
except PdfAiError as e:
|
||||||
result = _build_pdf_ai_error_payload(task_id, e, "translate-pdf")
|
result = _build_pdf_ai_error_payload(task_id, e, "translate-pdf")
|
||||||
finalize_task_tracking(
|
finalize_task_tracking(
|
||||||
user_id=user_id, tool="translate-pdf",
|
user_id=user_id,
|
||||||
original_filename=original_filename, result=result,
|
tool="translate-pdf",
|
||||||
usage_source=usage_source, api_key_id=api_key_id,
|
original_filename=original_filename,
|
||||||
|
result=result,
|
||||||
|
usage_source=usage_source,
|
||||||
|
api_key_id=api_key_id,
|
||||||
celery_task_id=self.request.id,
|
celery_task_id=self.request.id,
|
||||||
)
|
)
|
||||||
_cleanup(task_id)
|
_cleanup(task_id)
|
||||||
@@ -224,9 +260,12 @@ def translate_pdf_task(
|
|||||||
logger.error(f"Task {task_id}: Unexpected error — {e}")
|
logger.error(f"Task {task_id}: Unexpected error — {e}")
|
||||||
result = {"status": "failed", "error": "An unexpected error occurred."}
|
result = {"status": "failed", "error": "An unexpected error occurred."}
|
||||||
finalize_task_tracking(
|
finalize_task_tracking(
|
||||||
user_id=user_id, tool="translate-pdf",
|
user_id=user_id,
|
||||||
original_filename=original_filename, result=result,
|
tool="translate-pdf",
|
||||||
usage_source=usage_source, api_key_id=api_key_id,
|
original_filename=original_filename,
|
||||||
|
result=result,
|
||||||
|
usage_source=usage_source,
|
||||||
|
api_key_id=api_key_id,
|
||||||
celery_task_id=self.request.id,
|
celery_task_id=self.request.id,
|
||||||
)
|
)
|
||||||
_cleanup(task_id)
|
_cleanup(task_id)
|
||||||
@@ -260,9 +299,12 @@ def extract_tables_task(
|
|||||||
|
|
||||||
logger.info(f"Task {task_id}: Table extraction completed")
|
logger.info(f"Task {task_id}: Table extraction completed")
|
||||||
finalize_task_tracking(
|
finalize_task_tracking(
|
||||||
user_id=user_id, tool="extract-tables",
|
user_id=user_id,
|
||||||
original_filename=original_filename, result=result,
|
tool="extract-tables",
|
||||||
usage_source=usage_source, api_key_id=api_key_id,
|
original_filename=original_filename,
|
||||||
|
result=result,
|
||||||
|
usage_source=usage_source,
|
||||||
|
api_key_id=api_key_id,
|
||||||
celery_task_id=self.request.id,
|
celery_task_id=self.request.id,
|
||||||
)
|
)
|
||||||
_cleanup(task_id)
|
_cleanup(task_id)
|
||||||
@@ -271,9 +313,12 @@ def extract_tables_task(
|
|||||||
except PdfAiError as e:
|
except PdfAiError as e:
|
||||||
result = _build_pdf_ai_error_payload(task_id, e, "extract-tables")
|
result = _build_pdf_ai_error_payload(task_id, e, "extract-tables")
|
||||||
finalize_task_tracking(
|
finalize_task_tracking(
|
||||||
user_id=user_id, tool="extract-tables",
|
user_id=user_id,
|
||||||
original_filename=original_filename, result=result,
|
tool="extract-tables",
|
||||||
usage_source=usage_source, api_key_id=api_key_id,
|
original_filename=original_filename,
|
||||||
|
result=result,
|
||||||
|
usage_source=usage_source,
|
||||||
|
api_key_id=api_key_id,
|
||||||
celery_task_id=self.request.id,
|
celery_task_id=self.request.id,
|
||||||
)
|
)
|
||||||
_cleanup(task_id)
|
_cleanup(task_id)
|
||||||
@@ -283,9 +328,12 @@ def extract_tables_task(
|
|||||||
logger.error(f"Task {task_id}: Unexpected error — {e}")
|
logger.error(f"Task {task_id}: Unexpected error — {e}")
|
||||||
result = {"status": "failed", "error": "An unexpected error occurred."}
|
result = {"status": "failed", "error": "An unexpected error occurred."}
|
||||||
finalize_task_tracking(
|
finalize_task_tracking(
|
||||||
user_id=user_id, tool="extract-tables",
|
user_id=user_id,
|
||||||
original_filename=original_filename, result=result,
|
tool="extract-tables",
|
||||||
usage_source=usage_source, api_key_id=api_key_id,
|
original_filename=original_filename,
|
||||||
|
result=result,
|
||||||
|
usage_source=usage_source,
|
||||||
|
api_key_id=api_key_id,
|
||||||
celery_task_id=self.request.id,
|
celery_task_id=self.request.id,
|
||||||
)
|
)
|
||||||
_cleanup(task_id)
|
_cleanup(task_id)
|
||||||
|
|||||||
@@ -26,20 +26,21 @@ def _env_or_default(name: str, default: str) -> str:
|
|||||||
|
|
||||||
class BaseConfig:
|
class BaseConfig:
|
||||||
"""Base configuration."""
|
"""Base configuration."""
|
||||||
|
|
||||||
SECRET_KEY = os.getenv("SECRET_KEY", "change-me-in-production")
|
SECRET_KEY = os.getenv("SECRET_KEY", "change-me-in-production")
|
||||||
INTERNAL_ADMIN_SECRET = os.getenv("INTERNAL_ADMIN_SECRET", "")
|
INTERNAL_ADMIN_SECRET = os.getenv("INTERNAL_ADMIN_SECRET", "")
|
||||||
INTERNAL_ADMIN_EMAILS = _parse_csv_env("INTERNAL_ADMIN_EMAILS")
|
INTERNAL_ADMIN_EMAILS = _parse_csv_env("INTERNAL_ADMIN_EMAILS")
|
||||||
|
|
||||||
# File upload settings
|
# File upload settings
|
||||||
MAX_CONTENT_LENGTH = int(
|
MAX_CONTENT_LENGTH = (
|
||||||
os.getenv("ABSOLUTE_MAX_CONTENT_LENGTH_MB", 100)
|
int(os.getenv("ABSOLUTE_MAX_CONTENT_LENGTH_MB", 100)) * 1024 * 1024
|
||||||
) * 1024 * 1024
|
)
|
||||||
UPLOAD_FOLDER = _env_or_default("UPLOAD_FOLDER", "/tmp/uploads")
|
UPLOAD_FOLDER = _env_or_default("UPLOAD_FOLDER", "/tmp/uploads")
|
||||||
OUTPUT_FOLDER = _env_or_default("OUTPUT_FOLDER", "/tmp/outputs")
|
OUTPUT_FOLDER = _env_or_default("OUTPUT_FOLDER", "/tmp/outputs")
|
||||||
FILE_EXPIRY_SECONDS = int(os.getenv("FILE_EXPIRY_SECONDS", 1800))
|
FILE_EXPIRY_SECONDS = int(os.getenv("FILE_EXPIRY_SECONDS", 1800))
|
||||||
STORAGE_ALLOW_LOCAL_FALLBACK = os.getenv(
|
STORAGE_ALLOW_LOCAL_FALLBACK = (
|
||||||
"STORAGE_ALLOW_LOCAL_FALLBACK", "true"
|
os.getenv("STORAGE_ALLOW_LOCAL_FALLBACK", "true").lower() == "true"
|
||||||
).lower() == "true"
|
)
|
||||||
DATABASE_PATH = _env_or_default(
|
DATABASE_PATH = _env_or_default(
|
||||||
"DATABASE_PATH", os.path.join(BASE_DIR, "data", "dociva.db")
|
"DATABASE_PATH", os.path.join(BASE_DIR, "data", "dociva.db")
|
||||||
)
|
)
|
||||||
@@ -69,31 +70,29 @@ class BaseConfig:
|
|||||||
"application/vnd.openxmlformats-officedocument.presentationml.presentation"
|
"application/vnd.openxmlformats-officedocument.presentationml.presentation"
|
||||||
],
|
],
|
||||||
"ppt": ["application/vnd.ms-powerpoint"],
|
"ppt": ["application/vnd.ms-powerpoint"],
|
||||||
"xlsx": [
|
"xlsx": ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"],
|
||||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
|
||||||
],
|
|
||||||
"xls": ["application/vnd.ms-excel"],
|
"xls": ["application/vnd.ms-excel"],
|
||||||
}
|
}
|
||||||
|
|
||||||
# File size limits per type (bytes)
|
# File size limits per type (bytes)
|
||||||
FILE_SIZE_LIMITS = {
|
FILE_SIZE_LIMITS = {
|
||||||
"pdf": 20 * 1024 * 1024, # 20MB
|
"pdf": 20 * 1024 * 1024, # 20MB
|
||||||
"doc": 15 * 1024 * 1024, # 15MB
|
"doc": 15 * 1024 * 1024, # 15MB
|
||||||
"docx": 15 * 1024 * 1024, # 15MB
|
"docx": 15 * 1024 * 1024, # 15MB
|
||||||
"html": 10 * 1024 * 1024, # 10MB
|
"html": 10 * 1024 * 1024, # 10MB
|
||||||
"htm": 10 * 1024 * 1024, # 10MB
|
"htm": 10 * 1024 * 1024, # 10MB
|
||||||
"png": 10 * 1024 * 1024, # 10MB
|
"png": 10 * 1024 * 1024, # 10MB
|
||||||
"jpg": 10 * 1024 * 1024, # 10MB
|
"jpg": 10 * 1024 * 1024, # 10MB
|
||||||
"jpeg": 10 * 1024 * 1024, # 10MB
|
"jpeg": 10 * 1024 * 1024, # 10MB
|
||||||
"webp": 10 * 1024 * 1024, # 10MB
|
"webp": 10 * 1024 * 1024, # 10MB
|
||||||
"tiff": 15 * 1024 * 1024, # 15MB
|
"tiff": 15 * 1024 * 1024, # 15MB
|
||||||
"bmp": 15 * 1024 * 1024, # 15MB
|
"bmp": 15 * 1024 * 1024, # 15MB
|
||||||
"mp4": 50 * 1024 * 1024, # 50MB
|
"mp4": 50 * 1024 * 1024, # 50MB
|
||||||
"webm": 50 * 1024 * 1024, # 50MB
|
"webm": 50 * 1024 * 1024, # 50MB
|
||||||
"pptx": 20 * 1024 * 1024, # 20MB
|
"pptx": 20 * 1024 * 1024, # 20MB
|
||||||
"ppt": 20 * 1024 * 1024, # 20MB
|
"ppt": 20 * 1024 * 1024, # 20MB
|
||||||
"xlsx": 15 * 1024 * 1024, # 15MB
|
"xlsx": 15 * 1024 * 1024, # 15MB
|
||||||
"xls": 15 * 1024 * 1024, # 15MB
|
"xls": 15 * 1024 * 1024, # 15MB
|
||||||
}
|
}
|
||||||
|
|
||||||
# Redis
|
# Redis
|
||||||
@@ -118,11 +117,20 @@ class BaseConfig:
|
|||||||
|
|
||||||
# OpenRouter AI
|
# OpenRouter AI
|
||||||
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
|
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
|
||||||
OPENROUTER_MODEL = os.getenv("OPENROUTER_MODEL", "nvidia/nemotron-3-super-120b-a12b:free")
|
OPENROUTER_MODEL = os.getenv(
|
||||||
|
"OPENROUTER_MODEL", "nvidia/nemotron-3-super-120b-a12b:free"
|
||||||
|
)
|
||||||
OPENROUTER_BASE_URL = os.getenv(
|
OPENROUTER_BASE_URL = os.getenv(
|
||||||
"OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1/chat/completions"
|
"OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1/chat/completions"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Premium translation provider (recommended for Translate PDF)
|
||||||
|
DEEPL_API_KEY = os.getenv("DEEPL_API_KEY", "")
|
||||||
|
DEEPL_API_URL = os.getenv(
|
||||||
|
"DEEPL_API_URL", "https://api-free.deepl.com/v2/translate"
|
||||||
|
)
|
||||||
|
DEEPL_TIMEOUT_SECONDS = int(os.getenv("DEEPL_TIMEOUT_SECONDS", 90))
|
||||||
|
|
||||||
# SMTP (for password reset emails)
|
# SMTP (for password reset emails)
|
||||||
SMTP_HOST = os.getenv("SMTP_HOST", "")
|
SMTP_HOST = os.getenv("SMTP_HOST", "")
|
||||||
SMTP_PORT = int(os.getenv("SMTP_PORT", 587))
|
SMTP_PORT = int(os.getenv("SMTP_PORT", 587))
|
||||||
@@ -156,12 +164,14 @@ class BaseConfig:
|
|||||||
|
|
||||||
class DevelopmentConfig(BaseConfig):
|
class DevelopmentConfig(BaseConfig):
|
||||||
"""Development configuration."""
|
"""Development configuration."""
|
||||||
|
|
||||||
DEBUG = True
|
DEBUG = True
|
||||||
TESTING = False
|
TESTING = False
|
||||||
|
|
||||||
|
|
||||||
class ProductionConfig(BaseConfig):
|
class ProductionConfig(BaseConfig):
|
||||||
"""Production configuration."""
|
"""Production configuration."""
|
||||||
|
|
||||||
DEBUG = False
|
DEBUG = False
|
||||||
TESTING = False
|
TESTING = False
|
||||||
SESSION_COOKIE_SECURE = True
|
SESSION_COOKIE_SECURE = True
|
||||||
@@ -172,6 +182,7 @@ class ProductionConfig(BaseConfig):
|
|||||||
|
|
||||||
class TestingConfig(BaseConfig):
|
class TestingConfig(BaseConfig):
|
||||||
"""Testing configuration."""
|
"""Testing configuration."""
|
||||||
|
|
||||||
DEBUG = True
|
DEBUG = True
|
||||||
TESTING = True
|
TESTING = True
|
||||||
UPLOAD_FOLDER = "/tmp/test_uploads"
|
UPLOAD_FOLDER = "/tmp/test_uploads"
|
||||||
|
|||||||
93
backend/tests/test_pdf_translate_service.py
Normal file
93
backend/tests/test_pdf_translate_service.py
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
"""Tests for the resilient PDF translation workflow."""
|
||||||
|
|
||||||
|
from app.services.pdf_ai_service import DeepLSettings, PdfAiError, translate_pdf
|
||||||
|
|
||||||
|
|
||||||
|
def test_translate_pdf_prefers_premium_provider(monkeypatch):
|
||||||
|
"""Should use the premium provider when configured and available."""
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.services.pdf_ai_service._extract_text_from_pdf",
|
||||||
|
lambda _path: "[Page 1]\nHello world\n\n[Page 2]\nSecond page",
|
||||||
|
)
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.services.pdf_ai_service._get_deepl_settings",
|
||||||
|
lambda: DeepLSettings(
|
||||||
|
api_key="key",
|
||||||
|
base_url="https://api-free.deepl.com/v2/translate",
|
||||||
|
timeout_seconds=90,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.services.pdf_ai_service._translate_with_retry",
|
||||||
|
lambda action, provider_name: action(),
|
||||||
|
)
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.services.pdf_ai_service._call_deepl_translate",
|
||||||
|
lambda chunk, target_language, source_language=None: {
|
||||||
|
"translation": f"translated::{chunk}",
|
||||||
|
"provider": "deepl",
|
||||||
|
"detected_source_language": "en",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
result = translate_pdf("/tmp/demo.pdf", "fr", source_language="en")
|
||||||
|
|
||||||
|
assert result["provider"] == "deepl"
|
||||||
|
assert result["target_language"] == "fr"
|
||||||
|
assert result["detected_source_language"] == "en"
|
||||||
|
assert "translated::" in result["translation"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_translate_pdf_falls_back_when_premium_provider_fails(monkeypatch):
|
||||||
|
"""Should fall back to OpenRouter if the premium provider fails."""
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.services.pdf_ai_service._extract_text_from_pdf",
|
||||||
|
lambda _path: "[Page 1]\nHello world",
|
||||||
|
)
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.services.pdf_ai_service._get_deepl_settings",
|
||||||
|
lambda: DeepLSettings(
|
||||||
|
api_key="key",
|
||||||
|
base_url="https://api-free.deepl.com/v2/translate",
|
||||||
|
timeout_seconds=90,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.services.pdf_ai_service._translate_with_retry",
|
||||||
|
lambda action, provider_name: action(),
|
||||||
|
)
|
||||||
|
|
||||||
|
def fail_deepl(*_args, **_kwargs):
|
||||||
|
raise PdfAiError("DeepL unavailable", error_code="DEEPL_SERVER_ERROR")
|
||||||
|
|
||||||
|
monkeypatch.setattr("app.services.pdf_ai_service._call_deepl_translate", fail_deepl)
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.services.pdf_ai_service._call_openrouter_translate",
|
||||||
|
lambda chunk, target_language, source_language=None: {
|
||||||
|
"translation": f"fallback::{chunk}",
|
||||||
|
"provider": "openrouter",
|
||||||
|
"detected_source_language": "en",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
result = translate_pdf("/tmp/demo.pdf", "de", source_language="auto")
|
||||||
|
|
||||||
|
assert result["provider"] == "openrouter"
|
||||||
|
assert result["detected_source_language"] == "en"
|
||||||
|
assert result["translation"].startswith("fallback::")
|
||||||
|
|
||||||
|
|
||||||
|
def test_translate_pdf_rejects_identical_languages(monkeypatch):
|
||||||
|
"""Should reject no-op translation requests."""
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.services.pdf_ai_service._extract_text_from_pdf",
|
||||||
|
lambda _path: "[Page 1]\nHello world",
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
translate_pdf("/tmp/demo.pdf", "fr", source_language="fr")
|
||||||
|
except PdfAiError as error:
|
||||||
|
assert error.error_code == "PDF_AI_INVALID_INPUT"
|
||||||
|
assert "different source and target languages" in error.user_message
|
||||||
|
else:
|
||||||
|
raise AssertionError("Expected identical language validation to fail")
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -2,31 +2,31 @@
|
|||||||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/blog/how-to-compress-pdf-online</loc>
|
<loc>https://dociva.io/blog/how-to-compress-pdf-online</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>monthly</changefreq>
|
<changefreq>monthly</changefreq>
|
||||||
<priority>0.6</priority>
|
<priority>0.6</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/blog/convert-images-without-losing-quality</loc>
|
<loc>https://dociva.io/blog/convert-images-without-losing-quality</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>monthly</changefreq>
|
<changefreq>monthly</changefreq>
|
||||||
<priority>0.6</priority>
|
<priority>0.6</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/blog/ocr-extract-text-from-images</loc>
|
<loc>https://dociva.io/blog/ocr-extract-text-from-images</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>monthly</changefreq>
|
<changefreq>monthly</changefreq>
|
||||||
<priority>0.6</priority>
|
<priority>0.6</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/blog/merge-split-pdf-files</loc>
|
<loc>https://dociva.io/blog/merge-split-pdf-files</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>monthly</changefreq>
|
<changefreq>monthly</changefreq>
|
||||||
<priority>0.6</priority>
|
<priority>0.6</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/blog/ai-chat-with-pdf-documents</loc>
|
<loc>https://dociva.io/blog/ai-chat-with-pdf-documents</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>monthly</changefreq>
|
<changefreq>monthly</changefreq>
|
||||||
<priority>0.6</priority>
|
<priority>0.6</priority>
|
||||||
</url>
|
</url>
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -2,55 +2,55 @@
|
|||||||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/</loc>
|
<loc>https://dociva.io/</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>daily</changefreq>
|
<changefreq>daily</changefreq>
|
||||||
<priority>1.0</priority>
|
<priority>1.0</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools</loc>
|
<loc>https://dociva.io/tools</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.8</priority>
|
<priority>0.8</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/about</loc>
|
<loc>https://dociva.io/about</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>monthly</changefreq>
|
<changefreq>monthly</changefreq>
|
||||||
<priority>0.4</priority>
|
<priority>0.4</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/contact</loc>
|
<loc>https://dociva.io/contact</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>monthly</changefreq>
|
<changefreq>monthly</changefreq>
|
||||||
<priority>0.4</priority>
|
<priority>0.4</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/privacy</loc>
|
<loc>https://dociva.io/privacy</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>yearly</changefreq>
|
<changefreq>yearly</changefreq>
|
||||||
<priority>0.3</priority>
|
<priority>0.3</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/terms</loc>
|
<loc>https://dociva.io/terms</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>yearly</changefreq>
|
<changefreq>yearly</changefreq>
|
||||||
<priority>0.3</priority>
|
<priority>0.3</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/pricing</loc>
|
<loc>https://dociva.io/pricing</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>monthly</changefreq>
|
<changefreq>monthly</changefreq>
|
||||||
<priority>0.7</priority>
|
<priority>0.7</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/blog</loc>
|
<loc>https://dociva.io/blog</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.6</priority>
|
<priority>0.6</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/developers</loc>
|
<loc>https://dociva.io/developers</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>monthly</changefreq>
|
<changefreq>monthly</changefreq>
|
||||||
<priority>0.5</priority>
|
<priority>0.5</priority>
|
||||||
</url>
|
</url>
|
||||||
|
|||||||
@@ -2,265 +2,265 @@
|
|||||||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/pdf-to-word</loc>
|
<loc>https://dociva.io/tools/pdf-to-word</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.9</priority>
|
<priority>0.9</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/word-to-pdf</loc>
|
<loc>https://dociva.io/tools/word-to-pdf</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.9</priority>
|
<priority>0.9</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/compress-pdf</loc>
|
<loc>https://dociva.io/tools/compress-pdf</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.9</priority>
|
<priority>0.9</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/merge-pdf</loc>
|
<loc>https://dociva.io/tools/merge-pdf</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.9</priority>
|
<priority>0.9</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/split-pdf</loc>
|
<loc>https://dociva.io/tools/split-pdf</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.8</priority>
|
<priority>0.8</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/rotate-pdf</loc>
|
<loc>https://dociva.io/tools/rotate-pdf</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.7</priority>
|
<priority>0.7</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/pdf-to-images</loc>
|
<loc>https://dociva.io/tools/pdf-to-images</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.8</priority>
|
<priority>0.8</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/images-to-pdf</loc>
|
<loc>https://dociva.io/tools/images-to-pdf</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.8</priority>
|
<priority>0.8</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/watermark-pdf</loc>
|
<loc>https://dociva.io/tools/watermark-pdf</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.7</priority>
|
<priority>0.7</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/protect-pdf</loc>
|
<loc>https://dociva.io/tools/protect-pdf</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.8</priority>
|
<priority>0.8</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/unlock-pdf</loc>
|
<loc>https://dociva.io/tools/unlock-pdf</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.8</priority>
|
<priority>0.8</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/page-numbers</loc>
|
<loc>https://dociva.io/tools/page-numbers</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.7</priority>
|
<priority>0.7</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/pdf-editor</loc>
|
<loc>https://dociva.io/tools/pdf-editor</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.8</priority>
|
<priority>0.8</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/pdf-flowchart</loc>
|
<loc>https://dociva.io/tools/pdf-flowchart</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.7</priority>
|
<priority>0.7</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/pdf-to-excel</loc>
|
<loc>https://dociva.io/tools/pdf-to-excel</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.8</priority>
|
<priority>0.8</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/remove-watermark-pdf</loc>
|
<loc>https://dociva.io/tools/remove-watermark-pdf</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.7</priority>
|
<priority>0.7</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/reorder-pdf</loc>
|
<loc>https://dociva.io/tools/reorder-pdf</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.7</priority>
|
<priority>0.7</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/extract-pages</loc>
|
<loc>https://dociva.io/tools/extract-pages</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.7</priority>
|
<priority>0.7</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/image-converter</loc>
|
<loc>https://dociva.io/tools/image-converter</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.8</priority>
|
<priority>0.8</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/image-resize</loc>
|
<loc>https://dociva.io/tools/image-resize</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.8</priority>
|
<priority>0.8</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/compress-image</loc>
|
<loc>https://dociva.io/tools/compress-image</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.8</priority>
|
<priority>0.8</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/ocr</loc>
|
<loc>https://dociva.io/tools/ocr</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.8</priority>
|
<priority>0.8</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/remove-background</loc>
|
<loc>https://dociva.io/tools/remove-background</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.8</priority>
|
<priority>0.8</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/image-to-svg</loc>
|
<loc>https://dociva.io/tools/image-to-svg</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.8</priority>
|
<priority>0.8</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/html-to-pdf</loc>
|
<loc>https://dociva.io/tools/html-to-pdf</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.7</priority>
|
<priority>0.7</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/chat-pdf</loc>
|
<loc>https://dociva.io/tools/chat-pdf</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.8</priority>
|
<priority>0.8</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/summarize-pdf</loc>
|
<loc>https://dociva.io/tools/summarize-pdf</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.8</priority>
|
<priority>0.8</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/translate-pdf</loc>
|
<loc>https://dociva.io/tools/translate-pdf</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.8</priority>
|
<priority>0.8</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/extract-tables</loc>
|
<loc>https://dociva.io/tools/extract-tables</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.8</priority>
|
<priority>0.8</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/qr-code</loc>
|
<loc>https://dociva.io/tools/qr-code</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.7</priority>
|
<priority>0.7</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/video-to-gif</loc>
|
<loc>https://dociva.io/tools/video-to-gif</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.7</priority>
|
<priority>0.7</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/word-counter</loc>
|
<loc>https://dociva.io/tools/word-counter</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.6</priority>
|
<priority>0.6</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/text-cleaner</loc>
|
<loc>https://dociva.io/tools/text-cleaner</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.6</priority>
|
<priority>0.6</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/pdf-to-pptx</loc>
|
<loc>https://dociva.io/tools/pdf-to-pptx</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.8</priority>
|
<priority>0.8</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/excel-to-pdf</loc>
|
<loc>https://dociva.io/tools/excel-to-pdf</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.8</priority>
|
<priority>0.8</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/pptx-to-pdf</loc>
|
<loc>https://dociva.io/tools/pptx-to-pdf</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.8</priority>
|
<priority>0.8</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/sign-pdf</loc>
|
<loc>https://dociva.io/tools/sign-pdf</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.8</priority>
|
<priority>0.8</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/crop-pdf</loc>
|
<loc>https://dociva.io/tools/crop-pdf</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.7</priority>
|
<priority>0.7</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/flatten-pdf</loc>
|
<loc>https://dociva.io/tools/flatten-pdf</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.7</priority>
|
<priority>0.7</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/repair-pdf</loc>
|
<loc>https://dociva.io/tools/repair-pdf</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.7</priority>
|
<priority>0.7</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/pdf-metadata</loc>
|
<loc>https://dociva.io/tools/pdf-metadata</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.6</priority>
|
<priority>0.6</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/image-crop</loc>
|
<loc>https://dociva.io/tools/image-crop</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.7</priority>
|
<priority>0.7</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/image-rotate-flip</loc>
|
<loc>https://dociva.io/tools/image-rotate-flip</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.7</priority>
|
<priority>0.7</priority>
|
||||||
</url>
|
</url>
|
||||||
<url>
|
<url>
|
||||||
<loc>https://dociva.io/tools/barcode-generator</loc>
|
<loc>https://dociva.io/tools/barcode-generator</loc>
|
||||||
<lastmod>2026-03-29</lastmod>
|
<lastmod>2026-03-30</lastmod>
|
||||||
<changefreq>weekly</changefreq>
|
<changefreq>weekly</changefreq>
|
||||||
<priority>0.7</priority>
|
<priority>0.7</priority>
|
||||||
</url>
|
</url>
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import { useState, useEffect } from 'react';
|
import { useState, useEffect } from 'react';
|
||||||
import { useTranslation } from 'react-i18next';
|
import { useTranslation } from 'react-i18next';
|
||||||
import { Helmet } from 'react-helmet-async';
|
import { Helmet } from 'react-helmet-async';
|
||||||
import { Languages } from 'lucide-react';
|
import { Languages, ShieldCheck, Sparkles } from 'lucide-react';
|
||||||
import FileUploader from '@/components/shared/FileUploader';
|
import FileUploader from '@/components/shared/FileUploader';
|
||||||
import ProgressBar from '@/components/shared/ProgressBar';
|
import ProgressBar from '@/components/shared/ProgressBar';
|
||||||
import AdSlot from '@/components/layout/AdSlot';
|
import AdSlot from '@/components/layout/AdSlot';
|
||||||
@@ -26,11 +26,22 @@ const LANGUAGES = [
|
|||||||
{ value: 'it', label: 'Italiano' },
|
{ value: 'it', label: 'Italiano' },
|
||||||
];
|
];
|
||||||
|
|
||||||
|
const getLanguageLabel = (value: string) => {
|
||||||
|
if (!value || value === 'auto') {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return LANGUAGES.find((language) => language.value === value)?.label ?? value;
|
||||||
|
};
|
||||||
|
|
||||||
export default function TranslatePdf() {
|
export default function TranslatePdf() {
|
||||||
const { t } = useTranslation();
|
const { t } = useTranslation();
|
||||||
const [phase, setPhase] = useState<'upload' | 'processing' | 'done'>('upload');
|
const [phase, setPhase] = useState<'upload' | 'processing' | 'done'>('upload');
|
||||||
|
const [sourceLang, setSourceLang] = useState('auto');
|
||||||
const [targetLang, setTargetLang] = useState('en');
|
const [targetLang, setTargetLang] = useState('en');
|
||||||
const [translation, setTranslation] = useState('');
|
const [translation, setTranslation] = useState('');
|
||||||
|
const [provider, setProvider] = useState('');
|
||||||
|
const [detectedSourceLanguage, setDetectedSourceLanguage] = useState('');
|
||||||
|
|
||||||
const {
|
const {
|
||||||
file, uploadProgress, isUploading, taskId,
|
file, uploadProgress, isUploading, taskId,
|
||||||
@@ -39,7 +50,7 @@ export default function TranslatePdf() {
|
|||||||
endpoint: '/pdf-ai/translate',
|
endpoint: '/pdf-ai/translate',
|
||||||
maxSizeMB: 20,
|
maxSizeMB: 20,
|
||||||
acceptedTypes: ['pdf'],
|
acceptedTypes: ['pdf'],
|
||||||
extraData: { target_language: targetLang },
|
extraData: { target_language: targetLang, source_language: sourceLang },
|
||||||
});
|
});
|
||||||
|
|
||||||
const { status, result, error: taskError } = useTaskPolling({
|
const { status, result, error: taskError } = useTaskPolling({
|
||||||
@@ -47,6 +58,8 @@ export default function TranslatePdf() {
|
|||||||
onComplete: (r) => {
|
onComplete: (r) => {
|
||||||
setPhase('done');
|
setPhase('done');
|
||||||
setTranslation(r.translation || '');
|
setTranslation(r.translation || '');
|
||||||
|
setProvider(r.provider || '');
|
||||||
|
setDetectedSourceLanguage(r.detected_source_language || '');
|
||||||
dispatchRatingPrompt('translate-pdf');
|
dispatchRatingPrompt('translate-pdf');
|
||||||
},
|
},
|
||||||
onError: () => setPhase('done'),
|
onError: () => setPhase('done'),
|
||||||
@@ -63,7 +76,17 @@ export default function TranslatePdf() {
|
|||||||
if (id) setPhase('processing');
|
if (id) setPhase('processing');
|
||||||
};
|
};
|
||||||
|
|
||||||
const handleReset = () => { reset(); setPhase('upload'); setTargetLang('en'); setTranslation(''); };
|
const handleReset = () => {
|
||||||
|
reset();
|
||||||
|
setPhase('upload');
|
||||||
|
setSourceLang('auto');
|
||||||
|
setTargetLang('en');
|
||||||
|
setTranslation('');
|
||||||
|
setProvider('');
|
||||||
|
setDetectedSourceLanguage('');
|
||||||
|
};
|
||||||
|
|
||||||
|
const resolvedDetectedLanguage = getLanguageLabel(detectedSourceLanguage) || getLanguageLabel(sourceLang);
|
||||||
|
|
||||||
const schema = generateToolSchema({
|
const schema = generateToolSchema({
|
||||||
name: t('tools.translatePdf.title'),
|
name: t('tools.translatePdf.title'),
|
||||||
@@ -103,15 +126,44 @@ export default function TranslatePdf() {
|
|||||||
{file && !isUploading && (
|
{file && !isUploading && (
|
||||||
<>
|
<>
|
||||||
<div className="rounded-2xl bg-white p-5 ring-1 ring-slate-200 dark:bg-slate-800 dark:ring-slate-700">
|
<div className="rounded-2xl bg-white p-5 ring-1 ring-slate-200 dark:bg-slate-800 dark:ring-slate-700">
|
||||||
<label className="mb-2 block text-sm font-medium text-slate-700 dark:text-slate-300">
|
<div className="mb-4 flex items-start gap-3 rounded-xl bg-slate-50 p-4 dark:bg-slate-900/60">
|
||||||
{t('tools.translatePdf.targetLang')}
|
<ShieldCheck className="mt-0.5 h-5 w-5 text-emerald-600 dark:text-emerald-400" />
|
||||||
</label>
|
<div>
|
||||||
<select value={targetLang} onChange={(e) => setTargetLang(e.target.value)}
|
<p className="text-sm font-semibold text-slate-900 dark:text-slate-100">
|
||||||
className="w-full rounded-lg border border-slate-300 px-3 py-2 text-sm dark:border-slate-600 dark:bg-slate-700 dark:text-slate-200">
|
{t('tools.translatePdf.engineTitle')}
|
||||||
{LANGUAGES.map((lang) => (
|
</p>
|
||||||
<option key={lang.value} value={lang.value}>{lang.label}</option>
|
<p className="mt-1 text-sm text-slate-600 dark:text-slate-400">
|
||||||
))}
|
{t('tools.translatePdf.engineDescription')}
|
||||||
</select>
|
</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div className="grid gap-4 md:grid-cols-2">
|
||||||
|
<div>
|
||||||
|
<label className="mb-2 block text-sm font-medium text-slate-700 dark:text-slate-300">
|
||||||
|
{t('tools.translatePdf.sourceLang')}
|
||||||
|
</label>
|
||||||
|
<select value={sourceLang} onChange={(e) => setSourceLang(e.target.value)}
|
||||||
|
className="w-full rounded-lg border border-slate-300 px-3 py-2 text-sm dark:border-slate-600 dark:bg-slate-700 dark:text-slate-200">
|
||||||
|
<option value="auto">{t('tools.translatePdf.autoDetect')}</option>
|
||||||
|
{LANGUAGES.map((lang) => (
|
||||||
|
<option key={`source-${lang.value}`} value={lang.value}>{lang.label}</option>
|
||||||
|
))}
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div>
|
||||||
|
<label className="mb-2 block text-sm font-medium text-slate-700 dark:text-slate-300">
|
||||||
|
{t('tools.translatePdf.targetLang')}
|
||||||
|
</label>
|
||||||
|
<select value={targetLang} onChange={(e) => setTargetLang(e.target.value)}
|
||||||
|
className="w-full rounded-lg border border-slate-300 px-3 py-2 text-sm dark:border-slate-600 dark:bg-slate-700 dark:text-slate-200">
|
||||||
|
{LANGUAGES.map((lang) => (
|
||||||
|
<option key={lang.value} value={lang.value}>{lang.label}</option>
|
||||||
|
))}
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<button onClick={handleUpload} className="btn-primary w-full">
|
<button onClick={handleUpload} className="btn-primary w-full">
|
||||||
{t('tools.translatePdf.shortDesc')}
|
{t('tools.translatePdf.shortDesc')}
|
||||||
@@ -122,11 +174,39 @@ export default function TranslatePdf() {
|
|||||||
)}
|
)}
|
||||||
|
|
||||||
{phase === 'processing' && !result && (
|
{phase === 'processing' && !result && (
|
||||||
<ProgressBar state={status?.state || 'PENDING'} message={status?.progress} />
|
<div className="space-y-4">
|
||||||
|
<ProgressBar state={status?.state || 'PENDING'} message={status?.progress} />
|
||||||
|
<div className="rounded-xl bg-white p-4 ring-1 ring-slate-200 dark:bg-slate-800 dark:ring-slate-700">
|
||||||
|
<div className="flex items-start gap-3">
|
||||||
|
<Sparkles className="mt-0.5 h-5 w-5 text-purple-600 dark:text-purple-400" />
|
||||||
|
<p className="text-sm text-slate-600 dark:text-slate-400">
|
||||||
|
{t('tools.translatePdf.processingHint')}
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{phase === 'done' && translation && (
|
{phase === 'done' && translation && (
|
||||||
<div className="space-y-4">
|
<div className="space-y-4">
|
||||||
|
<div className="grid gap-3 sm:grid-cols-2">
|
||||||
|
<div className="rounded-xl bg-white p-4 ring-1 ring-slate-200 dark:bg-slate-800 dark:ring-slate-700">
|
||||||
|
<p className="text-xs font-semibold uppercase tracking-wide text-slate-500 dark:text-slate-400">
|
||||||
|
{t('tools.translatePdf.sourceDetected')}
|
||||||
|
</p>
|
||||||
|
<p className="mt-1 text-sm font-medium text-slate-900 dark:text-slate-100">
|
||||||
|
{resolvedDetectedLanguage || t('tools.translatePdf.autoDetect')}
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
<div className="rounded-xl bg-white p-4 ring-1 ring-slate-200 dark:bg-slate-800 dark:ring-slate-700">
|
||||||
|
<p className="text-xs font-semibold uppercase tracking-wide text-slate-500 dark:text-slate-400">
|
||||||
|
{t('tools.translatePdf.translationEngine')}
|
||||||
|
</p>
|
||||||
|
<p className="mt-1 text-sm font-medium text-slate-900 dark:text-slate-100">
|
||||||
|
{provider || 'auto'}
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
<div className="rounded-2xl bg-white p-6 ring-1 ring-slate-200 dark:bg-slate-800 dark:ring-slate-700">
|
<div className="rounded-2xl bg-white p-6 ring-1 ring-slate-200 dark:bg-slate-800 dark:ring-slate-700">
|
||||||
<h3 className="mb-3 text-sm font-semibold text-slate-700 dark:text-slate-300">
|
<h3 className="mb-3 text-sm font-semibold text-slate-700 dark:text-slate-300">
|
||||||
{t('tools.translatePdf.resultTitle')}
|
{t('tools.translatePdf.resultTitle')}
|
||||||
|
|||||||
@@ -827,9 +827,16 @@
|
|||||||
},
|
},
|
||||||
"translatePdf": {
|
"translatePdf": {
|
||||||
"title": "ترجمة PDF",
|
"title": "ترجمة PDF",
|
||||||
"description": "ترجم محتوى مستند PDF إلى أي لغة باستخدام الذكاء الاصطناعي.",
|
"description": "ترجم ملفات PDF عبر مسار ترجمة احترافي مع fallback تلقائي وتعامل أفضل مع الملفات الممسوحة ضوئياً.",
|
||||||
"shortDesc": "ترجمة PDF",
|
"shortDesc": "ترجمة PDF",
|
||||||
|
"sourceLang": "لغة المصدر",
|
||||||
"targetLang": "اللغة المستهدفة",
|
"targetLang": "اللغة المستهدفة",
|
||||||
|
"autoDetect": "اكتشاف تلقائي",
|
||||||
|
"engineTitle": "ترجمة مستندات بجاهزية إنتاجية",
|
||||||
|
"engineDescription": "يتم إرسال الملف أولاً إلى مزود ترجمة احترافي، ثم يتم التحويل تلقائياً إلى مسار AI فقط عند الحاجة. هذا يقلل مشاكل الضغط ويحسن ثبات النتيجة.",
|
||||||
|
"processingHint": "قد تتم ترجمة المستندات الكبيرة على عدة أجزاء مع retries وfallback بين المزودات. اترك الصفحة مفتوحة حتى يكتمل الطلب.",
|
||||||
|
"sourceDetected": "لغة المصدر المكتشفة",
|
||||||
|
"translationEngine": "محرك الترجمة",
|
||||||
"resultTitle": "الترجمة"
|
"resultTitle": "الترجمة"
|
||||||
},
|
},
|
||||||
"tableExtractor": {
|
"tableExtractor": {
|
||||||
|
|||||||
@@ -827,9 +827,16 @@
|
|||||||
},
|
},
|
||||||
"translatePdf": {
|
"translatePdf": {
|
||||||
"title": "Translate PDF",
|
"title": "Translate PDF",
|
||||||
"description": "Translate your PDF document content to any language using AI.",
|
"description": "Translate PDF documents with a premium translation pipeline, automatic fallback, and better handling for scanned files.",
|
||||||
"shortDesc": "Translate PDF",
|
"shortDesc": "Translate PDF",
|
||||||
|
"sourceLang": "Source Language",
|
||||||
"targetLang": "Target Language",
|
"targetLang": "Target Language",
|
||||||
|
"autoDetect": "Auto detect",
|
||||||
|
"engineTitle": "Production-grade document translation",
|
||||||
|
"engineDescription": "Your file is translated with a premium translation provider first, then automatically falls back to AI only if needed. This reduces high-demand failures and improves consistency.",
|
||||||
|
"processingHint": "Large documents may be translated in multiple chunks with retries and provider fallback. Keep this page open until the job completes.",
|
||||||
|
"sourceDetected": "Detected source",
|
||||||
|
"translationEngine": "Translation engine",
|
||||||
"resultTitle": "Translation"
|
"resultTitle": "Translation"
|
||||||
},
|
},
|
||||||
"tableExtractor": {
|
"tableExtractor": {
|
||||||
|
|||||||
@@ -827,9 +827,16 @@
|
|||||||
},
|
},
|
||||||
"translatePdf": {
|
"translatePdf": {
|
||||||
"title": "Traduire un PDF",
|
"title": "Traduire un PDF",
|
||||||
"description": "Traduisez le contenu de votre document PDF dans n'importe quelle langue grâce à l'IA.",
|
"description": "Traduisez vos PDF avec un pipeline premium, un fallback automatique et une meilleure prise en charge des fichiers scannés.",
|
||||||
"shortDesc": "Traduire le PDF",
|
"shortDesc": "Traduire le PDF",
|
||||||
|
"sourceLang": "Langue source",
|
||||||
"targetLang": "Langue cible",
|
"targetLang": "Langue cible",
|
||||||
|
"autoDetect": "Détection automatique",
|
||||||
|
"engineTitle": "Traduction documentaire de niveau production",
|
||||||
|
"engineDescription": "Votre fichier passe d'abord par un fournisseur de traduction premium, puis bascule automatiquement vers l'IA seulement si nécessaire. Cela réduit les erreurs de forte demande et améliore la stabilité.",
|
||||||
|
"processingHint": "Les documents volumineux peuvent être traduits en plusieurs segments avec retries et fallback entre fournisseurs. Laissez cette page ouverte jusqu'à la fin du traitement.",
|
||||||
|
"sourceDetected": "Source détectée",
|
||||||
|
"translationEngine": "Moteur de traduction",
|
||||||
"resultTitle": "Traduction"
|
"resultTitle": "Traduction"
|
||||||
},
|
},
|
||||||
"tableExtractor": {
|
"tableExtractor": {
|
||||||
|
|||||||
@@ -237,6 +237,10 @@ export interface TaskResult {
|
|||||||
summary?: string;
|
summary?: string;
|
||||||
translation?: string;
|
translation?: string;
|
||||||
target_language?: string;
|
target_language?: string;
|
||||||
|
source_language?: string;
|
||||||
|
detected_source_language?: string;
|
||||||
|
provider?: string;
|
||||||
|
chunks_translated?: number;
|
||||||
pages_analyzed?: number;
|
pages_analyzed?: number;
|
||||||
// Table extraction fields
|
// Table extraction fields
|
||||||
tables?: Array<{ page: number; table_index: number; headers: string[]; rows: string[][] }>;
|
tables?: Array<{ page: number; table_index: number; headers: string[]; rows: string[][] }>;
|
||||||
|
|||||||
Reference in New Issue
Block a user