تم الانتهاء من آخر دفعة تحسينات على المشروع، وتشمل:
تحويل لوحة الإدارة الداخلية من secret header إلى session auth حقيقي مع صلاحيات admin. إضافة دعم إدارة الأدوار من داخل لوحة الإدارة نفسها، مع حماية الحسابات المعتمدة عبر INTERNAL_ADMIN_EMAILS. تحسين بيانات المستخدم في الواجهة والباكند لتشمل role وis_allowlisted_admin. إضافة اختبار frontend مخصص لصفحة /internal/admin بدل الاعتماد فقط على build واختبار routes. تحسين إضافي في الأداء عبر إزالة الاعتماد على pdfjs-dist/pdf.worker في عدّ صفحات PDF واستبداله بمسار أخف باستخدام pdf-lib. تحسين تقسيم الـ chunks في build لتقليل أثر الحزم الكبيرة وفصل أجزاء مثل network, icons, pdf-core, وeditor. التحقق الذي تم: نجاح build للواجهة. نجاح اختبار صفحة الإدارة الداخلية في frontend. نجاح اختبارات auth/admin في backend. نجاح full backend suite مسبقًا مع EXIT:0. ولو تريد نسخة أقصر جدًا، استخدم هذه: آخر التحديثات: تم تحسين نظام الإدارة الداخلية ليعتمد على صلاحيات وجلسات حقيقية بدل secret header، مع إضافة إدارة أدوار من لوحة admin نفسها، وإضافة اختبارات frontend مخصصة للوحة، وتحسين أداء الواجهة عبر إزالة pdf.worker وتحسين تقسيم الـ chunks في build. جميع الاختبارات والتحققات الأساسية المطلوبة نجحت.
This commit is contained in:
316
backend/app/services/pdf_extra_service.py
Normal file
316
backend/app/services/pdf_extra_service.py
Normal file
@@ -0,0 +1,316 @@
|
||||
"""Extended PDF tools — Crop, Flatten, Repair, Metadata Editor."""
|
||||
import os
|
||||
import io
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PDFExtraError(Exception):
    """Raised when one of the extended PDF tools in this module fails.

    The crop, flatten, repair and metadata functions all report their
    failures through this single exception type.
    """
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Crop PDF
|
||||
# ---------------------------------------------------------------------------
|
||||
def crop_pdf(
    input_path: str,
    output_path: str,
    margin_left: float = 0,
    margin_right: float = 0,
    margin_top: float = 0,
    margin_bottom: float = 0,
    pages: str = "all",
) -> dict:
    """Crop margins from PDF pages.

    Every page of the input is written to the output; pages selected by
    ``pages`` have their media box and crop box shrunk by the given margins.

    Args:
        input_path: Path to the input PDF.
        output_path: Path for the cropped output. Missing parent directories
            are created; a bare file name (no directory part) is allowed.
        margin_left: Points to remove from the left edge.
        margin_right: Points to remove from the right edge.
        margin_top: Points to remove from the top edge.
        margin_bottom: Points to remove from the bottom edge.
        pages: "all" or a comma-separated list of 1-based page numbers
            and ranges (e.g. "1,3,5-7").

    Returns:
        dict with ``total_pages``, ``cropped_pages`` and ``output_size``
        (size of the written file in bytes).

    Raises:
        PDFExtraError: If the PDF has no pages, the margins would consume an
            entire page, or reading/writing fails.
    """
    try:
        from PyPDF2 import PdfReader, PdfWriter

        reader = PdfReader(input_path)
        writer = PdfWriter()
        total_pages = len(reader.pages)

        if total_pages == 0:
            raise PDFExtraError("PDF has no pages.")

        target_indices = _parse_pages(pages, total_pages)
        if not target_indices:
            # Not an error (keeps the previous contract), but worth surfacing:
            # the output will be a plain copy of the input.
            logger.warning(
                "Crop PDF: page spec %r matched no pages; output is an uncropped copy",
                pages,
            )

        horizontal_crop = margin_left + margin_right
        vertical_crop = margin_top + margin_bottom

        for i, page in enumerate(reader.pages):
            if i in target_indices:
                box = page.mediabox
                left = float(box.lower_left[0])
                bottom = float(box.lower_left[1])
                right = float(box.upper_right[0])
                top = float(box.upper_right[1])

                # Reject margins that would leave an empty or inverted box.
                # abs() keeps pages with an unnormalised media box working
                # exactly as before when the margins are small enough.
                if (horizontal_crop > 0 and horizontal_crop >= abs(right - left)) or (
                    vertical_crop > 0 and vertical_crop >= abs(top - bottom)
                ):
                    raise PDFExtraError(
                        f"Crop margins exceed the size of page {i + 1}."
                    )

                box.lower_left = (left + margin_left, bottom + margin_bottom)
                box.upper_right = (right - margin_right, top - margin_top)
                page.mediabox = box
                page.cropbox = box
            writer.add_page(page)

        # os.path.dirname() is "" for a bare file name and os.makedirs("")
        # raises FileNotFoundError, so only create a directory when one is named.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        with open(output_path, "wb") as f:
            writer.write(f)

        output_size = os.path.getsize(output_path)
        logger.info(f"Crop PDF: {len(target_indices)} pages cropped ({output_size} bytes)")
        return {
            "total_pages": total_pages,
            "cropped_pages": len(target_indices),
            "output_size": output_size,
        }

    except PDFExtraError:
        raise
    except Exception as e:
        # Chain the cause so the original traceback is preserved for debugging.
        raise PDFExtraError(f"Failed to crop PDF: {str(e)}") from e
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Flatten PDF (remove interactive form fields, annotations)
|
||||
# ---------------------------------------------------------------------------
|
||||
def flatten_pdf(input_path: str, output_path: str) -> dict:
    """Flatten a PDF by stripping its interactive layer.

    Note that nothing is rendered ("burned") into the page content: the
    ``/Annots`` entry of every page is deleted and any ``/AcroForm`` entry on
    the output document's root is removed. Whatever lived only in those
    annotations is therefore absent from the output rather than converted to
    static content.

    Args:
        input_path: Path to the input PDF.
        output_path: Path for the flattened output. Missing parent
            directories are created; a bare file name is allowed.

    Returns:
        dict with ``total_pages`` and ``output_size`` (bytes written).

    Raises:
        PDFExtraError: If the PDF has no pages or reading/writing fails.
    """
    try:
        from PyPDF2 import PdfReader, PdfWriter

        reader = PdfReader(input_path)
        writer = PdfWriter()
        total_pages = len(reader.pages)

        if total_pages == 0:
            raise PDFExtraError("PDF has no pages.")

        for page in reader.pages:
            # Drop the page's annotation array before copying the page.
            if "/Annots" in page:
                del page["/Annots"]
            writer.add_page(page)

        # Remove AcroForm (interactive forms) at document level.
        # NOTE(review): this relies on the private ``_root_object`` attribute,
        # and a writer populated only through add_page() presumably never has
        # an /AcroForm entry, so this is likely just a safeguard; confirm
        # against the installed PyPDF2 version.
        if "/AcroForm" in writer._root_object:
            del writer._root_object["/AcroForm"]

        # os.makedirs("") raises FileNotFoundError, which is what
        # os.path.dirname() yields for a bare file name; guard against it.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        with open(output_path, "wb") as f:
            writer.write(f)

        output_size = os.path.getsize(output_path)
        logger.info(f"Flatten PDF: {total_pages} pages ({output_size} bytes)")
        return {"total_pages": total_pages, "output_size": output_size}

    except PDFExtraError:
        raise
    except Exception as e:
        # Chain the cause so the original traceback is preserved for debugging.
        raise PDFExtraError(f"Failed to flatten PDF: {str(e)}") from e
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Repair PDF
|
||||
# ---------------------------------------------------------------------------
|
||||
def repair_pdf(input_path: str, output_path: str) -> dict:
    """Attempt to repair a damaged PDF by re-writing it.

    The file is opened in non-strict mode, every page that can be copied is
    added to a fresh writer, the document metadata is copied on a best-effort
    basis, and the result is written to ``output_path``.

    Args:
        input_path: Path to the input PDF.
        output_path: Path for the repaired output. Missing parent directories
            are created; a bare file name is allowed.

    Returns:
        dict with ``total_pages`` (pages reported by the reader),
        ``recovered_pages`` (pages actually copied), ``output_size`` (bytes
        written) and ``repaired`` (always ``True`` on success).

    Raises:
        PDFExtraError: If the file cannot be read, no page can be recovered,
            or writing fails.
    """
    try:
        from PyPDF2 import PdfReader, PdfWriter
        from PyPDF2.errors import PdfReadError

        try:
            reader = PdfReader(input_path, strict=False)
        except PdfReadError as e:
            raise PDFExtraError(
                f"Cannot read PDF — file may be severely corrupted: {str(e)}"
            ) from e

        writer = PdfWriter()
        total_pages = len(reader.pages)

        if total_pages == 0:
            raise PDFExtraError("PDF has no recoverable pages.")

        recovered = 0
        for i in range(total_pages):
            try:
                # Fetch the page inside the try block as well: if retrieving a
                # damaged page raises, only that page is skipped instead of
                # the whole repair being aborted by the loop itself.
                writer.add_page(reader.pages[i])
                recovered += 1
            except Exception:
                logger.warning(f"Repair: skipped unrecoverable page {i + 1}")

        if recovered == 0:
            raise PDFExtraError("No pages could be recovered from the PDF.")

        # Copy metadata if available. This stays best-effort (a damaged info
        # dictionary must not fail the repair), but the failure is now logged.
        try:
            if reader.metadata:
                writer.add_metadata(reader.metadata)
        except Exception as meta_error:
            logger.warning("Repair: could not copy metadata: %s", meta_error)

        # os.makedirs("") raises FileNotFoundError, which is what
        # os.path.dirname() yields for a bare file name; guard against it.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        with open(output_path, "wb") as f:
            writer.write(f)

        output_size = os.path.getsize(output_path)
        logger.info(f"Repair PDF: {recovered}/{total_pages} pages recovered ({output_size} bytes)")
        return {
            "total_pages": total_pages,
            "recovered_pages": recovered,
            "output_size": output_size,
            "repaired": True,
        }

    except PDFExtraError:
        raise
    except Exception as e:
        # Chain the cause so the original traceback is preserved for debugging.
        raise PDFExtraError(f"Failed to repair PDF: {str(e)}") from e
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# PDF Metadata Editor
|
||||
# ---------------------------------------------------------------------------
|
||||
def edit_pdf_metadata(
    input_path: str,
    output_path: str,
    title: str | None = None,
    author: str | None = None,
    subject: str | None = None,
    keywords: str | None = None,
    creator: str | None = None,
) -> dict:
    """Edit PDF metadata fields.

    The pages are copied to a new document, the existing document information
    is carried over, and every field passed as a non-``None`` value replaces
    the corresponding entry.

    Args:
        input_path: Path to the input PDF.
        output_path: Path for the output PDF. Missing parent directories are
            created; a bare file name is allowed.
        title: New ``/Title`` value (None = keep existing).
        author: New ``/Author`` value (None = keep existing).
        subject: New ``/Subject`` value (None = keep existing).
        keywords: New ``/Keywords`` value (None = keep existing).
        creator: New ``/Creator`` value (None = keep existing).

    Returns:
        dict with ``total_pages``, ``output_size`` (bytes written) and
        ``metadata`` (title/author/subject/keywords/creator as read back from
        the written file; empty if the read-back fails).

    Raises:
        PDFExtraError: If no field is provided or the edit fails.
    """
    try:
        from PyPDF2 import PdfReader, PdfWriter

        # Build the metadata dict from the fields that were actually supplied.
        requested = {
            "/Title": title,
            "/Author": author,
            "/Subject": subject,
            "/Keywords": keywords,
            "/Creator": creator,
        }
        metadata = {key: value for key, value in requested.items() if value is not None}

        # Validate before touching the file so a bad call fails fast.
        if not metadata:
            raise PDFExtraError("At least one metadata field must be provided.")

        reader = PdfReader(input_path)
        writer = PdfWriter()

        for page in reader.pages:
            writer.add_page(page)

        # A fresh writer does not inherit the reader's info dictionary, so
        # copy it explicitly; otherwise "None = keep existing" would silently
        # drop every field the caller did not pass. Best-effort: a broken
        # info dictionary must not prevent the requested edit.
        existing = {}
        try:
            source_meta = reader.metadata
            if source_meta:
                for key in source_meta:
                    value = source_meta[key]
                    # Only plain text/byte values are carried over.
                    # NOTE(review): assumes add_metadata rejects other value
                    # types in the installed PyPDF2 — confirm.
                    if isinstance(value, (str, bytes)):
                        existing[str(key)] = value
        except Exception as meta_error:
            logger.warning(
                "Edit metadata: could not read existing metadata: %s", meta_error
            )

        # Caller-supplied values win over the carried-over ones.
        writer.add_metadata({**existing, **metadata})

        # os.makedirs("") raises FileNotFoundError, which is what
        # os.path.dirname() yields for a bare file name; guard against it.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        with open(output_path, "wb") as f:
            writer.write(f)

        output_size = os.path.getsize(output_path)

        # Read back to confirm what was actually stored.
        current_meta = {}
        try:
            r2 = PdfReader(output_path)
            if r2.metadata:
                current_meta = {
                    "title": r2.metadata.get("/Title", ""),
                    "author": r2.metadata.get("/Author", ""),
                    "subject": r2.metadata.get("/Subject", ""),
                    "keywords": r2.metadata.get("/Keywords", ""),
                    "creator": r2.metadata.get("/Creator", ""),
                }
        except Exception as readback_error:
            # The file is already written; report the problem but still succeed.
            logger.warning(
                "Edit metadata: could not read back metadata: %s", readback_error
            )

        logger.info(f"Edit metadata: updated {len(metadata)} fields ({output_size} bytes)")
        return {
            "total_pages": len(reader.pages),
            "output_size": output_size,
            "metadata": current_meta,
        }

    except PDFExtraError:
        raise
    except Exception as e:
        # Chain the cause so the original traceback is preserved for debugging.
        raise PDFExtraError(f"Failed to edit PDF metadata: {str(e)}") from e
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
def _parse_pages(pages_spec: str, total_pages: int) -> set[int]:
|
||||
"""Parse page specification to set of 0-based indices."""
|
||||
if pages_spec.strip().lower() == "all":
|
||||
return set(range(total_pages))
|
||||
|
||||
indices = set()
|
||||
for part in pages_spec.split(","):
|
||||
part = part.strip()
|
||||
if "-" in part:
|
||||
try:
|
||||
start, end = part.split("-", 1)
|
||||
start = max(1, int(start))
|
||||
end = min(total_pages, int(end))
|
||||
for p in range(start, end + 1):
|
||||
indices.add(p - 1)
|
||||
except ValueError:
|
||||
continue
|
||||
else:
|
||||
try:
|
||||
p = int(part)
|
||||
if 1 <= p <= total_pages:
|
||||
indices.add(p - 1)
|
||||
except ValueError:
|
||||
continue
|
||||
return indices
|
||||
Reference in New Issue
Block a user