feat: Initialize frontend with React, Vite, and Tailwind CSS
- Set up main entry point for React application. - Create About, Home, NotFound, Privacy, and Terms pages with SEO support. - Implement API service for file uploads and task management. - Add global styles using Tailwind CSS. - Create utility functions for SEO and text processing. - Configure Vite for development and production builds. - Set up Nginx configuration for serving frontend and backend. - Add scripts for cleanup of expired files and sitemap generation. - Implement deployment script for production environment.
This commit is contained in:
170
backend/app/services/pdf_service.py
Normal file
170
backend/app/services/pdf_service.py
Normal file
@@ -0,0 +1,170 @@
|
||||
"""PDF conversion service using LibreOffice headless."""
|
||||
import os
|
||||
import subprocess
|
||||
import logging
|
||||
import tempfile
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PDFConversionError(Exception):
|
||||
"""Custom exception for PDF conversion failures."""
|
||||
pass
|
||||
|
||||
|
||||
def pdf_to_word(input_path: str, output_dir: str) -> str:
|
||||
"""
|
||||
Convert a PDF file to Word (DOCX) format using LibreOffice headless.
|
||||
|
||||
Args:
|
||||
input_path: Path to the input PDF file
|
||||
output_dir: Directory for the output file
|
||||
|
||||
Returns:
|
||||
Path to the converted DOCX file
|
||||
|
||||
Raises:
|
||||
PDFConversionError: If conversion fails
|
||||
"""
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
# Use a unique user profile per process to avoid lock conflicts
|
||||
user_install_dir = tempfile.mkdtemp(prefix="lo_pdf2word_")
|
||||
|
||||
cmd = [
|
||||
"soffice",
|
||||
"--headless",
|
||||
"--norestore",
|
||||
f"-env:UserInstallation=file://{user_install_dir}",
|
||||
"--infilter=writer_pdf_import",
|
||||
"--convert-to", "docx",
|
||||
"--outdir", output_dir,
|
||||
input_path,
|
||||
]
|
||||
|
||||
try:
|
||||
logger.info(f"Running LibreOffice PDF→Word: {' '.join(cmd)}")
|
||||
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=120, # 2 minute timeout
|
||||
env={**os.environ, "HOME": user_install_dir},
|
||||
)
|
||||
|
||||
logger.info(f"LibreOffice stdout: {result.stdout}")
|
||||
logger.info(f"LibreOffice stderr: {result.stderr}")
|
||||
logger.info(f"LibreOffice returncode: {result.returncode}")
|
||||
|
||||
# LibreOffice names output based on input filename
|
||||
input_basename = os.path.splitext(os.path.basename(input_path))[0]
|
||||
output_path = os.path.join(output_dir, f"{input_basename}.docx")
|
||||
|
||||
# Check output file first — LibreOffice may return non-zero
|
||||
# due to harmless warnings (e.g. javaldx) even on success
|
||||
if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
|
||||
logger.info(f"PDF→Word conversion successful: {output_path}")
|
||||
return output_path
|
||||
|
||||
# No output file — now treat as real error
|
||||
if result.returncode != 0:
|
||||
# Filter out known harmless warnings
|
||||
stderr = result.stderr or ""
|
||||
real_errors = [
|
||||
line for line in stderr.strip().splitlines()
|
||||
if not line.startswith("Warning: failed to launch javaldx")
|
||||
]
|
||||
error_msg = "\n".join(real_errors) if real_errors else stderr
|
||||
logger.error(f"LibreOffice PDF→Word failed: {error_msg}")
|
||||
raise PDFConversionError(
|
||||
f"Conversion failed: {error_msg or 'Unknown error'}"
|
||||
)
|
||||
|
||||
# Return code 0 but no output file
|
||||
files_in_dir = os.listdir(output_dir) if os.path.exists(output_dir) else []
|
||||
logger.error(
|
||||
f"Expected output not found at {output_path}. "
|
||||
f"Files in output dir: {files_in_dir}"
|
||||
)
|
||||
raise PDFConversionError("Output file was not created.")
|
||||
|
||||
except subprocess.TimeoutExpired:
|
||||
raise PDFConversionError("Conversion timed out. File may be too large.")
|
||||
except FileNotFoundError:
|
||||
raise PDFConversionError("LibreOffice is not installed on the server.")
|
||||
finally:
|
||||
# Cleanup temporary user profile
|
||||
import shutil
|
||||
shutil.rmtree(user_install_dir, ignore_errors=True)
|
||||
|
||||
|
||||
def word_to_pdf(input_path: str, output_dir: str) -> str:
|
||||
"""
|
||||
Convert a Word (DOC/DOCX) file to PDF format using LibreOffice headless.
|
||||
|
||||
Args:
|
||||
input_path: Path to the input Word file
|
||||
output_dir: Directory for the output file
|
||||
|
||||
Returns:
|
||||
Path to the converted PDF file
|
||||
|
||||
Raises:
|
||||
PDFConversionError: If conversion fails
|
||||
"""
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
# Use a unique user profile per process to avoid lock conflicts
|
||||
user_install_dir = tempfile.mkdtemp(prefix="lo_word2pdf_")
|
||||
|
||||
cmd = [
|
||||
"soffice",
|
||||
"--headless",
|
||||
"--norestore",
|
||||
f"-env:UserInstallation=file://{user_install_dir}",
|
||||
"--convert-to", "pdf",
|
||||
"--outdir", output_dir,
|
||||
input_path,
|
||||
]
|
||||
|
||||
try:
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=120,
|
||||
env={**os.environ, "HOME": user_install_dir},
|
||||
)
|
||||
|
||||
input_basename = os.path.splitext(os.path.basename(input_path))[0]
|
||||
output_path = os.path.join(output_dir, f"{input_basename}.pdf")
|
||||
|
||||
# Check output file first — LibreOffice may return non-zero
|
||||
# due to harmless warnings (e.g. javaldx) even on success
|
||||
if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
|
||||
logger.info(f"Word→PDF conversion successful: {output_path}")
|
||||
return output_path
|
||||
|
||||
if result.returncode != 0:
|
||||
stderr = result.stderr or ""
|
||||
real_errors = [
|
||||
line for line in stderr.strip().splitlines()
|
||||
if not line.startswith("Warning: failed to launch javaldx")
|
||||
]
|
||||
error_msg = "\n".join(real_errors) if real_errors else stderr
|
||||
logger.error(f"LibreOffice Word→PDF failed: {error_msg}")
|
||||
raise PDFConversionError(
|
||||
f"Conversion failed: {error_msg or 'Unknown error'}"
|
||||
)
|
||||
|
||||
raise PDFConversionError("Output file was not created.")
|
||||
|
||||
except subprocess.TimeoutExpired:
|
||||
raise PDFConversionError("Conversion timed out. File may be too large.")
|
||||
except FileNotFoundError:
|
||||
raise PDFConversionError("LibreOffice is not installed on the server.")
|
||||
finally:
|
||||
# Cleanup temporary user profile
|
||||
import shutil
|
||||
shutil.rmtree(user_install_dir, ignore_errors=True)
|
||||
Reference in New Issue
Block a user