From 0a0c069a58da3c13d941df56cec06e13b871775b Mon Sep 17 00:00:00 2001 From: Your Name <119736744+aborayan2022@users.noreply.github.com> Date: Sun, 8 Mar 2026 22:51:50 +0200 Subject: [PATCH] =?UTF-8?q?=D9=85=D9=8A=D8=B2=D9=87:=20=D8=A5=D8=B6=D8=A7?= =?UTF-8?q?=D9=81=D8=A9=20=D9=85=D9=8A=D8=B2=D8=A7=D8=AA=20=D8=AC=D8=AF?= =?UTF-8?q?=D9=8A=D8=AF=D8=A9=20=D9=84=D8=AA=D8=AD=D8=B1=D9=8A=D8=B1=20PDF?= =?UTF-8?q?=D8=8C=20OCR=D8=8C=20=D9=88=D8=A5=D8=B2=D8=A7=D9=84=D8=A9=20?= =?UTF-8?q?=D8=A7=D9=84=D8=AE=D9=84=D9=81=D9=8A=D8=A9=20=D9=85=D8=B9=20?= =?UTF-8?q?=D8=AA=D9=81=D8=B9=D9=8A=D9=84=20=D8=AE=D9=8A=D8=A7=D8=B1=D8=A7?= =?UTF-8?q?=D8=AA=20=D9=81=D9=8A=20=D9=85=D9=84=D9=81=20=D8=A7=D9=84=D8=A8?= =?UTF-8?q?=D9=8A=D8=A6=D8=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .env.example | 5 ++ backend/Dockerfile | 6 ++ backend/app/routes/ocr.py | 4 +- backend/app/routes/removebg.py | 2 +- backend/app/services/pdf_ai_service.py | 66 +++++++++++------- backend/celerybeat-schedule | Bin 0 -> 16384 bytes backend/config/__init__.py | 8 ++- backend/tests/test_ocr.py | 14 ++-- backend/tests/test_removebg.py | 6 +- frontend/src/App.tsx | 3 + .../src/components/shared/ErrorBoundary.tsx | 48 +++++++++++++ .../src/components/tools/QrCodeGenerator.tsx | 2 +- .../src/components/tools/RemoveBackground.tsx | 11 +++ frontend/src/i18n/ar.json | 43 ++++++++++-- frontend/src/i18n/en.json | 43 ++++++++++-- frontend/src/i18n/fr.json | 43 ++++++++++-- 16 files changed, 242 insertions(+), 62 deletions(-) create mode 100644 backend/celerybeat-schedule create mode 100644 frontend/src/components/shared/ErrorBoundary.tsx diff --git a/.env.example b/.env.example index 59c2a5d..9ed3769 100644 --- a/.env.example +++ b/.env.example @@ -33,3 +33,8 @@ VITE_ADSENSE_SLOT_HOME_TOP=1234567890 VITE_ADSENSE_SLOT_HOME_BOTTOM=1234567891 VITE_ADSENSE_SLOT_TOP_BANNER=1234567892 VITE_ADSENSE_SLOT_BOTTOM_BANNER=1234567893 + +# Feature Flags (set to "false" to disable a 
specific tool) +FEATURE_EDITOR=true +FEATURE_OCR=true +FEATURE_REMOVEBG=true diff --git a/backend/Dockerfile b/backend/Dockerfile index 6d2ae7d..b44a8f4 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -17,10 +17,16 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ tesseract-ocr-eng \ tesseract-ocr-ara \ tesseract-ocr-fra \ + poppler-utils \ + default-jre-headless \ curl \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* +# Ensure Java is on PATH for tabula-py (extract-tables, pdf-to-excel) +ENV JAVA_HOME=/usr/lib/jvm/default-java +ENV PATH="${JAVA_HOME}/bin:${PATH}" + # Set working directory WORKDIR /app diff --git a/backend/app/routes/ocr.py b/backend/app/routes/ocr.py index 717dbac..7162322 100644 --- a/backend/app/routes/ocr.py +++ b/backend/app/routes/ocr.py @@ -22,8 +22,8 @@ ALLOWED_OCR_TYPES = ALLOWED_IMAGE_TYPES + ["pdf"] def _check_feature_flag(): - """Return an error response if FEATURE_EDITOR is disabled.""" - if not current_app.config.get("FEATURE_EDITOR", False): + """Return an error response if FEATURE_OCR is disabled.""" + if not current_app.config.get("FEATURE_OCR", True): return jsonify({"error": "This feature is not enabled."}), 403 return None diff --git a/backend/app/routes/removebg.py b/backend/app/routes/removebg.py index 48d181b..49634b8 100644 --- a/backend/app/routes/removebg.py +++ b/backend/app/routes/removebg.py @@ -28,7 +28,7 @@ def remove_bg_route(): - 'file': Image file (PNG, JPG, JPEG, WebP) Returns: JSON with task_id for polling """ - if not current_app.config.get("FEATURE_EDITOR", False): + if not current_app.config.get("FEATURE_REMOVEBG", True): return jsonify({"error": "This feature is not enabled."}), 403 if "file" not in request.files: diff --git a/backend/app/services/pdf_ai_service.py b/backend/app/services/pdf_ai_service.py index 742ed93..ffd8975 100644 --- a/backend/app/services/pdf_ai_service.py +++ b/backend/app/services/pdf_ai_service.py @@ -8,7 +8,7 @@ import requests logger = 
logging.getLogger(__name__) # Configuration -OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "") +OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "") OPENROUTER_MODEL = os.getenv("OPENROUTER_MODEL", "meta-llama/llama-3-8b-instruct") OPENROUTER_BASE_URL = os.getenv( "OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1/chat/completions" @@ -219,38 +219,50 @@ def extract_tables(input_path: str) -> dict: {"tables": [...], "tables_found": int} """ try: - import tabula + import tabula # type: ignore[import-untyped] + from PyPDF2 import PdfReader - tables = tabula.read_pdf( - input_path, pages="all", multiple_tables=True, silent=True - ) + # Get total page count + reader = PdfReader(input_path) + total_pages = len(reader.pages) - if not tables: + result_tables = [] + table_index = 0 + + for page_num in range(1, total_pages + 1): + page_tables = tabula.read_pdf( + input_path, pages=str(page_num), multiple_tables=True, silent=True + ) + if not page_tables: + continue + for df in page_tables: + if df.empty: + continue + headers = [str(c) for c in df.columns] + rows = [] + for _, row in df.iterrows(): + cells = [] + for col in df.columns: + val = row[col] + if isinstance(val, float) and str(val) == "nan": + cells.append("") + else: + cells.append(str(val)) + rows.append(cells) + + result_tables.append({ + "page": page_num, + "table_index": table_index, + "headers": headers, + "rows": rows, + }) + table_index += 1 + + if not result_tables: raise PdfAiError( "No tables found in the PDF. This tool works best with PDFs containing tabular data."
) - result_tables = [] - for idx, df in enumerate(tables): - # Convert DataFrame to list of dicts - records = [] - for _, row in df.iterrows(): - record = {} - for col in df.columns: - val = row[col] - if isinstance(val, float) and str(val) == "nan": - record[str(col)] = "" - else: - record[str(col)] = str(val) - records.append(record) - - result_tables.append({ - "index": idx + 1, - "columns": [str(c) for c in df.columns], - "rows": len(records), - "data": records, - }) - logger.info(f"Extracted {len(result_tables)} tables from PDF") return { diff --git a/backend/celerybeat-schedule b/backend/celerybeat-schedule new file mode 100644 index 0000000000000000000000000000000000000000..51e74e35b113a4bf064b27f09a1e6757410bc7b2 GIT binary patch literal 16384 zcmeI(O>5LZ7zglKH?mv%VzCtjwI~ScN@H75JO~zw2U!r*ir~SJWT$RuIvbLS*oB2X ziOSqMz4!q8)d)}u#< ze52)eN21-TzyS_}_Xp;~?#zexZ$F+6f`@#><-1`z?IacW?~Xs5;cNxq0Ea;VTR(dL z)l@0t;Oanr#`EN+6Y{6EfW!72!lz#*xRCj?S$lkqLX*A&!gG6`Z(c4QkzK~lxqP*c zx;aGUU-LAddBTgn*VDdv{>_V|BZ8pst#AAbfJUtLU-U2ebJ~|(YBlr1EUJ2EaY6eg zC36u*DR1??C;rdg)*J5kgPvB0o#!R2CmzBE0kO&D_pD1>Y83S zQ2Si1dt(R3E`Op|qB2QkB4`-32V90y=5$I`?{)-F#8QW<_o#Z^RH=O^C`FdU%PH57 z^(}2LzF9GKQ<_YTyNfB@RhW+Bydz{7nQKvA
+ }> {/* Pages */} @@ -140,6 +142,7 @@ export default function App() { } /> +