diff --git a/.env.example b/.env.example index 59c2a5d..9ed3769 100644 --- a/.env.example +++ b/.env.example @@ -33,3 +33,8 @@ VITE_ADSENSE_SLOT_HOME_TOP=1234567890 VITE_ADSENSE_SLOT_HOME_BOTTOM=1234567891 VITE_ADSENSE_SLOT_TOP_BANNER=1234567892 VITE_ADSENSE_SLOT_BOTTOM_BANNER=1234567893 + +# Feature Flags (set to "false" to disable a specific tool) +FEATURE_EDITOR=true +FEATURE_OCR=true +FEATURE_REMOVEBG=true diff --git a/backend/Dockerfile b/backend/Dockerfile index 6d2ae7d..b44a8f4 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -17,10 +17,16 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ tesseract-ocr-eng \ tesseract-ocr-ara \ tesseract-ocr-fra \ + poppler-utils \ + default-jre-headless \ curl \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* +# Ensure Java is on PATH for tabula-py (extract-tables, pdf-to-excel) +ENV JAVA_HOME=/usr/lib/jvm/default-java +ENV PATH="${JAVA_HOME}/bin:${PATH}" + # Set working directory WORKDIR /app diff --git a/backend/app/routes/ocr.py b/backend/app/routes/ocr.py index 717dbac..7162322 100644 --- a/backend/app/routes/ocr.py +++ b/backend/app/routes/ocr.py @@ -22,8 +22,8 @@ ALLOWED_OCR_TYPES = ALLOWED_IMAGE_TYPES + ["pdf"] def _check_feature_flag(): - """Return an error response if FEATURE_EDITOR is disabled.""" - if not current_app.config.get("FEATURE_EDITOR", False): + """Return an error response if FEATURE_OCR is disabled.""" + if not current_app.config.get("FEATURE_OCR", True): return jsonify({"error": "This feature is not enabled."}), 403 return None diff --git a/backend/app/routes/removebg.py b/backend/app/routes/removebg.py index 48d181b..49634b8 100644 --- a/backend/app/routes/removebg.py +++ b/backend/app/routes/removebg.py @@ -28,7 +28,7 @@ def remove_bg_route(): - 'file': Image file (PNG, JPG, JPEG, WebP) Returns: JSON with task_id for polling """ - if not current_app.config.get("FEATURE_EDITOR", False): + if not current_app.config.get("FEATURE_REMOVEBG", True): return 
jsonify({"error": "This feature is not enabled."}), 403 if "file" not in request.files: diff --git a/backend/app/services/pdf_ai_service.py b/backend/app/services/pdf_ai_service.py index 742ed93..ffd8975 100644 --- a/backend/app/services/pdf_ai_service.py +++ b/backend/app/services/pdf_ai_service.py @@ -8,7 +8,7 @@ import requests logger = logging.getLogger(__name__) # Configuration -OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "") +OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "") OPENROUTER_MODEL = os.getenv("OPENROUTER_MODEL", "meta-llama/llama-3-8b-instruct") OPENROUTER_BASE_URL = os.getenv( "OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1/chat/completions" @@ -219,38 +219,50 @@ def extract_tables(input_path: str) -> dict: {"tables": [...], "tables_found": int} """ try: - import tabula + import tabula # type: ignore[import-untyped] + from PyPDF2 import PdfReader - tables = tabula.read_pdf( - input_path, pages="all", multiple_tables=True, silent=True - ) + # Get total page count + reader = PdfReader(input_path) + total_pages = len(reader.pages) - if not tables: + result_tables = [] + table_index = 0 + + for page_num in range(1, total_pages + 1): + page_tables = tabula.read_pdf( + input_path, pages=str(page_num), multiple_tables=True, silent=True + ) + if not page_tables: + continue + for df in page_tables: + if df.empty: + continue + headers = [str(c) for c in df.columns] + rows = [] + for _, row in df.iterrows(): + cells = [] + for col in df.columns: + val = row[col] + if isinstance(val, float) and str(val) == "nan": + cells.append("") + else: + cells.append(str(val)) + rows.append(cells) + + result_tables.append({ + "page": page_num, + "table_index": table_index, + "headers": headers, + "rows": rows, + }) + table_index += 1 + + if not result_tables: raise PdfAiError( "No tables found in the PDF. This tool works best with PDFs containing tabular data." 
) - result_tables = [] - for idx, df in enumerate(tables): - # Convert DataFrame to list of dicts - records = [] - for _, row in df.iterrows(): - record = {} - for col in df.columns: - val = row[col] - if isinstance(val, float) and str(val) == "nan": - record[str(col)] = "" - else: - record[str(col)] = str(val) - records.append(record) - - result_tables.append({ - "index": idx + 1, - "columns": [str(c) for c in df.columns], - "rows": len(records), - "data": records, - }) - logger.info(f"Extracted {len(result_tables)} tables from PDF") return { diff --git a/backend/celerybeat-schedule b/backend/celerybeat-schedule new file mode 100644 index 0000000..51e74e3 Binary files /dev/null and b/backend/celerybeat-schedule differ diff --git a/backend/config/__init__.py b/backend/config/__init__.py index 023a961..bbd1de7 100644 --- a/backend/config/__init__.py +++ b/backend/config/__init__.py @@ -80,7 +80,7 @@ class BaseConfig: RATELIMIT_DEFAULT = "100/hour" # OpenRouter AI - OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "") + OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "") OPENROUTER_MODEL = os.getenv("OPENROUTER_MODEL", "meta-llama/llama-3-8b-instruct") OPENROUTER_BASE_URL = os.getenv( "OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1/chat/completions" @@ -95,8 +95,10 @@ class BaseConfig: SMTP_USE_TLS = os.getenv("SMTP_USE_TLS", "true").lower() == "true" FRONTEND_URL = os.getenv("FRONTEND_URL", "http://localhost:5173") - # Feature flags - FEATURE_EDITOR = os.getenv("FEATURE_EDITOR", "false").lower() == "true" + # Feature flags (default: enabled — set to "false" to disable a feature) + FEATURE_EDITOR = os.getenv("FEATURE_EDITOR", "true").lower() == "true" + FEATURE_OCR = os.getenv("FEATURE_OCR", "true").lower() == "true" + FEATURE_REMOVEBG = os.getenv("FEATURE_REMOVEBG", "true").lower() == "true" class DevelopmentConfig(BaseConfig): diff --git a/backend/tests/test_ocr.py 
b/backend/tests/test_ocr.py index 4069bde..0b81b30 100644 --- a/backend/tests/test_ocr.py +++ b/backend/tests/test_ocr.py @@ -13,7 +13,7 @@ from tests.conftest import make_png_bytes, make_pdf_bytes # ========================================================================= class TestOcrFeatureFlag: def test_ocr_image_disabled_by_default(self, client): - """OCR image should return 403 when FEATURE_EDITOR is off.""" + """OCR image should return 403 when FEATURE_OCR is off.""" data = {"file": (io.BytesIO(make_png_bytes()), "test.png")} response = client.post( "/api/ocr/image", @@ -24,7 +24,7 @@ class TestOcrFeatureFlag: assert "not enabled" in response.get_json()["error"] def test_ocr_pdf_disabled_by_default(self, client): - """OCR PDF should return 403 when FEATURE_EDITOR is off.""" + """OCR PDF should return 403 when FEATURE_OCR is off.""" data = {"file": (io.BytesIO(make_pdf_bytes()), "scan.pdf")} response = client.post( "/api/ocr/pdf", @@ -50,14 +50,14 @@ class TestOcrFeatureFlag: class TestOcrValidation: def test_ocr_image_no_file(self, client, app): """Should return 400 when no file provided.""" - app.config["FEATURE_EDITOR"] = True + app.config["FEATURE_OCR"] = True response = client.post("/api/ocr/image") assert response.status_code == 400 assert "No file" in response.get_json()["error"] def test_ocr_pdf_no_file(self, client, app): """Should return 400 when no file provided.""" - app.config["FEATURE_EDITOR"] = True + app.config["FEATURE_OCR"] = True response = client.post("/api/ocr/pdf") assert response.status_code == 400 assert "No file" in response.get_json()["error"] @@ -69,7 +69,7 @@ class TestOcrValidation: class TestOcrSuccess: def test_ocr_image_success(self, client, app, monkeypatch): """Should return 202 with task_id when valid image provided.""" - app.config["FEATURE_EDITOR"] = True + app.config["FEATURE_OCR"] = True mock_task = MagicMock() mock_task.id = "ocr-img-task-1" @@ -101,7 +101,7 @@ class TestOcrSuccess: def test_ocr_pdf_success(self, 
client, app, monkeypatch): """Should return 202 with task_id when valid PDF provided.""" - app.config["FEATURE_EDITOR"] = True + app.config["FEATURE_OCR"] = True mock_task = MagicMock() mock_task.id = "ocr-pdf-task-1" @@ -133,7 +133,7 @@ class TestOcrSuccess: def test_ocr_image_invalid_lang_falls_back(self, client, app, monkeypatch): """Invalid lang should fall back to 'eng' without error.""" - app.config["FEATURE_EDITOR"] = True + app.config["FEATURE_OCR"] = True mock_task = MagicMock() mock_task.id = "ocr-lang-task" diff --git a/backend/tests/test_removebg.py b/backend/tests/test_removebg.py index 7892bdf..82fc830 100644 --- a/backend/tests/test_removebg.py +++ b/backend/tests/test_removebg.py @@ -12,7 +12,7 @@ from tests.conftest import make_png_bytes, make_pdf_bytes # ========================================================================= class TestRemoveBgFeatureFlag: def test_removebg_disabled_by_default(self, client): - """Should return 403 when FEATURE_EDITOR is off.""" + """Should return 403 when FEATURE_REMOVEBG is off.""" data = {"file": (io.BytesIO(make_png_bytes()), "photo.png")} response = client.post( "/api/remove-bg", @@ -29,7 +29,7 @@ class TestRemoveBgFeatureFlag: class TestRemoveBgValidation: def test_removebg_no_file(self, client, app): """Should return 400 when no file provided.""" - app.config["FEATURE_EDITOR"] = True + app.config["FEATURE_REMOVEBG"] = True response = client.post("/api/remove-bg") assert response.status_code == 400 assert "No file" in response.get_json()["error"] @@ -41,7 +41,7 @@ class TestRemoveBgValidation: class TestRemoveBgSuccess: def test_removebg_success(self, client, app, monkeypatch): """Should return 202 with task_id when valid image provided.""" - app.config["FEATURE_EDITOR"] = True + app.config["FEATURE_REMOVEBG"] = True mock_task = MagicMock() mock_task.id = "rembg-task-1" diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 84c7ff0..b765bf4 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx 
@@ -2,6 +2,7 @@ import { lazy, Suspense, useEffect } from 'react'; import { Routes, Route, useLocation } from 'react-router-dom'; import Header from '@/components/layout/Header'; import Footer from '@/components/layout/Footer'; +import ErrorBoundary from '@/components/shared/ErrorBoundary'; import { useDirection } from '@/hooks/useDirection'; import { initAnalytics, trackPageView } from '@/services/analytics'; import { useAuthStore } from '@/stores/authStore'; @@ -77,6 +78,7 @@ export default function App() {
+ }> {/* Pages */} @@ -140,6 +142,7 @@ export default function App() { } /> +