ميزة: إضافة مكوني ProcedureSelection و StepProgress لأداة مخططات التدفق بصيغة PDF

- تنفيذ مكون ProcedureSelection لتمكين المستخدمين من اختيار الإجراءات من قائمة، وإدارة الاختيارات، ومعالجة الإجراءات المرفوضة.
- إنشاء مكون StepProgress لعرض تقدم معالج متعدد الخطوات بشكل مرئي.
- تعريف أنواع مشتركة للإجراءات، وخطوات التدفق، ورسائل الدردشة في ملف types.ts.
- إضافة اختبارات وحدة لخطافات useFileUpload و useTaskPolling لضمان الأداء السليم ومعالجة الأخطاء.
- تنفيذ اختبارات واجهة برمجة التطبيقات (API) للتحقق من تنسيقات نقاط النهاية وضمان اتساق ربط الواجهة الأمامية بالخلفية.
2026-03-06 17:16:09 +02:00
parent 2e97741d60
commit cfbcc8bd79
62 changed files with 10567 additions and 101 deletions
--- a/backend/app/__init__.py
+++ b/backend/app/__init__.py
@@ -62,6 +62,7 @@ def create_app(config_name=None):
    from app.routes.tasks import tasks_bp
    from app.routes.download import download_bp
    from app.routes.pdf_tools import pdf_tools_bp
+    from app.routes.flowchart import flowchart_bp

    app.register_blueprint(health_bp, url_prefix="/api")
    app.register_blueprint(convert_bp, url_prefix="/api/convert")
@@ -69,6 +70,7 @@ def create_app(config_name=None):
    app.register_blueprint(image_bp, url_prefix="/api/image")
    app.register_blueprint(video_bp, url_prefix="/api/video")
    app.register_blueprint(pdf_tools_bp, url_prefix="/api/pdf-tools")
+    app.register_blueprint(flowchart_bp, url_prefix="/api/flowchart")
    app.register_blueprint(tasks_bp, url_prefix="/api/tasks")
    app.register_blueprint(download_bp, url_prefix="/api/download")

--- a/backend/app/routes/flowchart.py
+++ b/backend/app/routes/flowchart.py
@@ -0,0 +1,103 @@
+"""Flowchart route — POST /api/flowchart/extract, /chat, /generate-manual."""
+import logging
+from flask import Blueprint, request, jsonify
+
+from app.extensions import limiter
+from app.utils.file_validator import validate_file, FileValidationError
+from app.utils.sanitizer import generate_safe_path
+from app.tasks.flowchart_tasks import extract_flowchart_task
+
+logger = logging.getLogger(__name__)
+
+flowchart_bp = Blueprint("flowchart", __name__)
+
+
+@flowchart_bp.route("/extract", methods=["POST"])
+@limiter.limit("10/minute")
+def extract_flowchart_route():
+    """
+    Queue a PDF for procedure extraction and flowchart generation.
+
+    Accepts: multipart/form-data with a single 'file' field (PDF)
+    Returns: JSON with the queued task's id (HTTP 202), or an error payload
+    when no file is present or validation rejects it.
+    """
+    uploaded = request.files.get("file")
+    if uploaded is None:
+        return jsonify({"error": "No file uploaded."}), 400
+
+    try:
+        original_filename, ext = validate_file(uploaded, allowed_types=["pdf"])
+    except FileValidationError as exc:
+        return jsonify({"error": exc.message}), exc.code
+
+    task_id, input_path = generate_safe_path(ext)
+    uploaded.save(input_path)
+
+    async_result = extract_flowchart_task.delay(
+        input_path, task_id, original_filename
+    )
+
+    payload = {
+        "task_id": async_result.id,
+        "message": "Flowchart extraction started.",
+    }
+    return jsonify(payload), 202
+
+
+@flowchart_bp.route("/chat", methods=["POST"])
+@limiter.limit("20/minute")
+def flowchart_chat_route():
+    """
+    AI chat endpoint for flowchart improvement suggestions.
+
+    Accepts JSON: { message, flow_id, flow_data }
+    Returns JSON: { reply, updated_flow? }
+
+    Responds 400 when the body is not a JSON object or carries no message.
+    ``flow_id`` is accepted but not read by this handler. Failures raised
+    while calling the chat service are logged and answered with a generic
+    apology reply (HTTP 200).
+    """
+    data = request.get_json(silent=True)
+    # get_json() returns lists/strings/numbers for valid non-object JSON;
+    # only a dict supports the .get() lookups below.
+    if not isinstance(data, dict) or not data.get("message"):
+        return jsonify({"error": "Message is required."}), 400
+
+    message = str(data["message"])[:2000]  # Limit message length
+    flow_data = data.get("flow_data")
+    # The service reads flow_data with .get(); anything that is not an
+    # object is treated as "no flowchart context" instead of failing.
+    if not isinstance(flow_data, dict):
+        flow_data = None
+
+    try:
+        from app.services.ai_chat_service import chat_about_flowchart
+        result = chat_about_flowchart(message, flow_data)
+        return jsonify(result), 200
+    except Exception as e:
+        # logger.exception keeps the traceback alongside the message.
+        logger.exception("Flowchart chat error: %s", e)
+        return jsonify({"reply": "Sorry, I couldn't process your request. Please try again."}), 200
+
+
+@flowchart_bp.route("/generate-manual", methods=["POST"])
+@limiter.limit("10/minute")
+def generate_manual_flowchart_route():
+    """
+    Generate a flowchart from manually specified procedure data.
+
+    Accepts JSON: { title, description, pages }, where each entry of
+    ``pages`` is either a page-text string or an object with a ``text`` field.
+    Returns JSON: { flowchart }
+
+    Responds 400 when the body is not a JSON object, the title is missing,
+    or ``pages`` is malformed.
+    """
+    data = request.get_json(silent=True)
+    # Valid non-object JSON (list, string, number) has no .get().
+    if not isinstance(data, dict) or not data.get("title"):
+        return jsonify({"error": "Title is required."}), 400
+
+    title = str(data["title"])[:200]
+    description = str(data.get("description", ""))[:500]
+
+    page_texts = data.get("pages")
+    if page_texts is None:
+        page_texts = []
+    if not isinstance(page_texts, list):
+        return jsonify({"error": "Pages must be a list."}), 400
+
+    pages_data = []
+    for number, entry in enumerate(page_texts, start=1):
+        if isinstance(entry, dict):
+            page_text = entry.get("text", "")
+        elif isinstance(entry, str):
+            page_text = entry
+        else:
+            return jsonify({
+                "error": "Each page must be text or an object with a 'text' field."
+            }), 400
+        pages_data.append({"page": number, "text": str(page_text)[:5000]})
+
+    import zlib
+
+    from app.services.flowchart_service import generate_flowchart
+
+    # Build a synthetic procedure. The id uses crc32 rather than hash():
+    # str hashes are randomised per interpreter process, so hash(title)
+    # would give the same title different ids on different workers.
+    title_digest = zlib.crc32(title.encode("utf-8", errors="replace")) % 100000
+    procedure = {
+        "id": f"manual-{title_digest}",
+        "title": title,
+        "description": description,
+        "pages": [page["page"] for page in pages_data],
+    }
+
+    flowchart = generate_flowchart(procedure, pages_data)
+    return jsonify({"flowchart": flowchart}), 200
--- a/backend/app/routes/pdf_tools.py
+++ b/backend/app/routes/pdf_tools.py
@@ -93,6 +93,11 @@ def split_pdf_route():
    if mode not in ("all", "range"):
        mode = "all"

+    if mode == "range" and (not pages or not pages.strip()):
+        return jsonify({
+            "error": "Please specify which pages to extract (e.g. 1,3,5-8)."
+        }), 400
+
    try:
        original_filename, ext = validate_file(file, allowed_types=["pdf"])
    except FileValidationError as e:
--- a/backend/app/routes/tasks.py
+++ b/backend/app/routes/tasks.py
@@ -3,11 +3,13 @@ from flask import Blueprint, jsonify
 from celery.result import AsyncResult

 from app.extensions import celery
+from app.middleware.rate_limiter import limiter

 tasks_bp = Blueprint("tasks", __name__)


@tasks_bp.route("/<task_id>/status", methods=["GET"])
+@limiter.limit("300/minute", override_defaults=True)
 def get_task_status(task_id: str):
    """
    Get the status of an async task.
--- a/backend/app/services/ai_chat_service.py
+++ b/backend/app/services/ai_chat_service.py
@@ -0,0 +1,142 @@
+"""AI Chat Service — OpenRouter integration for flowchart improvement."""
+import os
+import json
+import logging
+import requests
+
+logger = logging.getLogger(__name__)
+
+# Configuration
+OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
+OPENROUTER_MODEL = os.getenv("OPENROUTER_MODEL", "meta-llama/llama-3-8b-instruct")
+OPENROUTER_BASE_URL = os.getenv(
+    "OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1/chat/completions"
+)
+
+SYSTEM_PROMPT = """You are a flowchart improvement assistant. You help users improve their flowcharts by:
+1. Suggesting better step titles and descriptions
+2. Identifying missing steps or decision points
+3. Recommending better flow structure
+4. Simplifying complex flows
+
+When the user asks you to modify the flowchart, respond with your suggestion in plain text.
+Keep responses concise and actionable. Reply in the same language the user uses."""
+
+
+def chat_about_flowchart(message: str, flow_data: dict | None = None) -> dict:
+    """
+    Send a message to the AI about a flowchart and get improvement suggestions.
+
+    Args:
+        message: User message
+        flow_data: Current flowchart data (optional); its ``title`` and the
+            ``type``/``title`` of each entry in ``steps`` are added to the prompt
+
+    Returns:
+        {"reply": "...", "updated_flow": None} — ``updated_flow`` is always
+        None in this implementation.
+
+    No exception from the HTTP call escapes: a timeout yields a "taking too
+    long" reply and any other failure yields the offline fallback reply.
+    """
+    if not OPENROUTER_API_KEY:
+        return {
+            "reply": _fallback_response(message, flow_data),
+            "updated_flow": None,
+        }
+
+    # Build context
+    context = ""
+    if flow_data:
+        # "steps" may be missing or null, and entries may not be objects;
+        # skip anything that cannot be summarised instead of raising.
+        steps_summary = [
+            f"- [{s.get('type', 'process')}] {s.get('title', '')}"
+            for s in (flow_data.get("steps") or [])
+            if isinstance(s, dict)
+        ]
+        context = (
+            f"\nCurrent flowchart: {flow_data.get('title', 'Untitled')}\n"
+            f"Steps:\n" + "\n".join(steps_summary)
+        )
+
+    messages = [
+        {"role": "system", "content": SYSTEM_PROMPT},
+        {"role": "user", "content": f"{message}{context}"},
+    ]
+
+    try:
+        response = requests.post(
+            OPENROUTER_BASE_URL,
+            headers={
+                "Authorization": f"Bearer {OPENROUTER_API_KEY}",
+                "Content-Type": "application/json",
+            },
+            json={
+                "model": OPENROUTER_MODEL,
+                "messages": messages,
+                "max_tokens": 500,
+                "temperature": 0.7,
+            },
+            timeout=30,
+        )
+        response.raise_for_status()
+        data = response.json()
+
+        # Walk the payload defensively: an empty "choices" list or a null
+        # "content" should produce the "couldn't generate" reply below, not
+        # an IndexError/AttributeError that lands in the API-key fallback.
+        choices = data.get("choices") if isinstance(data, dict) else None
+        first_choice = choices[0] if isinstance(choices, list) and choices else {}
+        choice_message = (
+            first_choice.get("message") if isinstance(first_choice, dict) else None
+        )
+        content = (
+            choice_message.get("content") if isinstance(choice_message, dict) else None
+        )
+        reply = content.strip() if isinstance(content, str) else ""
+
+        if not reply:
+            reply = "I couldn't generate a response. Please try again."
+
+        return {"reply": reply, "updated_flow": None}
+
+    except requests.exceptions.Timeout:
+        logger.warning("OpenRouter API timeout")
+        return {
+            "reply": "The AI service is taking too long. Please try again.",
+            "updated_flow": None,
+        }
+    except Exception as e:
+        logger.error(f"OpenRouter API error: {e}")
+        return {
+            "reply": _fallback_response(message, flow_data),
+            "updated_flow": None,
+        }
+
+
+def _fallback_response(message: str, flow_data: dict | None) -> str:
+    """Build a canned reply for when the AI API cannot be used.
+
+    Without flowchart data a generic configuration hint is returned.
+    Otherwise the reply depends on whether the lower-cased message contains
+    a "simplify"-type keyword, a "missing/add"-type keyword, or neither.
+    """
+    lowered = message.lower()
+
+    if not flow_data:
+        return (
+            "AI chat requires the OPENROUTER_API_KEY to be configured. "
+            "Please set up the environment variable for full AI functionality."
+        )
+
+    simplify_words = ("simplify", "reduce", "shorter", "بسط", "اختصر")
+    extend_words = ("missing", "add", "more", "ناقص", "أضف")
+
+    steps = flow_data.get("steps", [])
+    title = flow_data.get("title", "your flowchart")
+    step_count = len(steps)
+    decision_count = len([s for s in steps if s.get("type") == "decision"])
+
+    for word in simplify_words:
+        if word in lowered:
+            return (
+                f"Your flowchart '{title}' has {step_count} steps. "
+                f"To simplify, consider merging consecutive process steps "
+                f"that perform related actions into a single step."
+            )
+
+    for word in extend_words:
+        if word in lowered:
+            return (
+                f"Your flowchart has {decision_count} decision points. "
+                f"Consider adding error handling or validation steps "
+                f"between critical process nodes."
+            )
+
+    return (
+        f"Your flowchart '{title}' contains {step_count} steps "
+        f"({decision_count} decisions). To get AI-powered suggestions, "
+        f"please configure the OPENROUTER_API_KEY environment variable."
+    )
--- a/backend/app/services/flowchart_service.py
+++ b/backend/app/services/flowchart_service.py
@@ -0,0 +1,410 @@
+"""Flowchart service — Extract procedures from PDF and generate flowchart data."""
+import os
+import re
+import json
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class FlowchartError(Exception):
+    """Raised when PDF text extraction or flowchart generation fails."""
+
+
+# ---------------------------------------------------------------------------
+# Heuristic keywords that signal procedural content
+# ---------------------------------------------------------------------------
+_PROCEDURE_KEYWORDS = [
+    "procedure", "protocol", "checklist", "sequence", "instruction",
+    "steps", "process", "workflow", "troubleshoot", "maintenance",
+    "startup", "shutdown", "emergency", "inspection", "replacement",
+    "installation", "calibration", "operation", "safety", "guide",
+]
+
+_STEP_PATTERNS = re.compile(
+    r"(?:^|\n)\s*(?:"
+    r"(?:step\s*\d+)|"           # Step 1, Step 2 …
+    r"(?:\d+[\.\)]\s+)|"         # 1. or 1) …
+    r"(?:[a-z][\.\)]\s+)|"       # a. or a) …
+    r"(?:•\s)|"                  # bullet •
+    r"(?:-\s)|"                  # dash -
+    r"(?:✓\s)"                   # checkmark ✓
+    r")",
+    re.IGNORECASE,
+)
+
+_DECISION_KEYWORDS = re.compile(
+    r"\b(?:if|whether|check|verify|confirm|decide|inspect|compare|ensure|"
+    r"is\s+\w+\s*\?|does|should|can)\b",
+    re.IGNORECASE,
+)
+
+
+def extract_text_from_pdf(input_path: str) -> list[dict]:
+    """
+    Extract text from each page of a PDF.
+
+    Args:
+        input_path: Filesystem path of the PDF to read
+
+    Returns:
+        List of dicts: [{"page": 1, "text": "..."}, ...] with 1-based page
+        numbers; a page with no extractable text yields an empty string.
+
+    Raises:
+        FlowchartError: if the file does not exist or anything fails while
+            importing PyPDF2 or reading the PDF (the original exception is
+            chained as ``__cause__``).
+    """
+    try:
+        from PyPDF2 import PdfReader
+
+        if not os.path.exists(input_path):
+            raise FlowchartError(f"File not found: {input_path}")
+
+        reader = PdfReader(input_path)
+        pages = []
+        for i, page in enumerate(reader.pages, start=1):
+            text = page.extract_text() or ""
+            pages.append({"page": i, "text": text.strip()})
+
+        return pages
+
+    except FlowchartError:
+        raise
+    except Exception as e:
+        # Chain the cause so the underlying parser error stays in tracebacks.
+        raise FlowchartError(f"Failed to extract text from PDF: {str(e)}") from e
+
+
+def identify_procedures(pages: list[dict]) -> list[dict]:
+    """
+    Analyse extracted PDF text and identify procedures/sections.
+
+    Uses heuristic analysis:
+    1. Collect heading candidates: lines under 80 characters that do not end
+       with a comma and are all upper case, or title-cased with at most
+       eight words, or contain a procedure keyword
+    2. Treat a page as procedural when it matches a step pattern or contains
+       a procedure keyword
+    3. Group consecutive pages under the same heading
+
+    Pages with empty text are skipped. If nothing is found this way, the
+    result of ``_fallback_extraction(pages)`` is returned instead.
+
+    Returns:
+        List of procedures: [
+            {
+                "id": "proc-1",
+                "title": "Emergency Shutdown Protocol",
+                "description": "Extracted first paragraph...",
+                "pages": [8, 9],
+                "step_count": 6
+            },
+            ...
+        ]
+        Procedures built by the heading pass also carry an internal
+        "_text" key holding the accumulated page text; it is not removed
+        here.
+    """
+    procedures = []
+    current_proc = None  # procedure being accumulated across pages
+    proc_counter = 0
+
+    for page_data in pages:
+        text = page_data["text"]
+        page_num = page_data["page"]
+
+        if not text:
+            continue
+
+        lines = text.split("\n")
+        heading_candidates = []
+
+        for line in lines:
+            stripped = line.strip()
+            if not stripped:
+                continue
+
+            # Heading heuristic: short line, mostly uppercase or title-like
+            is_heading = (
+                len(stripped) < 80
+                and (
+                    stripped.isupper()
+                    or (stripped == stripped.title() and len(stripped.split()) <= 8)
+                    or any(kw in stripped.lower() for kw in _PROCEDURE_KEYWORDS)
+                )
+                and not stripped.endswith(",")
+            )
+
+            if is_heading:
+                heading_candidates.append(stripped)
+
+        # Check if this page has procedural content
+        has_steps = bool(_STEP_PATTERNS.search(text))
+        has_keywords = any(kw in text.lower() for kw in _PROCEDURE_KEYWORDS)
+
+        if heading_candidates and (has_steps or has_keywords):
+            # Only the first candidate on the page is used as its heading.
+            best_heading = heading_candidates[0]
+
+            # Check if this is a continuation of the current procedure
+            if current_proc and _is_continuation(current_proc["title"], best_heading, text):
+                current_proc["pages"].append(page_num)
+                current_proc["_text"] += "\n" + text
+            else:
+                # Save previous procedure
+                if current_proc:
+                    _finalize_procedure(current_proc)
+                    procedures.append(current_proc)
+
+                proc_counter += 1
+                first_paragraph = _extract_first_paragraph(text, best_heading)
+                current_proc = {
+                    "id": f"proc-{proc_counter}",
+                    "title": _clean_title(best_heading),
+                    "description": first_paragraph,
+                    "pages": [page_num],
+                    "_text": text,
+                }
+        elif current_proc and has_steps:
+            # Continuation — same procedure on next page
+            # (reached when the page has steps but no heading candidate)
+            current_proc["pages"].append(page_num)
+            current_proc["_text"] += "\n" + text
+
+    # Don't forget the last one
+    if current_proc:
+        _finalize_procedure(current_proc)
+        procedures.append(current_proc)
+
+    # If no procedures found via headings, try splitting by page with step content
+    if not procedures:
+        procedures = _fallback_extraction(pages)
+
+    return procedures
+
+
+def generate_flowchart(procedure: dict, page_texts: list[dict]) -> dict:
+    """
+    Build the flowchart dict for one procedure.
+
+    Args:
+        procedure: Procedure dict; its "id", "title" and "pages" are read
+        page_texts: Page dicts with "page" and "text"; only pages listed in
+            procedure["pages"] contribute text
+
+    Returns:
+        Flowchart dict: {
+            "id": "flow-<procedure id>",
+            "procedureId": "<procedure id>",
+            "title": "...",
+            "steps": [ {id, type, title, description, connections}, ... ]
+        }
+    """
+    # Concatenate the text of the procedure's pages, one trailing newline each
+    combined_text = "".join(
+        entry["text"] + "\n"
+        for entry in page_texts
+        if entry["page"] in procedure["pages"]
+    )
+
+    proc_id = procedure["id"]
+    proc_title = procedure["title"]
+
+    flowchart = {
+        "id": f"flow-{proc_id}",
+        "procedureId": proc_id,
+        "title": proc_title,
+        "steps": _extract_steps_from_text(combined_text, proc_title),
+    }
+    return flowchart
+
+
+def extract_and_generate(input_path: str) -> dict:
+    """
+    Run the whole pipeline: extract text → identify procedures → build flowcharts.
+
+    Returns:
+        {
+            "procedures": [...],   # internal "_text" field removed
+            "flowcharts": [...],   # one per procedure, same order
+            "total_pages": int,
+            "pages": [...]         # per-page text as extracted
+        }
+    """
+    pages = extract_text_from_pdf(input_path)
+    procedures = identify_procedures(pages)
+
+    flowcharts = [generate_flowchart(procedure, pages) for procedure in procedures]
+
+    # Strip the internal text accumulator once the flowcharts are built
+    for procedure in procedures:
+        procedure.pop("_text", None)
+
+    result = {
+        "procedures": procedures,
+        "flowcharts": flowcharts,
+        "total_pages": len(pages),
+        "pages": pages,
+    }
+    return result
+
+
+# ---------------------------------------------------------------------------
+# Internal helpers
+# ---------------------------------------------------------------------------
+
+def _is_continuation(current_title: str, new_heading: str, text: str) -> bool:
+    """Check if a page is a continuation of the current procedure.
+
+    Args:
+        current_title: Title of the procedure being accumulated
+        new_heading: Heading found on the page under inspection
+        text: Page text (not consulted by the current checks)
+
+    Returns:
+        True when the heading carries an explicit continuation marker or
+        contains the current title (case-insensitive); False otherwise.
+    """
+    continued_markers = ["(continued)", "(cont.)", "(cont'd)"]
+    heading_lower = new_heading.lower()
+
+    # Explicit continuation marker
+    if any(m in heading_lower for m in continued_markers):
+        return True
+
+    # Same title repeated. An empty title must not count: "" is a substring
+    # of every heading, which would glue all following pages onto one
+    # procedure.
+    title_lower = current_title.lower().rstrip()
+    if title_lower and title_lower in heading_lower:
+        return True
+
+    return False
+
+
+def _clean_title(title: str) -> str:
+    """Clean up a procedure title.
+
+    Drops a continuation marker and everything after it, then strips
+    leading numbering such as "1.", "2)", "3.1" or "3.1.2.". A bare leading
+    number with no "." or ")" (e.g. "2024 Report") is left in place.
+    """
+    # Remove continuation markers
+    title = re.sub(r"\s*\(continued\).*", "", title, flags=re.IGNORECASE)
+    title = re.sub(r"\s*\(cont[\.\']?d?\).*", "", title, flags=re.IGNORECASE)
+    # Remove leading numbers like "3.1". Dotted section numbers are tried
+    # first; otherwise "3." would be stripped from "3.1 Title" on its own
+    # and leave "1 Title" behind.
+    title = re.sub(r"^(?:\d+(?:\.\d+)+[\.\)]?|\d+[\.\)])\s*", "", title)
+    return title.strip()
+
+
+def _extract_first_paragraph(text: str, heading: str) -> str:
+    """Return up to 200 characters of the first paragraph following a heading.
+
+    The paragraph starts after the first occurrence of ``heading`` (or at the
+    start of ``text`` when the heading is absent) and ends at the first blank
+    line or at an upper-case line longer than 10 characters. A fixed
+    placeholder sentence is returned when nothing is collected.
+    """
+    position = text.find(heading)
+    body = text if position < 0 else text[position + len(heading):]
+    body = body.strip()
+
+    collected = []
+    for raw_line in body.split("\n"):
+        candidate = raw_line.strip()
+        if candidate:
+            # An upper-case line of this length is taken as the next heading
+            if candidate.isupper() and len(candidate) > 10:
+                break
+            collected.append(candidate)
+        elif collected:
+            # Blank line after content closes the paragraph
+            break
+
+    summary = " ".join(collected)[:200]
+    if summary:
+        return summary
+    return "Procedural content extracted from document."
+
+
+def _finalize_procedure(proc: dict):
+    """Set proc["step_count"] from the step markers found in proc["_text"].
+
+    The count never goes below 2, even when fewer markers match.
+    """
+    step_markers = _STEP_PATTERNS.findall(proc.get("_text", ""))
+    marker_total = len(step_markers)
+    proc["step_count"] = marker_total if marker_total > 2 else 2
+
+
+def _fallback_extraction(pages: list[dict]) -> list[dict]:
+    """Turn every page that contains step-like markers into its own procedure.
+
+    Used when the heading-based pass finds nothing. The title is the first
+    60 characters of the page's first line (or a "Procedure (Page N)"
+    placeholder when that is empty) and the description is the first 150
+    characters of the page text.
+    """
+    found = []
+
+    for entry in pages:
+        page_text = entry["text"]
+        if not page_text:
+            continue
+
+        markers = _STEP_PATTERNS.findall(page_text)
+        if not markers:
+            continue
+
+        heading_line = page_text.split("\n")[0].strip()[:60]
+        found.append({
+            "id": f"proc-{len(found) + 1}",
+            "title": heading_line or f"Procedure (Page {entry['page']})",
+            "description": page_text[:150].strip(),
+            "pages": [entry["page"]],
+            "step_count": len(markers),
+        })
+
+    return found
+
+
+def _extract_steps_from_text(text: str, procedure_title: str) -> list[dict]:
+    """
+    Convert procedure text into an ordered list of flowchart nodes.
+
+    Strategy:
+    1. Start a new step at every numbered/lettered/bulleted/"step N" line;
+       following non-blank lines are appended to the step in progress
+    2. Keep at most the first 15 steps
+    3. Classify each step as "decision" or "process" by keyword match
+    4. Wrap the steps in a start node and an end node, each node pointing
+       at the next id (the start node has no connection when there are no
+       steps)
+    """
+    step_start = re.compile(
+        r"^\s*(?:\d+[\.\)]\s+|[a-z][\.\)]\s+|•\s|-\s|✓\s|step\s*\d+)",
+        re.IGNORECASE,
+    )
+    step_prefix = re.compile(
+        r"^\s*(?:\d+[\.\)]\s*|[a-z][\.\)]\s*|•\s*|-\s*|✓\s*|step\s*\d+[:\.\)]\s*)",
+        re.IGNORECASE,
+    )
+
+    raw_steps = []
+    pending = []  # lines of the step currently being assembled
+
+    for raw_line in text.split("\n"):
+        content = raw_line.strip()
+        if not content:
+            continue
+
+        if step_start.match(content):
+            if pending:
+                raw_steps.append(" ".join(pending))
+            pending = [step_prefix.sub("", content)]
+        elif pending:
+            pending.append(content)
+
+    if pending:
+        raw_steps.append(" ".join(pending))
+
+    # Limit to reasonable number of steps
+    raw_steps = raw_steps[:15]
+
+    # Ids are consecutive: "1" is the start node, steps follow, end is last
+    nodes = [{
+        "id": "1",
+        "type": "start",
+        "title": f"Begin: {procedure_title[:40]}",
+        "description": "Start of procedure",
+        "connections": ["2"] if raw_steps else [],
+    }]
+
+    for node_id, step_text in enumerate(raw_steps, start=2):
+        looks_like_decision = bool(_DECISION_KEYWORDS.search(step_text))
+        nodes.append({
+            "id": str(node_id),
+            "type": "decision" if looks_like_decision else "process",
+            "title": step_text[:60],
+            "description": step_text[:150],
+            # Every step links to the following id; for the last step that
+            # is the end node.
+            "connections": [str(node_id + 1)],
+        })
+
+    nodes.append({
+        "id": str(len(raw_steps) + 2),
+        "type": "end",
+        "title": "Procedure Complete",
+        "description": "End of procedure",
+        "connections": [],
+    })
+
+    return nodes
--- a/backend/app/services/pdf_tools_service.py
+++ b/backend/app/services/pdf_tools_service.py
@@ -140,20 +140,75 @@ def split_pdf(

 def _parse_page_range(spec: str, total: int) -> list[int]:
    """Parse a page specification like '1,3,5-8' into 0-based indices."""
+    if not spec or not spec.strip():
+        raise PDFToolsError("Please specify at least one page (e.g. 1,3,5-8).")
+
    indices = set()
-    for part in spec.split(","):
-        part = part.strip()
+    invalid_tokens = []
+    out_of_range_tokens = []
+
+    for raw_part in spec.split(","):
+        part = raw_part.strip()
+
+        if not part:
+            continue
+
        if "-" in part:
+            if part.count("-") != 1:
+                invalid_tokens.append(part)
+                continue
+
            start_s, end_s = part.split("-", 1)
-            start = max(1, int(start_s.strip()))
-            end = min(total, int(end_s.strip()))
+            start_s = start_s.strip()
+            end_s = end_s.strip()
+
+            if not start_s.isdigit() or not end_s.isdigit():
+                invalid_tokens.append(part)
+                continue
+
+            start = int(start_s)
+            end = int(end_s)
+
+            if start > end:
+                invalid_tokens.append(part)
+                continue
+
+            if start < 1 or end > total:
+                out_of_range_tokens.append(f"{start}-{end}")
+                continue
+
            indices.update(range(start - 1, end))
        else:
+            if not part.isdigit():
+                invalid_tokens.append(part)
+                continue
+
            page = int(part)
-            if 1 <= page <= total:
-                indices.add(page - 1)
+            if page < 1 or page > total:
+                out_of_range_tokens.append(str(page))
+                continue
+
+            indices.add(page - 1)
+
+    if invalid_tokens:
+        tokens = ", ".join(invalid_tokens)
+        raise PDFToolsError(
+            f"Invalid page format: {tokens}. Use a format like 1,3,5-8."
+        )
+
+    if out_of_range_tokens:
+        tokens = ", ".join(out_of_range_tokens)
+        page_word = "page" if total == 1 else "pages"
+        raise PDFToolsError(
+            f"Selected pages ({tokens}) are out of range. This PDF has only {total} {page_word}."
+        )
+
    if not indices:
-        raise PDFToolsError("No valid pages specified.")
+        page_word = "page" if total == 1 else "pages"
+        raise PDFToolsError(
+            f"No pages selected. This PDF has {total} {page_word}."
+        )
+
    return sorted(indices)


--- a/backend/app/tasks/flowchart_tasks.py
+++ b/backend/app/tasks/flowchart_tasks.py
@@ -0,0 +1,79 @@
+"""Celery tasks for PDF-to-Flowchart extraction and generation."""
+import os
+import json
+import logging
+
+from app.extensions import celery
+from app.services.flowchart_service import extract_and_generate, FlowchartError
+from app.services.storage_service import storage
+from app.utils.sanitizer import cleanup_task_files
+
+logger = logging.getLogger(__name__)
+
+
+def _cleanup(task_id: str):
+    """Remove the task's working files, keeping outputs unless storage uses S3."""
+    keep_local_outputs = not storage.use_s3
+    cleanup_task_files(task_id, keep_outputs=keep_local_outputs)
+
+
+@celery.task(bind=True, name="app.tasks.flowchart_tasks.extract_flowchart_task")
+def extract_flowchart_task(
+    self, input_path: str, task_id: str, original_filename: str
+):
+    """
+    Async task: Extract procedures from PDF and generate flowcharts.
+
+    Args:
+        input_path: Path of the uploaded PDF
+        task_id: Identifier used for the output directory, the output file
+            name, storage upload and cleanup
+        original_filename: Name of the uploaded file (not read by this task)
+
+    Returns:
+        On success, a dict with "status": "completed", the download URL and
+        the extracted procedures, flowcharts and pages. On failure, a dict
+        with "status": "failed" and an "error" message; the exception is
+        logged, not re-raised.
+    """
+    output_dir = os.path.join("/tmp/outputs", task_id)
+    os.makedirs(output_dir, exist_ok=True)
+
+    try:
+        self.update_state(
+            state="PROCESSING",
+            meta={"step": "Extracting text from PDF..."},
+        )
+
+        result = extract_and_generate(input_path)
+
+        self.update_state(
+            state="PROCESSING",
+            meta={"step": "Saving flowchart data..."},
+        )
+
+        # Save flowchart JSON to a file and upload
+        output_path = os.path.join(output_dir, f"{task_id}_flowcharts.json")
+        with open(output_path, "w", encoding="utf-8") as f:
+            json.dump(result, f, ensure_ascii=False, indent=2)
+
+        s3_key = storage.upload_file(output_path, task_id, folder="outputs")
+        download_url = storage.generate_presigned_url(
+            s3_key, original_filename="flowcharts.json"
+        )
+
+        final_result = {
+            "status": "completed",
+            "download_url": download_url,
+            "filename": "flowcharts.json",
+            "procedures": result["procedures"],
+            "flowcharts": result["flowcharts"],
+            "pages": result["pages"],
+            "total_pages": result["total_pages"],
+            "procedures_count": len(result["procedures"]),
+        }
+
+        _cleanup(task_id)
+        logger.info(
+            f"Task {task_id}: Flowchart extraction completed — "
+            f"{len(result['procedures'])} procedures, "
+            f"{result['total_pages']} pages"
+        )
+        return final_result
+
+    except FlowchartError as e:
+        logger.error(f"Task {task_id}: Flowchart error — {e}")
+        _cleanup(task_id)
+        return {"status": "failed", "error": str(e)}
+    except Exception as e:
+        # Unexpected failure: logger.exception records the traceback, which
+        # the caller never sees because only a generic message is returned.
+        logger.exception(f"Task {task_id}: Unexpected error — {e}")
+        _cleanup(task_id)
+        return {"status": "failed", "error": "An unexpected error occurred."}
--- a/backend/app/utils/file_validator.py
+++ b/backend/app/utils/file_validator.py
@@ -1,7 +1,12 @@
 """File validation utilities — multi-layer security checks."""
 import os

-import magic
+try:
+    import magic
+    HAS_MAGIC = True
+except (ImportError, OSError):
+    HAS_MAGIC = False
+
 from flask import current_app
 from werkzeug.utils import secure_filename

@@ -72,18 +77,19 @@ def validate_file(file_storage, allowed_types: list[str] | None = None):
    if file_size == 0:
        raise FileValidationError("File is empty.")

-    # Layer 4: Check MIME type using magic bytes
+    # Layer 4: Check MIME type using magic bytes (if libmagic is available)
    file_header = file_storage.read(8192)
    file_storage.seek(0)

-    detected_mime = magic.from_buffer(file_header, mime=True)
-    expected_mimes = valid_extensions.get(ext, [])
+    if HAS_MAGIC:
+        detected_mime = magic.from_buffer(file_header, mime=True)
+        expected_mimes = valid_extensions.get(ext, [])

-    if detected_mime not in expected_mimes:
-        raise FileValidationError(
-            f"File content does not match extension '.{ext}'. "
-            f"Detected type: {detected_mime}"
-        )
+        if detected_mime not in expected_mimes:
+            raise FileValidationError(
+                f"File content does not match extension '.{ext}'. "
+                f"Detected type: {detected_mime}"
+            )

    # Layer 5: Additional content checks for specific types
    if ext == "pdf":