# (extraction artifact — unrelated commit message and listing metadata kept as a comment)
# - Add max-h-[90vh] and flex-col to modal content container
# - Wrap tools grid in max-h-[50vh] overflow-y-auto container
# - Add overscroll-contain for smooth scroll behavior on mobile
# - Fixes issue where 21 PDF tools overflow viewport on small screens
# 152 lines, 4.9 KiB, Python
"""Translation guardrails — admission control, caching, and cost protection.
|
|
|
|
This module implements the guardrail model described in
|
|
docs/tool-portfolio/05-ai-cost-and-performance-plan.md.
|
|
"""
|
|
|
|
import hashlib
|
|
import logging
|
|
import os
|
|
from typing import Optional
|
|
|
|
from flask import current_app
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# ── Page-count admission tiers ──────────────────────────────────────
# Per-plan ceilings on how many pages a single translation job may have.
# Free/anonymous users get the small cap, Pro users the large one; both
# are tunable through environment variables without a deploy.
FREE_TRANSLATE_MAX_PAGES = int(os.getenv("FREE_TRANSLATE_MAX_PAGES", "10"))
PRO_TRANSLATE_MAX_PAGES = int(os.getenv("PRO_TRANSLATE_MAX_PAGES", "50"))
class TranslationAdmissionError(Exception):
    """Raised when a translation job is rejected at admission.

    Carries an HTTP-style ``status_code`` (default 400) so the API layer
    can map the rejection straight onto a response.
    """

    def __init__(self, message: str, status_code: int = 400):
        super().__init__(message)
        # Stored as attributes so handlers can build an error payload
        # without re-parsing the exception's args.
        self.message = message
        self.status_code = status_code
def get_page_limit(plan: str) -> int:
    """Return the per-job page cap for *plan* ("pro" gets the larger tier)."""
    # Imported lazily to avoid a circular import with the account service.
    from app.services.account_service import normalize_plan

    is_pro = normalize_plan(plan) == "pro"
    return PRO_TRANSLATE_MAX_PAGES if is_pro else FREE_TRANSLATE_MAX_PAGES
def count_pdf_pages(file_path: str) -> int:
    """Best-effort page count for the PDF at *file_path*.

    Returns 0 when the count cannot be determined (corrupt file, parser
    failure, ...), which callers treat as "skip the page check".
    """
    try:
        from PyPDF2 import PdfReader

        return len(PdfReader(file_path).pages)
    except Exception as exc:
        # Admission control must never hard-fail on a parse error:
        # log the problem and let the job through with an unknown count.
        logger.warning("Failed to count PDF pages for admission: %s", exc)
        return 0
def check_page_admission(file_path: str, plan: str) -> int:
    """Admit or reject a translation job based on its page count.

    Returns the page count on success. A count of 0 means the total
    could not be determined (e.g. OCR fallback) and the job is admitted.

    Raises:
        TranslationAdmissionError: with status 413 when the document
            exceeds the plan's page limit.
    """
    pages = count_pdf_pages(file_path)
    if not pages:
        # Unknown page count — wave the job through rather than block it.
        return pages

    allowed = get_page_limit(plan)
    if pages <= allowed:
        return pages

    raise TranslationAdmissionError(
        f"This PDF has {pages} pages. "
        f"Your plan allows up to {allowed} pages for translation. "
        f"Please upgrade your plan or use a smaller file.",
        status_code=413,
    )
# ── Content-hash caching ────────────────────────────────────────────
# Completed translations are cached in Redis under a key derived from
# the file's content hash plus the language pair, so identical documents
# are never translated (and paid for) twice.

TRANSLATION_CACHE_TTL = int(os.getenv("TRANSLATION_CACHE_TTL", str(7 * 24 * 3600)))  # 7 days
def _get_redis():
    """Return a Redis client for the translation cache, or None.

    Connection details come from the REDIS_URL environment variable.
    Any failure (library missing, bad URL, ...) degrades the module to
    a cache-less mode instead of raising.
    """
    try:
        import redis

        url = os.getenv("REDIS_URL", "redis://localhost:6379/0")
        return redis.Redis.from_url(url, decode_responses=True)
    except Exception as exc:
        logger.debug("Redis not available for translation cache: %s", exc)
        return None
def _compute_content_hash(file_path: str) -> str:
    """Return the SHA-256 hex digest of the file at *file_path*.

    Reads in 8 KiB chunks so arbitrarily large uploads never need to be
    held in memory at once.
    """
    digest = hashlib.sha256()
    with open(file_path, "rb") as handle:
        while True:
            block = handle.read(8192)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
def _cache_key(content_hash: str, target_language: str, source_language: str) -> str:
    """Compose the Redis key: ``translate_cache:<hash>:<src>:<tgt>``."""
    parts = ("translate_cache", content_hash, source_language, target_language)
    return ":".join(parts)
def get_cached_translation(
    file_path: str, target_language: str, source_language: str = "auto"
) -> Optional[dict]:
    """Return a previously cached translation result, or None on a miss.

    Every failure mode (no Redis, unreadable file, corrupt JSON) is
    treated as a cache miss so translation always proceeds.
    """
    client = _get_redis()
    if client is None:
        return None

    try:
        import json

        digest = _compute_content_hash(file_path)
        key = _cache_key(digest, target_language, source_language)
        payload = client.get(key)
        if payload:
            logger.info("Translation cache hit for %s", key)
            return json.loads(payload)
    except Exception as exc:
        logger.debug("Translation cache lookup failed: %s", exc)

    return None
def store_cached_translation(
    file_path: str,
    target_language: str,
    source_language: str,
    result: dict,
) -> None:
    """Persist a successful translation *result* in Redis (best effort).

    Entries expire after TRANSLATION_CACHE_TTL seconds. Failures are
    logged at debug level and never propagated to the caller.
    """
    client = _get_redis()
    if client is None:
        return

    try:
        import json

        digest = _compute_content_hash(file_path)
        key = _cache_key(digest, target_language, source_language)
        client.setex(key, TRANSLATION_CACHE_TTL, json.dumps(result, ensure_ascii=False))
        logger.info("Translation cached: %s (TTL=%ds)", key, TRANSLATION_CACHE_TTL)
    except Exception as exc:
        logger.debug("Translation cache store failed: %s", exc)