LingoAdmin Login
-
Hi {display_name},
+
Hi {safe_display_name},
Your one-time passcode is:
{otp}
@@ -229,5 +250,5 @@ def _build_otp_message(to_email: str, otp: str, display_name: str) -> "EmailMess
"""
msg.set_content(text)
- msg.add_alternative(html, subtype="html")
+ msg.add_alternative(html_body, subtype="html")
return msg
diff --git a/backend-service/app/services/notification_campaign/__init__.py b/backend-service/app/services/notification_campaign/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend-service/app/services/notification_campaign/apply.py b/backend-service/app/services/notification_campaign/apply.py
new file mode 100644
index 00000000..999f4ec8
--- /dev/null
+++ b/backend-service/app/services/notification_campaign/apply.py
@@ -0,0 +1,90 @@
+"""Apply service — coordinate FCM/in-app dispatch for a campaign job."""
+
+from __future__ import annotations
+
+import logging
+import uuid
+
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.models.notification_campaign import NotificationCampaignJob
+from app.services.notification_campaign.segmenter import segment_users
+from app.services.notification_campaign.sender import (
+ send_campaign_in_app,
+ send_campaign_push,
+)
+from app.services.notification_campaign_jobs import NotificationCampaignJobService
+
+logger = logging.getLogger(__name__)
+
+
+class NotificationCampaignApplyService:
+    """Delivers a notification-campaign job and stores its delivery counters."""
+
+    @staticmethod
+    async def apply(
+        db: AsyncSession,
+        job: NotificationCampaignJob,
+    ) -> dict:
+        """Send ``job`` through the channel selected by ``job.job_type``.
+
+        The audience is re-segmented at call time. ``targeted_push`` and
+        ``scheduled_push`` jobs go out as FCM pushes, ``in_app_broadcast``
+        jobs are stored as in-app notifications. The resulting counters are
+        saved on the job through ``set_delivery_stats``.
+
+        Returns:
+            A dict with ``sent``, ``failed``, ``skipped`` and their ``total``.
+
+        Raises:
+            ValueError: if ``job.job_type`` is none of the handled types.
+        """
+        config = job.config
+        content = config.get("content", {})
+        audience = config.get("audience", {})
+
+        message = {
+            "title": content.get("title", ""),
+            "body": content.get("body", ""),
+            "notification_type": content.get("notification_type", "campaign"),
+            "deep_link": content.get("deep_link"),
+        }
+
+        # AI-rewritten copy stored on the artifact wins over the configured copy.
+        if job.artifact and job.artifact.get("ai_copy"):
+            rewritten = job.artifact["ai_copy"]
+            message["title"] = rewritten.get("title", message["title"])
+            message["body"] = rewritten.get("body", message["body"])
+
+        # Segment again: the matching users may have changed since the preview.
+        segment = await segment_users(
+            db,
+            audience_type=audience.get("type", "all"),
+            filters=audience.get("filters", {}),
+        )
+
+        job_type = job.job_type
+        if job_type == "in_app_broadcast":
+            outcome = await send_campaign_in_app(
+                db,
+                user_ids=segment.user_ids,
+                **message,
+            )
+        elif job_type in ("targeted_push", "scheduled_push"):
+            outcome = await send_campaign_push(
+                fcm_token_map=segment.fcm_token_map,
+                **message,
+            )
+        else:
+            raise ValueError(f"Unknown job_type: {job.job_type!r}")
+
+        sent, failed, skipped = outcome.sent, outcome.failed, outcome.skipped
+
+        await NotificationCampaignJobService.set_delivery_stats(
+            db,
+            job,
+            sent=sent,
+            failed=failed,
+            skipped=skipped,
+        )
+
+        logger.info(
+            "Campaign job %s applied: sent=%d failed=%d skipped=%d",
+            job.id,
+            sent,
+            failed,
+            skipped,
+        )
+
+        return {
+            "sent": sent,
+            "failed": failed,
+            "skipped": skipped,
+            "total": sent + failed + skipped,
+        }
diff --git a/backend-service/app/services/notification_campaign/segmenter.py b/backend-service/app/services/notification_campaign/segmenter.py
new file mode 100644
index 00000000..7fdfdc49
--- /dev/null
+++ b/backend-service/app/services/notification_campaign/segmenter.py
@@ -0,0 +1,143 @@
+"""User segmentation engine for Notification Campaign Agent."""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from datetime import UTC, datetime, timedelta
+
+from sqlalchemy import select, and_, exists
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.models.gamification import LeaderboardEntry
+from app.models.progress import Streak
+from app.models.user import User, UserDevice
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class SegmentResult:
+ """Outcome of ``segment_users``: who matched and how to reach them."""
+
+ # Matched user ids as strings, in query order.
+ user_ids: list[str]
+ fcm_token_map: dict[str, list[str]] # user_id → [fcm_token, ...]
+ # Number of matched users (len(user_ids) at construction time).
+ audience_size: int
+ # Up to five matched users rendered as plain dicts for previewing.
+ sample_users: list[dict]
+ # Echo of the filter values that produced this segment.
+ filter_summary: dict
+
+
+async def segment_users(
+    db: AsyncSession,
+    *,
+    audience_type: str,
+    filters: dict,
+) -> SegmentResult:
+    """Resolve the target users and their FCM tokens for a campaign audience.
+
+    Active users are selected and narrowed by the optional ``filters`` keys
+    ``cefr_levels``, ``inactive_days``, ``has_fcm_token`` (defaults to True),
+    ``leagues`` and ``min_streak``. ``audience_type`` is only echoed back in
+    ``filter_summary``; it does not influence the query.
+
+    Returns:
+        A ``SegmentResult`` holding the matched user ids (most recently
+        created accounts first), a user-id -> FCM-token map, the audience
+        size, up to five sample users and a summary of the filters.
+    """
+    has_fcm_required = filters.get("has_fcm_token", True)
+    leagues = filters.get("leagues")
+    cefr_levels = filters.get("cefr_levels")
+    min_streak = filters.get("min_streak")
+    inactive_days = filters.get("inactive_days")
+
+    # A device row only counts when it carries a non-empty token.
+    usable_token = (
+        UserDevice.fcm_token.is_not(None),
+        UserDevice.fcm_token != "",
+    )
+
+    conditions = [User.is_active.is_(True)]
+
+    if cefr_levels:
+        # NOTE(review): other modules in this change filter CEFR on
+        # User.proficiency_level — confirm User.level is the intended column.
+        conditions.append(User.level.in_([lvl.upper() for lvl in cefr_levels]))
+
+    if inactive_days is not None:
+        cutoff = datetime.now(UTC) - timedelta(days=inactive_days)
+        # Users who have never logged in count as inactive as well.
+        conditions.append((User.last_login < cutoff) | User.last_login.is_(None))
+
+    if has_fcm_required:
+        conditions.append(
+            exists(
+                select(UserDevice.id).where(
+                    UserDevice.user_id == User.id,
+                    *usable_token,
+                )
+            )
+        )
+
+    if leagues:
+        # Match users with a LeaderboardEntry in any of the given leagues
+        # (any week). League names are lower-cased, matching how the
+        # achievement batch engine filters on the same column.
+        normalized_leagues = [league.lower() for league in leagues]
+        conditions.append(
+            exists(
+                select(LeaderboardEntry.id).where(
+                    LeaderboardEntry.user_id == User.id,
+                    LeaderboardEntry.league.in_(normalized_leagues),
+                )
+            )
+        )
+
+    if min_streak is not None:
+        conditions.append(
+            exists(
+                select(Streak.id).where(
+                    Streak.user_id == User.id,
+                    Streak.current_streak >= min_streak,
+                )
+            )
+        )
+
+    query = (
+        select(User.id, User.username, User.email, User.level, User.last_login)
+        .where(and_(*conditions))
+        .order_by(User.created_at.desc())
+    )
+    rows = (await db.execute(query)).all()
+
+    user_ids = [str(row.id) for row in rows]
+
+    # Fetch FCM tokens for matched users; users without a usable token simply
+    # get no key in the map.
+    fcm_token_map: dict[str, list[str]] = {}
+    if user_ids:
+        device_rows = (
+            await db.execute(
+                select(UserDevice.user_id, UserDevice.fcm_token).where(
+                    UserDevice.user_id.in_([row.id for row in rows]),
+                    *usable_token,
+                )
+            )
+        ).all()
+        for device in device_rows:
+            fcm_token_map.setdefault(str(device.user_id), []).append(device.fcm_token)
+
+    sample_users = [
+        {
+            "id": str(r.id),
+            "username": r.username,
+            "email": r.email,
+            "cefr_level": r.level,
+            "last_login": r.last_login.isoformat() if r.last_login else None,
+            "has_fcm": str(r.id) in fcm_token_map,
+        }
+        for r in rows[:5]
+    ]
+
+    filter_summary = {
+        "audience_type": audience_type,
+        "leagues": leagues,
+        "cefr_levels": cefr_levels,
+        "min_streak": min_streak,
+        "inactive_days": inactive_days,
+        "has_fcm_token": has_fcm_required,
+    }
+
+    logger.info(
+        "Segmentation complete: %d users matched (%d with FCM tokens)",
+        len(user_ids),
+        len(fcm_token_map),
+    )
+
+    return SegmentResult(
+        user_ids=user_ids,
+        fcm_token_map=fcm_token_map,
+        audience_size=len(user_ids),
+        sample_users=sample_users,
+        filter_summary=filter_summary,
+    )
diff --git a/backend-service/app/services/notification_campaign/sender.py b/backend-service/app/services/notification_campaign/sender.py
new file mode 100644
index 00000000..31124be4
--- /dev/null
+++ b/backend-service/app/services/notification_campaign/sender.py
@@ -0,0 +1,119 @@
+"""FCM batch sender for Notification Campaign Agent."""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+
+from fastapi.concurrency import run_in_threadpool
+from firebase_admin import messaging
+
+from app.core.firebase_auth import _init_firebase_app
+
+logger = logging.getLogger(__name__)
+
+_FCM_BATCH_SIZE = 500 # FCM multicast supports up to 500 tokens per call
+
+
+@dataclass
+class SendResult:
+ """Delivery counters returned by the campaign send functions."""
+
+ # Deliveries reported as successful.
+ sent: int
+ # Deliveries that errored.
+ failed: int
+ # Deliveries that were not attempted.
+ skipped: int
+
+
+async def send_campaign_push(
+    *,
+    fcm_token_map: dict[str, list[str]],
+    title: str,
+    body: str,
+    notification_type: str = "campaign",
+    deep_link: str | None = None,
+) -> SendResult:
+    """Send a push notification campaign to every token in ``fcm_token_map``.
+
+    Tokens are flattened across users, empty values are dropped and
+    duplicates removed so a device registered more than once gets a single
+    push. Sending happens in batches of ``_FCM_BATCH_SIZE``; a batch that
+    raises is counted as failed in full and the remaining batches still run.
+
+    Returns:
+        ``SendResult`` whose ``sent``/``failed`` count tokens. ``skipped`` is
+        the number of map entries when no usable token exists, or the number
+        of tokens when Firebase cannot be initialised.
+    """
+    # dict.fromkeys de-duplicates while keeping first-seen order.
+    all_tokens = list(
+        dict.fromkeys(
+            token
+            for tokens in fcm_token_map.values()
+            for token in tokens
+            if token
+        )
+    )
+    if not all_tokens:
+        logger.info("Campaign send skipped — no FCM tokens in segment")
+        return SendResult(sent=0, failed=0, skipped=len(fcm_token_map))
+
+    try:
+        _init_firebase_app()
+    except Exception as exc:
+        # Best effort: a missing Firebase configuration must not fail the job.
+        logger.warning("Firebase not configured; skipping campaign push: %s", exc)
+        return SendResult(sent=0, failed=0, skipped=len(all_tokens))
+
+    data: dict[str, str] = {
+        "type": notification_type,
+        "route": deep_link or "/",
+    }
+
+    total_sent = 0
+    total_failed = 0
+
+    # Batch in chunks of _FCM_BATCH_SIZE
+    for i in range(0, len(all_tokens), _FCM_BATCH_SIZE):
+        chunk = all_tokens[i : i + _FCM_BATCH_SIZE]
+        message = messaging.MulticastMessage(
+            notification=messaging.Notification(title=title, body=body),
+            data=data,
+            tokens=chunk,
+        )
+        try:
+            # The Firebase client call is handed to a worker thread.
+            response = await run_in_threadpool(messaging.send_each_for_multicast, message)
+            total_sent += int(getattr(response, "success_count", 0) or 0)
+            total_failed += int(getattr(response, "failure_count", 0) or 0)
+        except Exception as exc:
+            logger.exception("FCM batch %d failed: %s", i // _FCM_BATCH_SIZE, exc)
+            total_failed += len(chunk)
+
+    logger.info(
+        "Campaign push complete: sent=%d failed=%d total_tokens=%d",
+        total_sent,
+        total_failed,
+        len(all_tokens),
+    )
+    return SendResult(sent=total_sent, failed=total_failed, skipped=0)
+
+
+async def send_campaign_in_app(
+    db,
+    *,
+    user_ids: list[str],
+    title: str,
+    body: str,
+    notification_type: str = "campaign",
+    deep_link: str | None = None,
+) -> SendResult:
+    """Create persisted Notification records for an in-app broadcast.
+
+    One row is inserted per entry in ``user_ids``. The insert runs inside a
+    savepoint, so a database error undoes only this insert and leaves the
+    caller's transaction usable; the failure is then reported through the
+    returned counters instead of being raised.
+
+    Returns:
+        ``SendResult`` with ``sent`` equal to the number of rows written, or
+        ``failed`` equal to the number of rows attempted when the insert
+        errors. An empty ``user_ids`` yields all-zero counters.
+
+    Raises:
+        ValueError: if an entry of ``user_ids`` is not a valid UUID string.
+    """
+    import uuid
+    from datetime import datetime, timezone
+
+    from sqlalchemy import insert
+
+    from app.models.notification import Notification
+
+    if not user_ids:
+        return SendResult(sent=0, failed=0, skipped=0)
+
+    now = datetime.now(timezone.utc)
+    rows = [
+        {
+            "id": uuid.uuid4(),
+            "user_id": uuid.UUID(uid),
+            "title": title,
+            "body": body,
+            "type": notification_type,
+            "data": {"route": deep_link or "/", "campaign": True},
+            "is_read": False,
+            "created_at": now,
+        }
+        for uid in user_ids
+    ]
+
+    try:
+        # Without the savepoint a failed INSERT would leave the surrounding
+        # transaction in an error state and break the caller's next flush.
+        async with db.begin_nested():
+            await db.execute(insert(Notification), rows)
+            await db.flush()
+    except Exception as exc:
+        logger.exception("In-app broadcast DB insert failed: %s", exc)
+        return SendResult(sent=0, failed=len(rows), skipped=0)
+    return SendResult(sent=len(rows), failed=0, skipped=0)
diff --git a/backend-service/app/services/notification_campaign_jobs.py b/backend-service/app/services/notification_campaign_jobs.py
new file mode 100644
index 00000000..a490a058
--- /dev/null
+++ b/backend-service/app/services/notification_campaign_jobs.py
@@ -0,0 +1,167 @@
+"""Application service for Notification Campaign Agent jobs."""
+
+from __future__ import annotations
+
+import uuid
+from datetime import UTC, datetime
+
+from sqlalchemy import func, select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.models.notification_campaign import NotificationCampaignJob
+
+# Statuses counted as "active" by count_active_by_requester.
+ACTIVE_STATUSES: frozenset[str] = frozenset(
+ {"queued", "segmenting", "generating", "validating", "preview_ready", "sending"}
+)
+# Entering any of these through transition() stamps completed_at.
+TERMINAL_STATUSES: frozenset[str] = frozenset({"completed", "failed", "cancelled"})
+
+# Job state machine: current status -> statuses transition() accepts next.
+# "completed" has no outgoing transitions; "failed" and "cancelled" can only
+# be re-queued.
+ALLOWED_TRANSITIONS: dict[str, set[str]] = {
+ "queued": {"segmenting", "cancelled", "failed"},
+ "segmenting": {"generating", "cancelled", "failed"},
+ "generating": {"validating", "cancelled", "failed"},
+ "validating": {"preview_ready", "cancelled", "failed"},
+ "preview_ready": {"sending", "cancelled"},
+ "sending": {"completed", "failed"},
+ "failed": {"queued"},
+ "cancelled": {"queued"},
+ "completed": set(),
+}
+
+
+def _utcnow() -> datetime:
+    """Return the current moment as a timezone-aware UTC datetime."""
+    return datetime.now(tz=UTC)
+
+
+class NotificationCampaignJobService:
+    """Persistence and state-machine helpers for notification-campaign jobs.
+
+    Every method flushes the session but never commits; committing is left
+    to the caller.
+    """
+
+    @staticmethod
+    async def create(
+        db: AsyncSession,
+        *,
+        requested_by_id: uuid.UUID,
+        job_type: str,
+        config: dict,
+    ) -> NotificationCampaignJob:
+        """Add a new job with initial "queued" progress and flush it."""
+        initial_progress = {"stage": "queued", "percent": 0, "counters": {}}
+        new_job = NotificationCampaignJob(
+            requested_by_id=requested_by_id,
+            job_type=job_type,
+            config=config,
+            progress=initial_progress,
+            delivery_stats={},
+        )
+        db.add(new_job)
+        await db.flush()
+        return new_job
+
+    @staticmethod
+    async def get(
+        db: AsyncSession, job_id: uuid.UUID, *, lock: bool = False
+    ) -> NotificationCampaignJob | None:
+        """Load a job by id, or None when absent.
+
+        With ``lock=True`` the row is selected FOR UPDATE and any instance
+        already present in the session is refreshed from the database.
+        """
+        stmt = select(NotificationCampaignJob).where(NotificationCampaignJob.id == job_id)
+        if not lock:
+            return await db.scalar(stmt)
+        locked = stmt.with_for_update().execution_options(populate_existing=True)
+        return await db.scalar(locked)
+
+    @staticmethod
+    async def list_jobs(
+        db: AsyncSession,
+        *,
+        limit: int = 50,
+        offset: int = 0,
+        requested_by_id: uuid.UUID | None = None,
+    ) -> list[NotificationCampaignJob]:
+        """Return one page of jobs, newest first, optionally for one requester."""
+        stmt = select(NotificationCampaignJob)
+        if requested_by_id is not None:
+            stmt = stmt.where(NotificationCampaignJob.requested_by_id == requested_by_id)
+        stmt = (
+            stmt.order_by(NotificationCampaignJob.created_at.desc())
+            .limit(limit)
+            .offset(offset)
+        )
+        page = await db.execute(stmt)
+        return list(page.scalars().all())
+
+    @staticmethod
+    async def count_active_by_requester(
+        db: AsyncSession, requester_id: uuid.UUID
+    ) -> int:
+        """Count the requester's jobs whose status is in ACTIVE_STATUSES."""
+        counting = select(func.count(NotificationCampaignJob.id)).where(
+            NotificationCampaignJob.requested_by_id == requester_id,
+            NotificationCampaignJob.status.in_(ACTIVE_STATUSES),
+        )
+        outcome = await db.execute(counting)
+        return outcome.scalar_one() or 0
+
+    @staticmethod
+    async def transition(
+        db: AsyncSession,
+        job: NotificationCampaignJob,
+        new_status: str,
+        *,
+        percent: int | None = None,
+        stage: str | None = None,
+    ) -> None:
+        """Move ``job`` to ``new_status`` if ALLOWED_TRANSITIONS permits it.
+
+        Updates the progress dict (``stage`` defaults to the new status),
+        stamps ``updated_at``, sets ``started_at`` the first time the job
+        enters "segmenting" and sets ``completed_at`` on terminal statuses.
+
+        Raises:
+            ValueError: if the move is not allowed from the current status.
+        """
+        permitted = ALLOWED_TRANSITIONS.get(job.status, set())
+        if new_status not in permitted:
+            raise ValueError(
+                f"Cannot transition notification-campaign job from {job.status!r} to {new_status!r}"
+            )
+
+        job.status = new_status
+        timestamp = _utcnow()
+
+        # Copy before editing so the attribute is assigned a new dict.
+        updated_progress = dict(job.progress or {})
+        if percent is not None:
+            updated_progress["percent"] = percent
+        updated_progress["stage"] = new_status if stage is None else stage
+        job.progress = updated_progress
+
+        job.updated_at = timestamp
+        if new_status == "segmenting" and job.started_at is None:
+            job.started_at = timestamp
+        if new_status in TERMINAL_STATUSES:
+            job.completed_at = timestamp
+        await db.flush()
+
+    @staticmethod
+    async def set_preview(
+        db: AsyncSession,
+        job: NotificationCampaignJob,
+        *,
+        artifact: dict,
+        warnings: list[str],
+        blocking_errors: list[str],
+    ) -> None:
+        """Transition to "preview_ready" and attach the preview payload."""
+        service = NotificationCampaignJobService
+        await service.transition(db, job, "preview_ready", percent=100)
+        job.artifact = artifact
+        job.warnings = warnings
+        job.blocking_errors = blocking_errors
+        await db.flush()
+
+    @staticmethod
+    async def set_failed(
+        db: AsyncSession,
+        job: NotificationCampaignJob,
+        error: str,
+    ) -> None:
+        """Transition to "failed" and store ``error`` as the job's message."""
+        service = NotificationCampaignJobService
+        await service.transition(db, job, "failed")
+        job.error_message = error
+        await db.flush()
+
+    @staticmethod
+    async def set_delivery_stats(
+        db: AsyncSession,
+        job: NotificationCampaignJob,
+        *,
+        sent: int,
+        failed: int,
+        skipped: int,
+    ) -> None:
+        """Store delivery counters plus their total; the status is untouched."""
+        stats = {"sent": sent, "failed": failed, "skipped": skipped}
+        stats["total"] = sent + failed + skipped
+        job.delivery_stats = stats
+        job.updated_at = _utcnow()
+        await db.flush()
diff --git a/backend-service/app/services/push_notification_service.py b/backend-service/app/services/push_notification_service.py
index d222955d..45907ff4 100644
--- a/backend-service/app/services/push_notification_service.py
+++ b/backend-service/app/services/push_notification_service.py
@@ -139,3 +139,40 @@ async def send_streak_at_risk(
except Exception as exc: # pragma: no cover - external IO
logger.exception("Failed to send streak alert push: %s", exc)
return False
+
+    async def send_word_of_day(
+        self,
+        *,
+        tokens: list[str],
+        word: str,
+        definition: str,
+    ) -> bool:
+        """Send the daily Word of the Day push notification.
+
+        Empty tokens are dropped. The remaining tokens are sent in batches of
+        at most 500, the per-message limit of an FCM multicast, so a large
+        audience no longer fails when the message is built. A failing batch
+        is logged and the remaining batches are still attempted.
+
+        Returns:
+            True if at least one delivery was reported successful. False if
+            there were no usable tokens, Firebase could not be initialised,
+            or nothing was delivered.
+        """
+        clean_tokens = [t for t in tokens if t]
+        if not clean_tokens:
+            return False
+
+        try:
+            _init_firebase_app()
+        except Exception as exc:
+            logger.warning("Firebase not configured; skipping word-of-day push: %s", exc)
+            return False
+
+        notification = messaging.Notification(
+            title=f"Word of the Day: {word}",
+            body=definition[:100],
+        )
+        data = {
+            "type": "word_of_day",
+            "route": "/vocabulary/word-of-day",
+            "word": word,
+        }
+
+        batch_size = 500  # a MulticastMessage accepts at most 500 tokens
+        delivered = 0
+        for start in range(0, len(clean_tokens), batch_size):
+            message = messaging.MulticastMessage(
+                notification=notification,
+                data=data,
+                tokens=clean_tokens[start : start + batch_size],
+            )
+            try:
+                response = await run_in_threadpool(messaging.send_each_for_multicast, message)
+                delivered += int(getattr(response, "success_count", 0) or 0)
+            except Exception as exc:  # pragma: no cover - external IO
+                logger.exception("Failed to send word-of-day push: %s", exc)
+        return delivered > 0
diff --git a/backend-service/app/services/ranking_agent/__init__.py b/backend-service/app/services/ranking_agent/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend-service/app/services/ranking_agent/achievement_batch.py b/backend-service/app/services/ranking_agent/achievement_batch.py
new file mode 100644
index 00000000..793875b5
--- /dev/null
+++ b/backend-service/app/services/ranking_agent/achievement_batch.py
@@ -0,0 +1,114 @@
+"""Achievement batch engine — computes preview for bulk achievement granting."""
+
+from __future__ import annotations
+
+from sqlalchemy import and_, func, not_, select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.models.gamification import Achievement, LeaderboardEntry, UserAchievement
+from app.models.progress import Streak
+from app.models.user import User
+from app.models.vocabulary import UserVocabulary, VocabularyStatus
+
+
+class AchievementBatchEngine:
+    """Computes a read-only preview for granting achievements in bulk."""
+
+    async def calculate(self, db: AsyncSession, config: dict) -> dict:
+        """Build the preview for ``config``.
+
+        ``config["achievement_slugs"]`` names the achievements and
+        ``config["criteria"]`` selects the eligible users. For each
+        achievement found, the preview reports how many eligible users
+        already hold it and how many would receive it.
+
+        Returns:
+            A dict with per-achievement ``achievements`` rows,
+            ``total_users_affected`` (distinct users who would receive at
+            least one achievement), ``total_xp_to_award``,
+            ``total_gems_to_award`` and ``blocking_errors`` (one entry per
+            unknown slug).
+        """
+        slugs: list[str] = config.get("achievement_slugs", [])
+        criteria: dict = config.get("criteria", {})
+
+        eligible_user_ids = await self._resolve_eligible_users(db, criteria)
+        eligible_set = set(eligible_user_ids)
+
+        achievement_rows = await db.execute(
+            select(Achievement).where(Achievement.slug.in_(slugs))
+        )
+        achievements = achievement_rows.scalars().all()
+
+        missing_slugs = set(slugs) - {a.slug for a in achievements}
+        # Sorted so the error list is stable between runs.
+        blocking_errors = [
+            f"Achievement slug not found: '{s}'" for s in sorted(missing_slugs)
+        ]
+
+        results = []
+        all_affected_user_ids: set[str] = set()
+        total_xp = 0
+        total_gems = 0
+
+        for ach in achievements:
+            holder_rows = await db.execute(
+                select(UserAchievement.user_id).where(
+                    and_(
+                        UserAchievement.achievement_id == ach.id,
+                        UserAchievement.user_id.in_(eligible_user_ids),
+                    )
+                )
+            )
+            holders = set(holder_rows.scalars().all())
+
+            # Only users who do not hold the achievement yet are affected.
+            pending = eligible_set - holders
+            already_unlocked = len(holders)
+            to_grant = len(pending)
+
+            results.append(
+                {
+                    "slug": ach.slug,
+                    "name": ach.name,
+                    "xp_reward": ach.xp_reward,
+                    "gems_reward": ach.gems_reward,
+                    "rarity": ach.rarity,
+                    "eligible_users": len(eligible_user_ids),
+                    "already_unlocked": already_unlocked,
+                    "to_grant": to_grant,
+                }
+            )
+            all_affected_user_ids.update(str(uid) for uid in pending)
+            total_xp += to_grant * (ach.xp_reward or 0)
+            total_gems += to_grant * (ach.gems_reward or 0)
+
+        return {
+            "achievements": results,
+            "total_users_affected": len(all_affected_user_ids),
+            "total_xp_to_award": total_xp,
+            "total_gems_to_award": total_gems,
+            "blocking_errors": blocking_errors,
+        }
+
+    async def _resolve_eligible_users(
+        self, db: AsyncSession, criteria: dict
+    ) -> list:
+        """Return ids of active users that satisfy every given criterion.
+
+        Recognised ``criteria`` keys: ``min_streak``,
+        ``min_vocabulary_mastered``, ``leagues`` (current leaderboard week
+        only) and ``cefr_levels``. Keys that are absent do not filter.
+        """
+        query = select(User.id).where(User.is_active.is_(True))
+
+        min_streak = criteria.get("min_streak")
+        if min_streak is not None:
+            streak_user_ids = select(Streak.user_id).where(
+                Streak.current_streak >= min_streak
+            )
+            query = query.where(User.id.in_(streak_user_ids))
+
+        min_vocab = criteria.get("min_vocabulary_mastered")
+        if min_vocab is not None:
+            vocab_user_ids = (
+                select(UserVocabulary.user_id)
+                .where(UserVocabulary.status == VocabularyStatus.MASTERED)
+                .group_by(UserVocabulary.user_id)
+                .having(func.count(UserVocabulary.id) >= min_vocab)
+            )
+            query = query.where(User.id.in_(vocab_user_ids))
+
+        leagues = criteria.get("leagues")
+        if leagues:
+            from app.crud.gamification import LeaderboardCRUD
+
+            week_start, _ = LeaderboardCRUD.get_current_week_range()
+            league_user_ids = select(LeaderboardEntry.user_id).where(
+                and_(
+                    LeaderboardEntry.week_start == week_start,
+                    LeaderboardEntry.league.in_(
+                        [league.lower() for league in leagues]
+                    ),
+                )
+            )
+            query = query.where(User.id.in_(league_user_ids))
+
+        cefr_levels = criteria.get("cefr_levels")
+        if cefr_levels:
+            query = query.where(
+                User.proficiency_level.in_([level.upper() for level in cefr_levels])
+            )
+
+        result = await db.execute(query)
+        return result.scalars().all()
diff --git a/backend-service/app/services/ranking_agent/apply.py b/backend-service/app/services/ranking_agent/apply.py
new file mode 100644
index 00000000..e3ad48aa
--- /dev/null
+++ b/backend-service/app/services/ranking_agent/apply.py
@@ -0,0 +1,366 @@
+"""Transactional application of Ranking/Gamification Agent job artifacts."""
+
+from __future__ import annotations
+
+import uuid
+from datetime import UTC, datetime, timedelta
+
+from sqlalchemy import and_, select
+from sqlalchemy.exc import IntegrityError
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.crud.gamification import AchievementCRUD, LeaderboardCRUD, WalletCRUD
+from app.models.gamification import (
+ Achievement,
+ ActivityFeed,
+ LeaderboardEntry,
+ ShopItem,
+ UserAchievement,
+ UserInventory,
+)
+from app.models.user import User
+from app.models.ranking_agent import RankingAgentJob
+from app.services.ranking_agent_jobs import RankingAgentJobService
+from app.services.rank_service import apply_rank_info_to_user, calculate_rank
+
+# League ladder, lowest to highest.
+# NOTE(review): not referenced in the visible part of this module and it
+# repeats LEAGUE_ORDER from ranking_agent/league_reset.py — confirm it is needed.
+_LEAGUE_ORDER = [
+ "bronze", "silver", "gold", "platinum", "sapphire", "ruby", "amethyst", "master"
+]
+
+
+class RankingAgentApplyService:
+    """Applies the preview artifact of a ranking-agent job to the database."""
+
+    @staticmethod
+    async def apply(
+        db: AsyncSession, job_id: uuid.UUID
+    ) -> tuple[RankingAgentJob, dict]:
+        """Apply the job identified by ``job_id`` and mark it completed.
+
+        The job row is loaded with a lock. A job that is already completed
+        is returned unchanged together with its stored entity ids, so
+        calling this twice does not apply the artifact twice.
+
+        Returns:
+            ``(job, result)`` where ``result`` is the dict produced by the
+            handler for the job's type.
+
+        Raises:
+            LookupError: if no job with ``job_id`` exists.
+            ValueError: if the job is not preview-ready, has blocking errors,
+                has no artifact, or has an unknown ``job_type``.
+        """
+        job = await RankingAgentJobService.get(db, job_id, lock=True)
+        if job is None:
+            raise LookupError("Ranking-agent job not found")
+
+        status = job.status
+        if status == "completed":
+            return job, job.created_entity_ids
+        if status != "preview_ready":
+            raise ValueError("Only preview-ready jobs can be applied")
+        if job.blocking_errors:
+            raise ValueError("Job has blocking validation errors")
+        if not job.artifact:
+            raise ValueError("Job has no preview artifact")
+
+        await RankingAgentJobService.transition(db, job, "applying", percent=100)
+
+        kind = job.job_type
+        if kind == "achievement_batch":
+            outcome = await _apply_achievement_batch(db, job.artifact, job.config)
+        elif kind == "xp_event":
+            outcome = await _apply_xp_event(db, job.artifact, job.config)
+        elif kind == "league_reset":
+            outcome = await _apply_league_reset(db, job.artifact)
+        else:
+            raise ValueError(f"Unknown job_type: {job.job_type}")
+
+        job.created_entity_ids = outcome
+        job.completed_at = datetime.now(UTC)
+        await RankingAgentJobService.transition(db, job, "completed", percent=100)
+        await db.flush()
+        return job, outcome
+
+
+async def _apply_league_reset(db: AsyncSession, artifact: dict) -> dict:
+    """Apply a league-reset preview: move users and seed next week's board.
+
+    Users listed under ``artifact["promotions"]`` / ``artifact["demotions"]``
+    are moved to their ``"to"`` league. Every other entry of the reset week
+    that is flagged neither promoted nor demoted is carried over to the
+    following week in its current league. The following week starts at
+    ``artifact["week_end"]`` and lasts seven days.
+
+    Returns:
+        A dict with ``promoted_user_ids``, ``demoted_user_ids`` and the
+        ``week_start`` / ``week_end`` strings taken from the artifact.
+    """
+    week_start = datetime.fromisoformat(artifact["week_start"])
+    week_end = datetime.fromisoformat(artifact["week_end"])
+    next_week_start = week_end
+    next_week_end = next_week_start + timedelta(days=7)
+
+    promoted_ids: list[str] = []
+    demoted_ids: list[str] = []
+
+    # Promotions are processed before demotions.
+    for bucket, is_promotion, moved_ids in (
+        ("promotions", True, promoted_ids),
+        ("demotions", False, demoted_ids),
+    ):
+        for entry_data in artifact.get(bucket, []):
+            await _process_league_change(
+                db,
+                entry_data=entry_data,
+                old_league=entry_data["league"],
+                new_league=entry_data["to"],
+                week_start=week_start,
+                next_week_start=next_week_start,
+                next_week_end=next_week_end,
+                is_promotion=is_promotion,
+            )
+            moved_ids.append(entry_data["user_id"])
+
+    # Set lookup keeps the carry-over loop linear in the number of entries.
+    moved = set(promoted_ids) | set(demoted_ids)
+
+    # Create next-week entries for unchanged users
+    unchanged_entries = await db.execute(
+        select(LeaderboardEntry).where(
+            and_(
+                LeaderboardEntry.week_start == week_start,
+                LeaderboardEntry.is_promoted.is_(False),
+                LeaderboardEntry.is_demoted.is_(False),
+            )
+        )
+    )
+    for entry in unchanged_entries.scalars():
+        if str(entry.user_id) not in moved:
+            await _upsert_next_week_entry(
+                db, entry.user_id, entry.league, next_week_start, next_week_end
+            )
+
+    return {
+        "promoted_user_ids": promoted_ids,
+        "demoted_user_ids": demoted_ids,
+        "week_start": artifact["week_start"],
+        "week_end": artifact["week_end"],
+    }
+
+
+async def _process_league_change(
+    db: AsyncSession,
+    *,
+    entry_data: dict,
+    old_league: str,
+    new_league: str,
+    week_start: datetime,
+    next_week_start: datetime,
+    next_week_end: datetime,
+    is_promotion: bool,
+) -> None:
+    """Apply one user's promotion or demotion.
+
+    Flags the user's entry for ``week_start`` (when one exists), upserts the
+    next-week entry in ``new_league``, recomputes the user's rank info and
+    adds a public "league_change" activity-feed item.
+    """
+    user_id = uuid.UUID(entry_data["user_id"])
+
+    # Flag this week's entry so it is not carried over unchanged later.
+    current_week_entry = await db.scalar(
+        select(LeaderboardEntry).where(
+            and_(
+                LeaderboardEntry.user_id == user_id,
+                LeaderboardEntry.week_start == week_start,
+            )
+        )
+    )
+    if current_week_entry:
+        if not is_promotion:
+            current_week_entry.is_demoted = True
+        else:
+            current_week_entry.is_promoted = True
+        await db.flush()
+
+    # Next week's entry lives in the new league.
+    await _upsert_next_week_entry(db, user_id, new_league, next_week_start, next_week_end)
+
+    # Refresh rank info from the user's level and proficiency.
+    user = await db.get(User, user_id)
+    if user:
+        proficiency = getattr(user, "proficiency_level", None) or "A1"
+        numeric = getattr(user, "numeric_level", None) or 1
+        apply_rank_info_to_user(user, calculate_rank(numeric, proficiency))
+        await db.flush()
+
+    # Tell the user about the move through the activity feed.
+    direction = "promoted" if is_promotion else "demoted"
+    if is_promotion:
+        message = f"You were {direction} to {new_league.capitalize()} league!"
+    else:
+        message = f"You were moved to {new_league.capitalize()} league."
+    feed_item = ActivityFeed(
+        user_id=user_id,
+        activity_type="league_change",
+        activity_data={
+            "from_league": old_league,
+            "to_league": new_league,
+            "direction": direction,
+        },
+        message=message,
+        is_public=True,
+    )
+    db.add(feed_item)
+    await db.flush()
+
+
+async def _upsert_next_week_entry(
+    db: AsyncSession,
+    user_id: uuid.UUID,
+    league: str,
+    week_start: datetime,
+    week_end: datetime,
+) -> None:
+    """Ensure the user has a leaderboard entry for ``week_start`` in ``league``.
+
+    An existing entry for that user and week only has its league updated;
+    otherwise a new entry spanning ``week_start``..``week_end`` is added.
+    """
+    lookup = select(LeaderboardEntry).where(
+        and_(
+            LeaderboardEntry.user_id == user_id,
+            LeaderboardEntry.week_start == week_start,
+        )
+    )
+    current = await db.scalar(lookup)
+    if not current:
+        fresh_entry = LeaderboardEntry(
+            user_id=user_id,
+            week_start=week_start,
+            week_end=week_end,
+            league=league,
+        )
+        db.add(fresh_entry)
+    else:
+        current.league = league
+    await db.flush()
+
+
+async def _apply_xp_event(db: AsyncSession, artifact: dict, config: dict) -> dict:
+    """Grant an active XP-boost inventory item to every targeted user.
+
+    An available ``double_xp`` shop item is reused, or a free one is created
+    from the artifact's event name and multiplier. ``artifact["target"]``
+    selects the audience: ``"league:<name>"`` (current leaderboard week),
+    ``"cefr:<level>"``, or anything else for all active users. Users who
+    already hold an active inventory row for the item are skipped.
+
+    Returns:
+        A dict with ``shop_item_id``, ``granted_count``, ``expires_at``
+        (ISO string) and ``target``.
+    """
+    expires_at = datetime.fromisoformat(artifact["expires_at"])
+    target = artifact.get("target", "all")
+    duration_hours = int(config.get("duration_hours", 24))
+    event_name = artifact.get("event_name", "XP Event")
+    multiplier = artifact.get("multiplier", 2.0)
+
+    # Find or create double_xp ShopItem
+    # NOTE(review): an existing item is reused as-is, so its own effects
+    # apply rather than this event's multiplier — confirm that is intended.
+    shop_item = await db.scalar(
+        select(ShopItem).where(
+            and_(
+                ShopItem.item_type == "double_xp",
+                ShopItem.is_available.is_(True),
+            )
+        )
+    )
+    if shop_item is None:
+        shop_item = ShopItem(
+            name=event_name,
+            description=f"System XP boost: {multiplier}x for {duration_hours}h",
+            item_type="double_xp",
+            price_gems=0,
+            effects={"duration_hours": duration_hours, "multiplier": multiplier},
+            is_available=True,
+        )
+        db.add(shop_item)
+        await db.flush()
+
+    # Resolve the audience with a fresh query.
+    query = select(User.id).where(User.is_active.is_(True))
+    if target.startswith("league:"):
+        league = target.split(":", 1)[1]
+        week_start, _ = LeaderboardCRUD.get_current_week_range()
+        query = query.where(
+            User.id.in_(
+                select(LeaderboardEntry.user_id).where(
+                    and_(
+                        LeaderboardEntry.week_start == week_start,
+                        LeaderboardEntry.league == league,
+                    )
+                )
+            )
+        )
+    elif target.startswith("cefr:"):
+        level = target.split(":", 1)[1].upper()
+        query = query.where(User.proficiency_level == level)
+
+    user_ids = (await db.execute(query)).scalars().all()
+
+    # One query for everyone who already holds an active boost, instead of
+    # one lookup per targeted user.
+    holder_rows = await db.execute(
+        select(UserInventory.user_id).where(
+            and_(
+                UserInventory.shop_item_id == shop_item.id,
+                UserInventory.is_active.is_(True),
+            )
+        )
+    )
+    already_boosted = set(holder_rows.scalars().all())
+
+    now = datetime.now(UTC)
+    grants = [
+        UserInventory(
+            user_id=uid,
+            shop_item_id=shop_item.id,
+            quantity=1,
+            is_active=True,
+            activated_at=now,
+            expires_at=expires_at,
+            purchased_at=now,
+        )
+        for uid in user_ids
+        if uid not in already_boosted
+    ]
+    db.add_all(grants)
+
+    await db.flush()
+    return {
+        "shop_item_id": str(shop_item.id),
+        "granted_count": len(grants),
+        "expires_at": expires_at.isoformat(),
+        "target": target,
+    }
+
+
+async def _apply_achievement_batch(
+    db: AsyncSession, artifact: dict, config: dict
+) -> dict:
+    """Grant the configured achievements to every eligible user lacking them.
+
+    Eligibility is recomputed from ``config["criteria"]`` at apply time;
+    ``artifact`` is accepted for signature parity with the other handlers
+    and is not read. Each grant runs in its own savepoint together with its
+    gem reward, and is counted only after that savepoint succeeds.
+
+    Returns:
+        A dict with ``granted_count``, ``achievement_slugs`` and
+        ``eligible_user_count``.
+    """
+    slugs: list[str] = config.get("achievement_slugs", [])
+    criteria: dict = config.get("criteria", {})
+
+    from app.services.ranking_agent.achievement_batch import AchievementBatchEngine
+
+    eligible_user_ids = await AchievementBatchEngine()._resolve_eligible_users(db, criteria)
+
+    ach_rows = await db.execute(
+        select(Achievement).where(Achievement.slug.in_(slugs))
+    )
+    achievements = ach_rows.scalars().all()
+
+    granted_records: list[str] = []
+    now = datetime.now(UTC)
+
+    for ach in achievements:
+        # NOTE(review): ach.xp_reward is never awarded here although the
+        # preview engine reports total_xp_to_award — confirm whether XP
+        # should be granted alongside the gems.
+        gems = ach.gems_reward or 0
+
+        # Load current holders once per achievement instead of once per user.
+        holder_rows = await db.execute(
+            select(UserAchievement.user_id).where(
+                and_(
+                    UserAchievement.achievement_id == ach.id,
+                    UserAchievement.user_id.in_(eligible_user_ids),
+                )
+            )
+        )
+        holders = set(holder_rows.scalars().all())
+
+        for uid in eligible_user_ids:
+            if uid in holders:
+                continue
+
+            try:
+                async with db.begin_nested():
+                    db.add(
+                        UserAchievement(
+                            user_id=uid,
+                            achievement_id=ach.id,
+                            unlocked_at=now,
+                        )
+                    )
+                    await db.flush()
+
+                    if gems > 0:
+                        await WalletCRUD.add_gems(
+                            db,
+                            uid,
+                            gems,
+                            source="achievement_batch",
+                            description=f"Achievement: {ach.name}",
+                            commit=False,
+                        )
+            except IntegrityError:
+                # Deliberate best effort: a concurrent unlock won the race;
+                # the savepoint is rolled back and the grant is not counted.
+                continue
+            granted_records.append(f"{uid}:{ach.slug}")
+
+    return {
+        "granted_count": len(granted_records),
+        "achievement_slugs": slugs,
+        "eligible_user_count": len(eligible_user_ids),
+    }
diff --git a/backend-service/app/services/ranking_agent/league_reset.py b/backend-service/app/services/ranking_agent/league_reset.py
new file mode 100644
index 00000000..78497d36
--- /dev/null
+++ b/backend-service/app/services/ranking_agent/league_reset.py
@@ -0,0 +1,125 @@
+"""League reset engine — computes promotion/demotion preview for a given week."""
+
+from __future__ import annotations
+
+import math
+from datetime import UTC, datetime, timedelta
+
+from sqlalchemy import and_, select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.models.gamification import LeaderboardEntry
+from app.models.user import User
+
+LEAGUE_ORDER = [  # tiers from lowest to highest; _next_league() steps along this list
+    "bronze", "silver", "gold", "platinum", "sapphire", "ruby", "amethyst", "master"
+]
+
+
+def _week_range_for(week_start_iso: str | None) -> tuple[datetime, datetime]:
+    """Return (week_start, week_end) for the given ISO date or the last completed week."""
+    if week_start_iso:
+        week_start = datetime.fromisoformat(week_start_iso).replace(  # time zeroed; tzinfo is overwritten, not converted
+            hour=0, minute=0, second=0, microsecond=0, tzinfo=UTC
+        )  # NOTE(review): the supplied date is used as-is; it is not snapped to a Monday
+    else:
+        now = datetime.now(UTC)
+        days_since_monday = now.weekday()  # Monday == 0
+        last_monday = now - timedelta(days=days_since_monday + 7)  # Monday of the week before the current one
+        week_start = last_monday.replace(hour=0, minute=0, second=0, microsecond=0)
+    week_end = week_start + timedelta(days=7)  # exactly seven days after week_start
+    return week_start, week_end
+
+
+def _next_league(current: str, direction: int) -> str:  # move `direction` places along LEAGUE_ORDER, clamped to its ends
+    idx = LEAGUE_ORDER.index(current)  # raises ValueError when `current` is not in LEAGUE_ORDER
+    new_idx = max(0, min(len(LEAGUE_ORDER) - 1, idx + direction))
+    return LEAGUE_ORDER[new_idx]
+
+
+class LeagueResetEngine:  # builds a promotion/demotion preview from one week's leaderboard rows (SELECT only)
+    def __init__(
+        self,
+        promotion_threshold: float = 0.10,
+        demotion_threshold: float = 0.10,
+    ) -> None:
+        self._promo_pct = promotion_threshold  # fraction of each league taken from the top of the ranking
+        self._demo_pct = demotion_threshold  # fraction of each league taken from the bottom of the ranking
+
+    async def calculate(
+        self,
+        db: AsyncSession,
+        config: dict,
+    ) -> dict:
+        week_start, week_end = _week_range_for(config.get("week_start"))
+        # Rows are ordered by league, then XP descending, so list position within a league is the rank.
+        rows = await db.execute(
+            select(LeaderboardEntry, User.username, User.email)
+            .join(User, LeaderboardEntry.user_id == User.id)
+            .where(
+                and_(
+                    LeaderboardEntry.week_start == week_start,
+                    LeaderboardEntry.week_end == week_end,
+                )
+            )
+            .order_by(LeaderboardEntry.league, LeaderboardEntry.xp_earned.desc())
+        )
+        entries_with_users = rows.all()
+        # Every known league is pre-seeded, so empty leagues still appear in league_summary.
+        by_league: dict[str, list[dict]] = {l: [] for l in LEAGUE_ORDER}
+        for entry, username, email in entries_with_users:
+            by_league[entry.league].append(  # KeyError if entry.league is not one of LEAGUE_ORDER
+                {
+                    "user_id": str(entry.user_id),
+                    "username": username or email.split("@")[0],  # falls back to the email local part
+                    "league": entry.league,
+                    "xp_earned": entry.xp_earned,
+                    "lessons_completed": entry.lessons_completed,
+                    "entry_id": str(entry.id),
+                }
+            )
+
+        promotions = []
+        demotions = []
+        unchanged_count = 0
+        league_summary: dict[str, dict] = {}
+
+        for league, members in by_league.items():
+            n = len(members)
+            league_summary[league] = {"total": n, "promoted": 0, "demoted": 0, "unchanged": 0}
+            if n == 0:
+                continue
+            # ceil() rounds the cut up, so a positive threshold selects at least one member of a non-empty league.
+            promo_cut = math.ceil(n * self._promo_pct)
+            demo_cut = math.ceil(n * self._demo_pct)
+            # Promotion is tested first: a member inside both cuts is promoted rather than demoted.
+            for i, m in enumerate(members):
+                rank_pos = i + 1
+                if league != "master" and rank_pos <= promo_cut:  # top league cannot promote
+                    to_league = _next_league(league, +1)
+                    promotions.append({**m, "to": to_league, "rank_pos": rank_pos})
+                    league_summary[league]["promoted"] += 1
+                elif league != "bronze" and rank_pos > n - demo_cut:  # bottom league cannot demote
+                    to_league = _next_league(league, -1)
+                    demotions.append({**m, "to": to_league, "rank_pos": rank_pos})
+                    league_summary[league]["demoted"] += 1
+                else:
+                    unchanged_count += 1
+                    league_summary[league]["unchanged"] += 1
+
+        total = sum(d["total"] for d in league_summary.values())
+        week_label = (
+            f"{week_start.strftime('%Y-%m-%d')} → "
+            f"{(week_end - timedelta(seconds=1)).strftime('%Y-%m-%d')}"  # one second back gives the last day inside the range
+        )
+
+        return {
+            "week": week_label,
+            "week_start": week_start.isoformat(),
+            "week_end": week_end.isoformat(),
+            "total_participants": total,
+            "promotions": promotions,
+            "demotions": demotions,
+            "unchanged": unchanged_count,
+            "league_summary": league_summary,
+        }
diff --git a/backend-service/app/services/ranking_agent/xp_event.py b/backend-service/app/services/ranking_agent/xp_event.py
new file mode 100644
index 00000000..03b44bcb
--- /dev/null
+++ b/backend-service/app/services/ranking_agent/xp_event.py
@@ -0,0 +1,67 @@
+"""XP Event engine — computes preview for a system-wide XP boost grant."""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime, timedelta
+
+from sqlalchemy import func, select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.models.gamification import LeaderboardEntry, UserInventory
+from app.models.progress import Streak
+from app.models.user import User
+
+_VALID_LEAGUES = {"bronze", "silver", "gold", "platinum", "sapphire", "ruby", "amethyst", "master"}  # NOTE(review): not referenced by XPEventEngine.calculate
+_VALID_CEFR = {"A1", "A2", "B1", "B2", "C1", "C2"}  # NOTE(review): not referenced by XPEventEngine.calculate
+
+_SAMPLE_LIMIT = 10  # maximum number of users echoed back in "sample_users"
+
+
+class XPEventEngine:  # computes a preview dict for an XP-boost event; calculate() only issues a SELECT
+    async def calculate(self, db: AsyncSession, config: dict) -> dict:
+        target: str = config.get("target", "all")  # "all", "league:<name>" or "cefr:<level>"; anything else behaves like "all"
+        duration_hours: int = int(config.get("duration_hours", 24))
+        multiplier: float = float(config.get("multiplier", 2.0))
+        name: str = config.get("name", "XP Event")
+
+        query = select(User.id, User.username, User.email)
+
+        if target.startswith("league:"):
+            league = target.split(":", 1)[1]  # used verbatim; not checked against a list of valid leagues
+            from app.crud.gamification import LeaderboardCRUD
+            week_start, week_end = LeaderboardCRUD.get_current_week_range()  # week_end is not used below
+            league_user_ids = select(LeaderboardEntry.user_id).where(
+                LeaderboardEntry.week_start == week_start,
+                LeaderboardEntry.league == league,
+            )
+            query = query.where(User.id.in_(league_user_ids))
+        elif target.startswith("cefr:"):
+            level = target.split(":", 1)[1].upper()  # upper-cased, otherwise unvalidated
+            query = query.where(User.proficiency_level == level)
+
+        query = query.where(User.is_active.is_(True))
+        # Every matching row is fetched; the count is taken from the result length in Python.
+        result = await db.execute(query)
+        users = result.all()
+        user_ids = [str(row[0]) for row in users]
+        count = len(user_ids)
+
+        sample = [
+            {"user_id": str(r[0]), "username": r[1] or r[2].split("@")[0]}  # username, else the email local part
+            for r in users[:_SAMPLE_LIMIT]
+        ]
+
+        expires_at = datetime.now(UTC) + timedelta(hours=duration_hours)
+        estimated_xp_delta = count * 50 * (multiplier - 1)  # NOTE(review): 50 looks like an assumed per-user baseline XP — confirm
+
+        return {
+            "event_name": name,
+            "target": target,
+            "multiplier": multiplier,
+            "duration_hours": duration_hours,
+            "expires_at": expires_at.isoformat(),
+            "target_user_count": count,
+            "sample_users": sample,
+            "estimated_total_xp_delta": f"+{estimated_xp_delta:,.0f} XP",
+            "item_type": "double_xp",
+        }
diff --git a/backend-service/app/services/ranking_agent_ai_client.py b/backend-service/app/services/ranking_agent_ai_client.py
new file mode 100644
index 00000000..e47bde54
--- /dev/null
+++ b/backend-service/app/services/ranking_agent_ai_client.py
@@ -0,0 +1,49 @@
+"""HTTP client for Ranking Agent AI insights via ai-service."""
+
+from __future__ import annotations
+
+import logging
+import os
+from typing import Any, Optional
+
+import httpx
+
+from app.core.config import settings
+
+logger = logging.getLogger(__name__)
+
+
+class RankingAgentAIClient:  # thin async HTTP wrapper around the ai-service insights endpoint
+    def __init__(self) -> None:
+        self._base_url = settings.AI_SERVICE_URL.rstrip("/")  # trailing slash removed so the path joins cleanly
+        self._api_key = os.getenv("AI_ADMIN_API_KEY", "").strip()  # read from the environment, not from settings
+        self._timeout = httpx.Timeout(settings.RANKING_AGENT_AI_INSIGHTS_TIMEOUT_SECONDS)
+
+    async def get_insights(
+        self, job_type: str, artifact: dict[str, Any]
+    ) -> Optional[str]:
+        """
+        Call ai-service for Groq-generated insights.
+        Returns None on any error so the caller degrades gracefully.
+        """
+        if not self._api_key:
+            logger.warning("AI_ADMIN_API_KEY not set — skipping AI insights")
+            return None
+        try:
+            async with httpx.AsyncClient(timeout=self._timeout) as client:  # a new client is created per call
+                response = await client.post(
+                    f"{self._base_url}/internal/ranking-agent/insights",
+                    headers={"X-Admin-Api-Key": self._api_key},
+                    json={"job_type": job_type, "artifact": artifact},
+                )
+            if response.status_code != 200:  # any non-200 status is treated as "no insight"
+                logger.warning(
+                    "ranking_agent_ai_client: ai-service returned %d",
+                    response.status_code,
+                )
+                return None
+            data = response.json()
+            return data.get("insight")  # None when the key is absent
+        except Exception:  # deliberate catch-all: insights are best-effort, failures are logged with traceback
+            logger.exception("ranking_agent_ai_client: request to ai-service failed")
+            return None
diff --git a/backend-service/app/services/ranking_agent_jobs.py b/backend-service/app/services/ranking_agent_jobs.py
new file mode 100644
index 00000000..8999b588
--- /dev/null
+++ b/backend-service/app/services/ranking_agent_jobs.py
@@ -0,0 +1,154 @@
+"""Application service for durable Ranking/Gamification Agent jobs."""
+
+from __future__ import annotations
+
+import uuid
+from datetime import UTC, datetime
+
+from sqlalchemy import func, select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.models.ranking_agent import RankingAgentJob
+
+ACTIVE_STATUSES: frozenset[str] = frozenset(  # statuses counted by count_active_by_requester
+    {"queued", "calculating", "validating", "preview_ready", "applying"}
+)
+TERMINAL_STATUSES: frozenset[str] = frozenset({"completed", "failed", "cancelled"})  # entering one stamps completed_at
+# Maps current status -> statuses it may move to; enforced by RankingAgentJobService.transition.
+ALLOWED_TRANSITIONS: dict[str, set[str]] = {
+    "queued": {"calculating", "cancelled", "failed"},
+    "calculating": {"validating", "cancelled", "failed"},
+    "validating": {"preview_ready", "cancelled", "failed"},
+    "preview_ready": {"applying", "cancelled", "queued", "failed"},
+    "applying": {"completed", "failed"},  # an applying job cannot be cancelled
+    "failed": {"queued"},  # retry
+    "cancelled": {"queued"},  # retry
+    "completed": set(),  # final: no way out
+}
+
+
+def _utcnow() -> datetime:  # current time as a timezone-aware UTC datetime
+    return datetime.now(UTC)
+
+
+class RankingAgentJobService:  # stateless helpers for the RankingAgentJob lifecycle; every method flushes but never commits
+    @staticmethod
+    async def create(
+        db: AsyncSession,
+        *,
+        requested_by_id: uuid.UUID,
+        job_type: str,
+        config: dict,
+    ) -> RankingAgentJob:
+        job = RankingAgentJob(
+            requested_by_id=requested_by_id,
+            job_type=job_type,
+            config=config,
+            progress={"stage": "queued", "percent": 0, "counters": {}},
+        )
+        db.add(job)
+        await db.flush()  # flush so the returned job carries its database-generated values
+        return job
+
+    @staticmethod
+    async def get(
+        db: AsyncSession, job_id: uuid.UUID, *, lock: bool = False
+    ) -> RankingAgentJob | None:
+        query = select(RankingAgentJob).where(RankingAgentJob.id == job_id)
+        if lock:  # SELECT ... FOR UPDATE, and refresh any copy already held in the session
+            query = query.with_for_update().execution_options(populate_existing=True)
+        return await db.scalar(query)
+
+    @staticmethod
+    async def count_active_by_requester(
+        db: AsyncSession, requester_id: uuid.UUID
+    ) -> int:
+        return await db.scalar(
+            select(func.count(RankingAgentJob.id)).where(
+                RankingAgentJob.requested_by_id == requester_id,
+                RankingAgentJob.status.in_(ACTIVE_STATUSES),
+            )
+        ) or 0
+
+    @staticmethod
+    async def list(
+        db: AsyncSession, *, limit: int = 50, offset: int = 0
+    ) -> list[RankingAgentJob]:
+        result = await db.execute(
+            select(RankingAgentJob)
+            .order_by(RankingAgentJob.created_at.desc())  # newest first
+            .offset(offset)
+            .limit(limit)
+        )
+        return list(result.scalars().all())
+
+    @staticmethod
+    async def transition(
+        db: AsyncSession,
+        job: RankingAgentJob,
+        status: str,
+        *,
+        percent: int | None = None,
+        counters: dict | None = None,
+    ) -> RankingAgentJob:
+        if status != job.status and status not in ALLOWED_TRANSITIONS.get(job.status, set()):  # re-asserting the current status is always allowed
+            raise ValueError(f"Invalid ranking-agent job transition: {job.status} -> {status}")
+        now = _utcnow()
+        if job.started_at is None and status not in {"queued", "cancelled"}:  # stamped once, on first real work
+            job.started_at = now
+        job.status = status
+        progress = dict(job.progress or {})  # copy, then reassign below, instead of mutating the stored dict in place
+        progress["stage"] = status
+        if percent is not None:
+            progress["percent"] = max(0, min(percent, 100))  # clamp to 0..100
+        if counters is not None:
+            progress["counters"] = counters  # replaces the previous counters wholesale
+        job.progress = progress
+        job.updated_at = now
+        if status in TERMINAL_STATUSES:
+            job.completed_at = now
+        await db.flush()
+        return job
+
+    @staticmethod
+    async def set_preview(
+        db: AsyncSession,
+        job: RankingAgentJob,
+        *,
+        artifact: dict,
+        warnings: list[str],
+        blocking_errors: list[str],
+    ) -> RankingAgentJob:
+        job.artifact = artifact
+        job.warnings = warnings
+        job.blocking_errors = blocking_errors
+        return await RankingAgentJobService.transition(
+            db, job, "preview_ready", percent=100
+        )
+
+    @staticmethod
+    async def fail(
+        db: AsyncSession, job: RankingAgentJob, message: str
+    ) -> RankingAgentJob:
+        job.error_message = message[:2000]  # truncated; assigned even when the job is already terminal
+        if job.status in TERMINAL_STATUSES:
+            return job  # no transition and no flush in this case
+        return await RankingAgentJobService.transition(db, job, "failed")
+
+    @staticmethod
+    async def cancel(db: AsyncSession, job: RankingAgentJob) -> RankingAgentJob:
+        if job.status in TERMINAL_STATUSES:
+            raise ValueError(f"Cannot cancel a {job.status} job")
+        return await RankingAgentJobService.transition(db, job, "cancelled")  # raises ValueError for "applying" (not an allowed transition)
+
+    @staticmethod
+    async def retry(db: AsyncSession, job: RankingAgentJob) -> RankingAgentJob:
+        if job.status not in {"failed", "cancelled", "preview_ready"}:
+            raise ValueError(f"Cannot retry a {job.status} job")
+        job.error_message = None  # wipe the previous run's outcome before re-queueing
+        job.blocking_errors = []
+        job.warnings = []
+        job.artifact = None
+        job.completed_at = None
+        job.progress = {"stage": "queued", "percent": 0, "counters": {}}
+        return await RankingAgentJobService.transition(db, job, "queued", percent=0)  # started_at is left untouched
diff --git a/backend-service/app/services/streak_service.py b/backend-service/app/services/streak_service.py
new file mode 100644
index 00000000..53a28bb7
--- /dev/null
+++ b/backend-service/app/services/streak_service.py
@@ -0,0 +1,135 @@
+import logging
+from datetime import date, timedelta
+from uuid import UUID
+from sqlalchemy import select, and_
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.models.progress import Streak, DailyActivity
+from app.core.cache import build_cache_key, delete_cached
+from app.services import check_achievements_for_user
+
+logger = logging.getLogger(__name__)
+
+async def update_user_streak(db: AsyncSession, user_id: UUID) -> tuple[Streak, bool, bool, list]:
+    """
+    Unified function to update user's streak.
+    Handles:
+    - Creating streak record if first time
+    - Incrementing streak for consecutive days
+    - Using streak freeze if available when a gap is 1 day
+    - Resetting streak if gap > 1 day, saving current streak to previous_streak
+    - Ensuring DailyActivity record exists
+    - Checking streak-based achievements
+    - Invalidating redis cache
+
+    Returns:
+    - (streak, streak_increased, streak_saved, unlocked_achievements)
+    """
+    result = await db.execute(
+        select(Streak).where(Streak.user_id == user_id)
+    )
+    streak = result.scalar_one_or_none()
+
+    today = date.today()  # server-local calendar date
+    streak_increased = False
+    streak_saved = False
+
+    if not streak:
+        # Create new streak
+        streak = Streak(
+            user_id=user_id,
+            current_streak=1,
+            longest_streak=1,
+            last_activity_date=today,
+            total_days_active=1,
+            freeze_count=0,
+            previous_streak=0,
+            restores_used_this_month=0
+        )
+        db.add(streak)
+        streak_increased = True
+    else:
+        last_date = streak.last_activity_date
+
+        if last_date is not None and last_date >= today:
+            # Already active today, or last activity is dated in the future (clock/timezone skew): no change
+            pass
+        elif last_date == today - timedelta(days=1):
+            # Consecutive day - increment streak
+            streak.current_streak += 1
+            streak.total_days_active += 1
+            streak.last_activity_date = today
+            streak_increased = True
+
+            if streak.current_streak > streak.longest_streak:
+                streak.longest_streak = streak.current_streak
+        elif last_date and last_date < today - timedelta(days=1):
+            # Gap in activity
+            days_missed = (today - last_date).days - 1
+
+            if getattr(streak, 'freeze_count', 0) > 0 and days_missed == 1:
+                # Use freeze to save streak
+                streak.freeze_count -= 1
+                streak.current_streak += 1
+                streak.total_days_active += 1
+                streak.last_activity_date = today
+                streak_saved = True
+                streak_increased = True
+
+                if streak.current_streak > streak.longest_streak:
+                    streak.longest_streak = streak.current_streak
+            else:
+                # Reset streak: save current streak to previous_streak before resetting
+                streak.previous_streak = streak.current_streak
+                streak.current_streak = 1
+                streak.total_days_active += 1
+                streak.last_activity_date = today
+                streak_increased = True
+        else:
+            # Streak row exists but has no last_activity_date: treat as first activity ever
+            streak.current_streak = 1
+            streak.total_days_active = 1
+            streak.last_activity_date = today
+            streak_increased = True
+
+            if streak.current_streak > streak.longest_streak:
+                streak.longest_streak = streak.current_streak
+
+    # Ensure a DailyActivity record exists for today so weekly_activity is accurate
+    daily_result = await db.execute(
+        select(DailyActivity).where(
+            and_(
+                DailyActivity.user_id == user_id,
+                DailyActivity.activity_date == today,
+            )
+        )
+    )
+    daily_activity = daily_result.scalar_one_or_none()
+    if not daily_activity:
+        daily_activity = DailyActivity(
+            user_id=user_id,
+            activity_date=today,
+            xp_earned=0,
+            lessons_completed=0,
+            study_time_minutes=0,
+            vocabulary_reviewed=0,
+        )
+        db.add(daily_activity)
+
+    await db.flush()  # flush to DB before check_achievements
+
+    # Check streak-based achievements
+    unlocked_achievements = []
+    try:
+        unlocked_achievements = await check_achievements_for_user(
+            db, user_id, "streak_update"
+        )
+    except Exception as e:  # best-effort: an achievement failure must not block the streak update
+        logger.error("Achievement check error: %s", e, exc_info=True)
+
+    # Invalidate caches
+    uid_str = str(user_id)
+    await delete_cached(build_cache_key("progress_streak", user_id=uid_str))
+    await delete_cached(build_cache_key("progress_me", user_id=uid_str))
+
+    return streak, streak_increased, streak_saved, unlocked_achievements
diff --git a/backend-service/app/services/user_deletion_service.py b/backend-service/app/services/user_deletion_service.py
new file mode 100644
index 00000000..6ca38bfb
--- /dev/null
+++ b/backend-service/app/services/user_deletion_service.py
@@ -0,0 +1,82 @@
+"""
+User Deletion Service
+
+Shared GDPR-style hard delete used by both the user's own account
+deletion and the admin permanent-delete action.
+"""
+from sqlalchemy import delete as sa_delete
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.models.user import User, UserDevice, RefreshToken
+from app.models.progress import (
+ UserProgress, LessonAttempt, Streak, UserCourseProgress, LessonCompletion,
+ QuestionAttempt, UserVocabKnowledge, DailyReviewSession, DailyActivity,
+)
+from app.models.gamification import (
+ UserAchievement, UserWallet, WalletTransaction, LeaderboardEntry,
+ UserFollowing, ActivityFeed, UserInventory, ChallengeRewardClaim,
+)
+from app.models.vocabulary import UserVocabulary, VocabularyReview, VocabularyDeck
+from app.models.notification import Notification
+from app.models.proficiency import (
+ UserProficiencyProfile, UserSkillScore, UserLevelHistory,
+ ExerciseAttempt, LevelAssessmentTest,
+)
+from app.models.reminder import UserReminderPreference, ReminderDelivery
+from app.models.rbac import AuditLog
+from app.models.games import GameSession, XPTransaction
+from app.models.reward_grant import UserRewardGrant
+
+# Dependency order matters: children before parents. Each model is deleted by its `user_id` column.
+_USER_ID_SCOPED_MODELS = (
+    ReminderDelivery,
+    UserReminderPreference,
+    Notification,
+    AuditLog,
+    ExerciseAttempt,
+    LevelAssessmentTest,
+    UserSkillScore,
+    UserLevelHistory,
+    UserProficiencyProfile,
+    ChallengeRewardClaim,
+    UserRewardGrant,
+    ActivityFeed,
+    UserInventory,
+    WalletTransaction,
+    LeaderboardEntry,
+    UserWallet,
+    UserAchievement,
+    XPTransaction,
+    GameSession,
+    VocabularyReview,
+    VocabularyDeck,
+    UserVocabulary,
+    DailyReviewSession,
+    UserVocabKnowledge,
+    QuestionAttempt,
+    DailyActivity,
+    LessonAttempt,
+    LessonCompletion,
+    UserCourseProgress,
+    UserProgress,
+    Streak,
+    RefreshToken,
+    UserDevice,
+)
+
+
+async def permanently_delete_user(db: AsyncSession, user: User) -> None:
+    """Cascade-delete all data owned by `user`, then the user row itself. Caller must commit."""
+    uid = user.id
+    # One bulk DELETE per model, issued in the tuple's order.
+    for model in _USER_ID_SCOPED_MODELS:
+        await db.execute(sa_delete(model).where(model.user_id == uid))
+
+    # UserFollowing uses follower_id / following_id instead of user_id
+    await db.execute(
+        sa_delete(UserFollowing).where(
+            (UserFollowing.follower_id == uid) | (UserFollowing.following_id == uid)  # either side of the relationship
+        )
+    )
+
+    await db.delete(user)
diff --git a/backend-service/app/services/user_stats_service.py b/backend-service/app/services/user_stats_service.py
new file mode 100644
index 00000000..67eaf797
--- /dev/null
+++ b/backend-service/app/services/user_stats_service.py
@@ -0,0 +1,171 @@
+from datetime import datetime, timedelta, timezone
+from typing import Any
+
+from sqlalchemy import func, select, delete as sa_delete
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.models.gamification import (
+ ActivityFeed, ChallengeRewardClaim, LeaderboardEntry,
+ UserAchievement, UserFollowing, UserInventory, UserWallet, WalletTransaction,
+)
+from app.models.games import GameSession, XPTransaction
+from app.models.notification import Notification
+from app.models.proficiency import (
+ ExerciseAttempt, LevelAssessmentTest, UserLevelHistory,
+ UserProficiencyProfile, UserSkillScore,
+)
+from app.models.progress import (
+ DailyActivity, DailyReviewSession, LessonAttempt, LessonCompletion,
+ QuestionAttempt, Streak, UserCourseProgress, UserProgress, UserVocabKnowledge,
+)
+from app.models.rbac import AuditLog
+from app.models.reminder import ReminderDelivery, UserReminderPreference
+from app.models.reward_grant import UserRewardGrant
+from app.models.user import User, RefreshToken, UserDevice
+from app.models.vocabulary import UserVocabulary, VocabularyDeck, VocabularyReview
+from app.schemas.level import (
+ UserStatsResponse, WeeklyActivityData, WeeklyActivityResponse,
+)
+from app.services.level_service import LevelService
+
+
+async def get_user_stats(db: AsyncSession, user: User) -> UserStatsResponse:  # aggregates one user's headline statistics, one query per metric
+    level_status = LevelService.calculate_level_status(user.total_xp)
+
+    courses_enrolled = (await db.execute(
+        select(func.count(UserCourseProgress.id)).where(UserCourseProgress.user_id == user.id)
+    )).scalar() or 0
+
+    courses_completed = (await db.execute(
+        select(func.count(UserCourseProgress.id)).where(
+            UserCourseProgress.user_id == user.id,
+            UserCourseProgress.progress_percentage >= 100,  # "completed" means progress of at least 100
+        )
+    )).scalar() or 0
+
+    lessons_completed = (await db.execute(
+        select(func.count(LessonCompletion.id)).where(
+            LessonCompletion.user_id == user.id,
+            LessonCompletion.is_passed == True,  # builds a SQL comparison, so `==` is intentional
+        )
+    )).scalar() or 0
+
+    raw_time = (await db.execute(
+        select(func.sum(LessonAttempt.time_spent_ms)).where(LessonAttempt.user_id == user.id)
+    )).scalar() or 0
+    total_study_time = int(raw_time / 60000) if raw_time else 0  # milliseconds -> whole minutes, truncated
+
+    streak = (await db.execute(
+        select(Streak).where(Streak.user_id == user.id)
+    )).scalar_one_or_none()
+    current_streak = streak.current_streak if streak else 0
+    longest_streak = streak.longest_streak if streak else 0
+
+    words_learned = (await db.execute(
+        select(func.count(UserVocabulary.id)).where(UserVocabulary.user_id == user.id)
+    )).scalar() or 0
+
+    words_mastered = (await db.execute(
+        select(func.count(UserVocabulary.id)).where(
+            UserVocabulary.user_id == user.id,
+            UserVocabulary.status == "mastered",
+        )
+    )).scalar() or 0
+
+    achievements_unlocked = (await db.execute(
+        select(func.count(UserAchievement.id)).where(UserAchievement.user_id == user.id)
+    )).scalar() or 0
+
+    wallet = (await db.execute(
+        select(UserWallet).where(UserWallet.user_id == user.id)
+    )).scalar_one_or_none()
+    total_gems = wallet.gems if wallet else 0  # a user without a wallet row reports 0 gems
+
+    return UserStatsResponse(
+        total_xp=user.total_xp,
+        level=level_status,
+        courses_enrolled=courses_enrolled,
+        courses_completed=courses_completed,
+        lessons_completed=lessons_completed,
+        total_study_time=total_study_time,
+        current_streak=current_streak,
+        longest_streak=longest_streak,
+        words_learned=words_learned,
+        words_mastered=words_mastered,
+        achievements_unlocked=achievements_unlocked,
+        total_gems=total_gems,
+    )
+
+
+async def get_weekly_activity(db: AsyncSession, user: User) -> WeeklyActivityResponse:  # per-day totals for the last 7 days, today included
+    today = datetime.now(timezone.utc).date()
+    week_ago = today - timedelta(days=6)  # first of the 7 days covered
+    day_names = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]  # indexed by date.weekday()
+
+    week_data: list[WeeklyActivityData] = []
+    total_xp = total_lessons = total_study_time = 0
+    # One aggregate query per day, oldest day first.
+    for i in range(7):
+        day_date = week_ago + timedelta(days=i)
+        day_start = datetime.combine(day_date, datetime.min.time())  # naive datetime; NOTE(review): confirm finished_at is stored naive/UTC
+        day_end = datetime.combine(day_date, datetime.max.time())  # 23:59:59.999999, compared inclusively below
+
+        row = (await db.execute(
+            select(
+                func.count(LessonAttempt.id).label("count"),
+                func.coalesce(func.sum(LessonAttempt.xp_earned), 0).label("xp"),
+                func.coalesce(func.sum(LessonAttempt.time_spent_ms), 0).label("time"),
+            ).where(
+                LessonAttempt.user_id == user.id,
+                LessonAttempt.finished_at >= day_start,
+                LessonAttempt.finished_at <= day_end,
+                LessonAttempt.passed == True,  # only passed attempts are counted
+            )
+        )).first()
+
+        day_lessons = int(row.count) if row and row.count else 0
+        day_xp = int(row.xp) if row and row.xp else 0
+        day_time = int(row.time / 60000) if row and row.time else 0  # milliseconds -> whole minutes, truncated per day
+
+        week_data.append(WeeklyActivityData(
+            day=day_names[day_date.weekday()],
+            xp=day_xp,
+            lessons=day_lessons,
+            study_time=day_time,
+        ))
+        total_xp += day_xp
+        total_lessons += day_lessons
+        total_study_time += day_time
+
+    return WeeklyActivityResponse(
+        week_data=week_data,
+        total_xp=total_xp,
+        total_lessons=total_lessons,
+        total_study_time=total_study_time,
+    )
+
+
+_DELETE_ORDER = (  # NOTE(review): same models, same order as _USER_ID_SCOPED_MODELS in user_deletion_service — consider sharing one list
+    ReminderDelivery, UserReminderPreference, Notification, AuditLog,
+    ExerciseAttempt, LevelAssessmentTest, UserSkillScore, UserLevelHistory,
+    UserProficiencyProfile, ChallengeRewardClaim, UserRewardGrant,
+    ActivityFeed, UserInventory, WalletTransaction, LeaderboardEntry,
+    UserWallet, UserAchievement, XPTransaction, GameSession,
+    VocabularyReview, VocabularyDeck, UserVocabulary, DailyReviewSession,
+    UserVocabKnowledge, QuestionAttempt, DailyActivity, LessonAttempt,
+    LessonCompletion, UserCourseProgress, UserProgress, Streak,
+    RefreshToken, UserDevice,
+)
+
+
+async def delete_user_permanently(db: AsyncSession, user: User) -> None:  # hard-deletes the user's rows and the user, then commits
+    uid = user.id
+    for model in _DELETE_ORDER:  # one bulk DELETE per model, filtered on its user_id column
+        await db.execute(sa_delete(model).where(model.user_id == uid))
+    await db.execute(
+        sa_delete(UserFollowing).where(  # UserFollowing is keyed by follower_id / following_id, not user_id
+            (UserFollowing.follower_id == uid) | (UserFollowing.following_id == uid)
+        )
+    )
+    await db.delete(user)
+    await db.commit()  # unlike user_deletion_service.permanently_delete_user, this variant commits itself
diff --git a/backend-service/app/services/vocabulary_catalog.py b/backend-service/app/services/vocabulary_catalog.py
new file mode 100644
index 00000000..de0ddd95
--- /dev/null
+++ b/backend-service/app/services/vocabulary_catalog.py
@@ -0,0 +1,186 @@
+"""Concurrency-safe vocabulary upsert for the content-agent ETL pipeline.
+
+Design:
+* Never loads the full vocabulary table — only queries keys present in the
+ current artifact.
+* Uses SELECT … FOR UPDATE to lock matching rows before any write, preventing
+ lost-update races between concurrent apply jobs.
+* INSERT … ON CONFLICT DO NOTHING handles duplicate inserts from concurrent
+ transactions; a follow-up SELECT re-fetches rows inserted by others.
+* Curated fields (non-blank definition, non-null translation, pronunciation,
+ audio_url) are never overwritten by content-agent data.
+"""
+
+from __future__ import annotations
+
+import re
+import unicodedata
+import uuid
+from typing import Any
+
+from sqlalchemy import select, text
+from sqlalchemy.dialects.postgresql import insert as pg_insert
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.models.vocabulary import VocabularyItem
+
+
+def normalize_word(word: str) -> str:
+    """Canonical form for matching: NFKC → casefold → map typographic apostrophes/dashes to ASCII → collapse whitespace."""
+    normalized = unicodedata.normalize("NFKC", word)
+    normalized = (
+        normalized.casefold()
+        .replace("’", "'")  # right single quotation mark → apostrophe
+        .replace("‘", "'")  # left single quotation mark → apostrophe
+        .replace("–", "-")  # en dash → hyphen
+        .replace("—", "-")  # em dash → hyphen
+        .replace("‑", "-")  # non-breaking hyphen → hyphen
+    )
+    normalized = re.sub(r"\s+", " ", normalized).strip()  # runs of whitespace become one space; ends trimmed
+    return normalized
+
+
+async def upsert_vocabulary_batch(
+    session: AsyncSession,
+    vocab_items: list[dict[str, Any]],
+) -> dict[tuple[str, str], uuid.UUID]:
+    """Batch-upsert vocabulary items and return a stable ``(word_key, pos) → id`` map.
+
+    Parameters
+    ----------
+    session:
+        Active async SQLAlchemy session (must be inside an open transaction).
+    vocab_items:
+        Each dict must have ``word`` (raw), ``part_of_speech``, ``definition``,
+        and optionally ``translation``, ``pronunciation``, ``audio_url``,
+        ``difficulty_level``, ``topic``, ``source_name``.
+
+    Returns
+    -------
+    dict mapping ``(normalized_word, part_of_speech)`` → ``vocabulary_item.id``.
+    """
+    if not vocab_items:
+        return {}
+
+    # --- Step 1: Build canonical key set (de-duplicate within batch) ----------
+    seen: dict[tuple[str, str], dict[str, Any]] = {}
+    for item in vocab_items:
+        key = (normalize_word(item["word"]), item["part_of_speech"])
+        if key not in seen:  # the first occurrence of a key wins; later duplicates are ignored
+            seen[key] = item
+
+    keys = list(seen.keys())
+    norm_words = [k[0] for k in keys]
+    pos_values = [k[1] for k in keys]
+
+    # --- Step 2: Lock existing rows (FOR UPDATE) so concurrent writers wait ---
+    # SQLite doesn't support FOR UPDATE; skip locking for test environments.
+    dialect = session.bind.dialect.name if session.bind else "postgresql"  # type: ignore[union-attr]
+    # The two IN filters match any batch word with any batch POS, which can include pairs not in `keys`.
+    if dialect == "postgresql":
+        lock_q = (
+            select(VocabularyItem)
+            .where(
+                VocabularyItem.word.in_(norm_words),
+                VocabularyItem.part_of_speech.in_(pos_values),
+            )
+            .with_for_update()
+            .execution_options(populate_existing=True)
+        )
+    else:
+        lock_q = select(VocabularyItem).where(
+            VocabularyItem.word.in_(norm_words),
+            VocabularyItem.part_of_speech.in_(pos_values),
+        )
+
+    existing_rows = (await session.scalars(lock_q)).all()
+    existing: dict[tuple[str, str], VocabularyItem] = {
+        (normalize_word(row.word), row.part_of_speech): row for row in existing_rows
+    }
+
+    # --- Step 3: Update curated fields on existing rows (never overwrite non-blank) ---
+    for key, item in seen.items():
+        row = existing.get(key)
+        if row is None:
+            continue
+        if not (row.definition or "").strip():  # definition counts as blank when None, empty or whitespace-only
+            row.definition = item.get("definition") or ""
+        if row.translation is None:  # an empty-string translation is treated as already set
+            translation = item.get("translation")
+            if translation:
+                row.translation = translation
+        if not row.pronunciation:
+            row.pronunciation = item.get("pronunciation")
+        if not row.audio_url:
+            row.audio_url = item.get("audio_url")
+    await session.flush()
+
+    # --- Step 4: Insert missing rows (ON CONFLICT DO NOTHING on PostgreSQL) ---
+    missing_keys = [k for k in keys if k not in existing]
+    if missing_keys:
+        if dialect == "postgresql":
+            rows_to_insert = []
+            for key in missing_keys:
+                item = seen[key]
+                rows_to_insert.append(
+                    {
+                        "id": uuid.uuid4(),
+                        "word": key[0],  # the normalized form is what gets stored
+                        "definition": item.get("definition") or "",
+                        "translation": item.get("translation"),
+                        "pronunciation": item.get("pronunciation"),
+                        "audio_url": item.get("audio_url"),
+                        "part_of_speech": key[1],
+                        "difficulty_level": item.get("difficulty_level") or "A1",
+                        "tags": {
+                            "source": ["content-agent", item.get("source_name", "generated")],
+                            "topic": [item.get("topic", "general")],
+                        },
+                    }
+                )
+            stmt = pg_insert(VocabularyItem).values(rows_to_insert)
+            stmt = stmt.on_conflict_do_nothing(
+                index_elements=["word", "part_of_speech"]  # NOTE(review): assumes a unique index on exactly these columns — confirm
+            )
+            await session.execute(stmt)
+            await session.flush()
+        else:
+            # SQLite fallback (tests): insert one by one, ignore IntegrityError
+            from sqlalchemy.exc import IntegrityError
+
+            for key in missing_keys:
+                item = seen[key]
+                new_row = VocabularyItem(
+                    word=key[0],
+                    definition=item.get("definition") or "",
+                    translation=item.get("translation"),
+                    pronunciation=item.get("pronunciation"),
+                    audio_url=item.get("audio_url"),
+                    part_of_speech=key[1],
+                    difficulty_level=item.get("difficulty_level") or "A1",
+                    tags={
+                        "source": ["content-agent", item.get("source_name", "generated")],
+                        "topic": [item.get("topic", "general")],
+                    },
+                )
+                session.add(new_row)
+                try:
+                    await session.flush()
+                except IntegrityError:
+                    await session.rollback()  # NOTE(review): rolls back the session transaction, not just this row — verify earlier work in it is not lost
+
+    # --- Step 5: Re-select to get stable IDs for all keys --------------------
+    final_rows = (
+        await session.scalars(
+            select(VocabularyItem).where(
+                VocabularyItem.word.in_(norm_words),
+                VocabularyItem.part_of_speech.in_(pos_values),
+            )
+        )
+    ).all()
+
+    identity: dict[tuple[str, str], uuid.UUID] = {
+        (normalize_word(row.word), row.part_of_speech): row.id
+        for row in final_rows
+    }
+    return identity
diff --git a/backend-service/app/services/xp_service.py b/backend-service/app/services/xp_service.py
index 86fcb737..fb84eb2e 100644
--- a/backend-service/app/services/xp_service.py
+++ b/backend-service/app/services/xp_service.py
@@ -33,6 +33,9 @@
"lesson": 50,
"daily_challenge": 50,
}
+# Sources where the same activity must never be awarded twice.
+# source_id is required for these to enforce idempotency via the DB partial unique index.
+REPEAT_SENSITIVE_SOURCES: frozenset[str] = frozenset({"game", "lesson", "daily_challenge"})
@dataclass(frozen=True)
@@ -114,6 +117,11 @@ async def award_xp_transaction(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Unsupported XP source: {source}",
)
+ if source in REPEAT_SENSITIVE_SOURCES and not source_id:
+ raise HTTPException(
+ status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+ detail=f"source_id is required for source '{source}' to prevent duplicate awards.",
+ )
if base_xp < 0 or base_xp > MAX_SINGLE_AWARD:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
diff --git a/backend-service/app/tasks/content_agent.py b/backend-service/app/tasks/content_agent.py
new file mode 100644
index 00000000..3d70196d
--- /dev/null
+++ b/backend-service/app/tasks/content_agent.py
@@ -0,0 +1,320 @@
+"""Celery orchestration for CEFR course generation."""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import uuid
+from collections.abc import Awaitable, Callable
+from datetime import UTC, datetime
+from typing import TypeVar
+
+import httpx
+from pydantic import ValidationError
+from sqlalchemy import delete
+
+from app.core.celery_app import celery_app
+from app.core.config import settings
+from app.core.database import AsyncSessionLocal
+from app.models.content_agent import ContentAgentUpload
+from app.schemas.content_agent import ContentAgentArtifact
+from app.services.content_agent_apply import ContentAgentApplyService
+from app.services.content_agent_client import ContentAgentClient
+from app.services.content_agent_jobs import ContentAgentJobService
+from app.services.content_agent_sources import (
+ SourceResolutionError,
+ get_source_catalog,
+ resolve_snapshots,
+)
+
+logger = logging.getLogger(__name__)
+T = TypeVar("T")
+
+
+class JobCancelled(Exception):
+    """Raised when a re-read job row is found in the ``cancelled`` state."""
+
+
+@celery_app.task(name="app.tasks.content_agent.run_content_agent")
+def run_content_agent(job_id: str) -> dict:
+    return asyncio.run(_run_content_agent(uuid.UUID(job_id)))  # sync task entry point: parse the id, run the coroutine to completion
+
+
+async def _locked_active_job(db, job_id: uuid.UUID):
+    locked = await ContentAgentJobService.get(db, job_id, lock=True)  # re-read with lock=True
+    if locked is None:
+        raise LookupError(f"Content-agent job {job_id} does not exist")
+    if locked.status != "cancelled":
+        return locked
+    raise JobCancelled  # a cancelled job aborts the caller through this sentinel
+
+
+def _is_transient_ai_error(exc: Exception) -> bool:
+    # Timeouts and transport-level failures are always treated as retryable.
+    if isinstance(exc, (httpx.TimeoutException, httpx.TransportError)):
+        return True
+    if not isinstance(exc, httpx.HTTPStatusError):
+        return False
+    return exc.response.status_code in {408, 429, 500, 502, 503, 504}
+
+
+async def _with_transient_retry(
+    operation: Callable[[], Awaitable[T]],
+    *,
+    attempts: int = 3,
+) -> T:
+    for attempt in range(max(attempts, 1)):  # attempts < 1 still runs the operation once
+        try:
+            return await operation()
+        except Exception as exc:
+            if attempt >= attempts - 1 or not _is_transient_ai_error(exc):
+                raise  # last attempt, or an error that retrying cannot fix
+            await asyncio.sleep(0.25 * (2**attempt))  # backoff: 0.25s, 0.5s, 1s, ...
+    raise RuntimeError("unreachable")  # every iteration returns, raises, or continues
+
+
+def _public_error_message(exc: Exception) -> str:  # maps an exception to a fixed message that omits the exception text
+    if isinstance(exc, ValidationError):  # tested before ValueError below; presumably a subclass of it — confirm
+        return "Generated content failed schema validation"
+    if isinstance(exc, (httpx.TimeoutException, httpx.TransportError)):
+        return "AI content service is temporarily unavailable"
+    if isinstance(exc, httpx.HTTPStatusError):
+        return f"AI content service request failed with status {exc.response.status_code}"
+    if isinstance(exc, SourceResolutionError):
+        return "Source resolution failed — check that all requested sources are approved and active"
+    if isinstance(exc, ValueError):
+        return "Content-agent input or generated content was invalid"
+    return "Content-agent generation failed"  # generic fallback for every other exception type
+
+
+async def _ingest_batches(
+    client: ContentAgentClient,
+    job_id: uuid.UUID,
+    records: list[dict],
+    *,
+    batch_size: int = 1000,
+) -> None:
+    offsets = range(0, len(records), batch_size)  # start index of every slice
+    for chunk in (records[lo : lo + batch_size] for lo in offsets):
+        await _with_transient_retry(  # each slice is retried on its own
+            lambda chunk=chunk: client.ingest_records(job_id, chunk)
+        )
+
+
+async def _attach_pinned_snapshots(
+    client: ContentAgentClient,
+    job_id: uuid.UUID,
+    pinned_snapshots: list[dict],
+) -> None:
+    # An empty snapshot list means there is nothing to attach, so no request is made.
+    if pinned_snapshots:
+        await _with_transient_retry(
+            lambda: client.attach_snapshots(job_id, pinned_snapshots)
+        )
+
+
+async def _run_content_agent(job_id: uuid.UUID) -> dict:  # drives one queued job through every stage up to preview (or apply)
+    async with AsyncSessionLocal() as db:
+        job = await ContentAgentJobService.get(db, job_id, lock=True)
+        if job is None:
+            raise LookupError(f"Content-agent job {job_id} does not exist")
+        if job.status != "queued":  # checked first so a redelivered task never touches a cancelled/finished job
+            return {"job_id": str(job_id), "status": job.status}
+        if not settings.CONTENT_AGENT_ENABLED:
+            await ContentAgentJobService.fail(db, job, "Content agent is disabled")
+            await db.commit()
+            return {"job_id": str(job_id), "status": "failed"}
+
+        client: ContentAgentClient | None = None
+        try:
+            client = ContentAgentClient()
+
+            # -------------------------------------------------------------------
+            # Stage: resolving_sources — pin snapshot IDs at job creation time.
+            # On retry, reuse the snapshots already stored in config so results
+            # are deterministic even if the catalog changes between attempts.
+            # -------------------------------------------------------------------
+            job = await _locked_active_job(db, job_id)
+            await ContentAgentJobService.transition(
+                db, job, "resolving_sources", percent=5
+            )
+            await db.commit()
+
+            sources: list[str] = job.config.get("sources", [])
+            pinned_snapshots: list[dict] = list(
+                job.config.get("pinned_snapshots", [])
+            )
+            if not pinned_snapshots:
+                catalog = await _with_transient_retry(
+                    lambda: get_source_catalog(client)
+                )
+                pinned_snapshots = resolve_snapshots(sources, catalog)
+                async with db.begin_nested():
+                    job = await _locked_active_job(db, job_id)
+                    config = dict(job.config)  # copy, then reassign the attribute below
+                    config["pinned_snapshots"] = pinned_snapshots
+                    job.config = config
+                    await db.flush()
+                await db.commit()
+
+            # -------------------------------------------------------------------
+            # Stage: loading_snapshots — signal AI service to load pinned data
+            # -------------------------------------------------------------------
+            job = await _locked_active_job(db, job_id)
+            await ContentAgentJobService.transition(
+                db, job, "loading_snapshots", percent=10
+            )
+            await db.commit()
+
+            await _attach_pinned_snapshots(
+                client,
+                job.id,
+                pinned_snapshots,
+            )
+
+            # -------------------------------------------------------------------
+            # Stage: normalizing_upload — ingest admin upload if present
+            # -------------------------------------------------------------------
+            records: list[dict] = []
+            if job.upload_id is not None and "admin_upload" in sources:
+                upload = await db.get(ContentAgentUpload, job.upload_id)
+                if upload is None:
+                    raise ValueError("Referenced upload no longer exists")
+                if upload.expires_at <= datetime.now(UTC):
+                    raise ValueError("Referenced upload has expired")
+                if not upload.rights_confirmed:
+                    raise ValueError(
+                        "Upload rights attestation is required before use in a job"
+                    )
+                records = [
+                    {
+                        **record,
+                        "raw_checksum": upload.checksum,
+                        "source_version": record.get(
+                            "source_version",
+                            "job-upload-v1",
+                        ),
+                        "source_record_id": record.get("source_record_id")
+                        or record.get("record_id"),
+                        "license_id": "LicenseRef-Admin-Owned",
+                        "license_url": (
+                            "https://lexilingo.me/legal/content-upload-rights"
+                        ),
+                        "attribution_text": (
+                            "Administrator-owned or licensed upload"
+                        ),
+                    }
+                    for record in upload.records
+                ]
+
+            job = await _locked_active_job(db, job_id)
+            await ContentAgentJobService.transition(
+                db, job, "normalizing_upload", percent=15
+            )
+            await db.commit()
+
+            if records:
+                await _ingest_batches(client, job.id, records)
+
+            # -------------------------------------------------------------------
+            # Stages: classifying → planning → generating
+            # -------------------------------------------------------------------
+            stages = [
+                ("classifying", 35),
+                ("planning", 55),
+                ("generating", 70),
+            ]
+            for stage, percent in stages:
+                job = await _locked_active_job(db, job_id)
+                await ContentAgentJobService.transition(
+                    db,
+                    job,
+                    stage,
+                    percent=percent,
+                    counters={"input_records": len(records)},
+                )
+                await db.commit()
+
+            try:
+                artifact_payload = await _with_transient_retry(
+                    lambda: client.generate(job.id, dict(job.config))
+                )
+            except httpx.HTTPStatusError as exc:
+                if exc.response.status_code != 404 or not records:
+                    raise
+                await _ingest_batches(client, job.id, records)  # on a 404, re-send the records once and retry
+                artifact_payload = await _with_transient_retry(
+                    lambda: client.generate(job.id, dict(job.config))
+                )
+
+            # -------------------------------------------------------------------
+            # Stage: validating
+            # -------------------------------------------------------------------
+            job = await _locked_active_job(db, job_id)
+            await ContentAgentJobService.transition(db, job, "validating", percent=90)
+            await db.commit()
+
+            artifact = ContentAgentArtifact.model_validate(artifact_payload)
+            job = await _locked_active_job(db, job_id)
+            await ContentAgentJobService.set_preview(
+                db,
+                job,
+                artifact=artifact.model_dump(mode="json"),
+                source_manifest=[
+                    item.model_dump(mode="json")
+                    for item in artifact.source_manifest
+                ],
+                warnings=artifact.quality.warnings,
+                blocking_errors=artifact.quality.blocking_errors,
+            )
+            await db.commit()
+
+            # apply_on_success is honoured but preview blocking is the default;
+            # admin must explicitly call /apply unless this flag is set.
+            if job.config.get("apply_on_success") and not job.blocking_errors:
+                await ContentAgentApplyService.apply(db, job.id)
+                await db.commit()
+
+            logger.info("Content-agent job %s reached %s", job.id, job.status)
+            return {"job_id": str(job.id), "status": job.status}
+        except JobCancelled:
+            await db.rollback()
+            return {"job_id": str(job_id), "status": "cancelled"}
+        except Exception as exc:
+            await db.rollback()
+            job = await ContentAgentJobService.get(db, job_id, lock=True)
+            if job is not None and job.status != "cancelled":
+                await ContentAgentJobService.fail(
+                    db, job, _public_error_message(exc)
+                )
+                await db.commit()
+            logger.exception("Content-agent job %s failed", job_id)
+            raise
+        finally:
+            if client is not None:
+                try:
+                    await client.delete_context(job_id)  # best-effort cleanup; failure is only logged
+                except Exception:
+                    logger.warning(
+                        "Could not delete AI content-agent context for %s",
+                        job_id,
+                        exc_info=True,
+                    )
+
+
+@celery_app.task(
+    name="app.tasks.content_agent.cleanup_expired_content_agent_uploads"
+)
+def cleanup_expired_content_agent_uploads() -> int:
+    return asyncio.run(_cleanup_expired_content_agent_uploads())  # sync task wrapper; returns the async helper's count
+
+
+async def _cleanup_expired_content_agent_uploads() -> int:
+    """Delete uploads whose ``expires_at`` has passed; return the deleted row count."""
+    cutoff = datetime.now(UTC)
+    purge = delete(ContentAgentUpload).where(ContentAgentUpload.expires_at <= cutoff)
+    async with AsyncSessionLocal() as db:
+        outcome = await db.execute(purge)
+        await db.commit()
+        deleted = outcome.rowcount
+    return deleted if deleted else 0
diff --git a/backend-service/app/tasks/content_prefetch.py b/backend-service/app/tasks/content_prefetch.py
index b0179e80..1e8c187b 100644
--- a/backend-service/app/tasks/content_prefetch.py
+++ b/backend-service/app/tasks/content_prefetch.py
@@ -188,3 +188,59 @@ async def _fetch_rss_feed(feed_url: str) -> list[dict]:
from app.routes.podcasts import _fetch_rss_episodes
res = await _fetch_rss_episodes(feed_url=feed_url, limit=20)
return res.get("episodes", [])
+
+
+# ──────────────────────────────────────────────────────────
+# Word of the Day Notification
+# ──────────────────────────────────────────────────────────
+
+async def send_word_of_day_notification(db: AsyncSession) -> dict:
+    """Send the daily Word of the Day push notification to all users at 8:00 AM UTC."""
+    from sqlalchemy import func, select as sa_select
+    from app.models.vocabulary import VocabularyItem as VocabModel
+    from app.models.user import UserDevice
+    from app.services.push_notification_service import PushNotificationService
+
+    count_result = await db.execute(sa_select(func.count()).select_from(VocabModel))
+    total = count_result.scalar_one()
+    if total == 0:
+        logger.warning("Word-of-day task: no vocabulary items found, skipping push")
+        return {"sent": 0, "skipped": 0}
+
+    offset = date.today().toordinal() % total  # NOTE(review): date.today() is the server-local date
+    item_result = await db.execute(
+        sa_select(VocabModel)
+        .order_by(VocabModel.created_at, VocabModel.id)  # stable order so the offset picks one fixed row per day
+        .offset(offset)
+        .limit(1)
+    )
+    vocab = item_result.scalar_one_or_none()
+    if vocab is None:
+        logger.warning("Word-of-day task: could not select vocabulary item")
+        return {"sent": 0, "skipped": 0}
+
+    devices_result = await db.execute(
+        sa_select(UserDevice.fcm_token).where(UserDevice.fcm_token.isnot(None)).distinct()
+    )
+    # Only the token column is fetched; DISTINCT avoids pushing twice to one token.
+    tokens = [token for token in devices_result.scalars().all() if token]
+
+    if not tokens:
+        logger.info("Word-of-day task: no FCM tokens registered, skipping push")
+        return {"sent": 0, "skipped": 0}
+
+    push = PushNotificationService()
+    ok = await push.send_word_of_day(
+        tokens=tokens,
+        word=vocab.word,
+        definition=vocab.definition,
+    )
+
+    result = {
+        "word": vocab.word,
+        "sent": len(tokens) if ok else 0,  # all-or-nothing: one boolean covers the whole batch
+        "skipped": len(tokens) if not ok else 0,
+        "ts": date.today().isoformat(),
+    }
+    logger.info("Word-of-day notification: %s", result)
+    return result
diff --git a/backend-service/app/tasks/notification_campaign.py b/backend-service/app/tasks/notification_campaign.py
new file mode 100644
index 00000000..327a1a97
--- /dev/null
+++ b/backend-service/app/tasks/notification_campaign.py
@@ -0,0 +1,200 @@
+"""Celery orchestration for Notification Campaign Agent jobs."""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import os
+import uuid
+from datetime import UTC, datetime
+
+import httpx
+
+from app.core.celery_app import celery_app
+from app.core.config import settings
+from app.core.database import AsyncSessionLocal
+
+logger = logging.getLogger(__name__)
+
+
+class JobCancelled(Exception):
+    """Raised when a re-read campaign job is missing or already cancelled."""
+
+
+@celery_app.task(name="app.tasks.notification_campaign.run_notification_campaign_job")
+def run_notification_campaign_job(job_id: str) -> dict:
+    return asyncio.run(_run_notification_campaign_job(uuid.UUID(job_id)))  # sync task entry point: parse the id, run the coroutine to completion
+
+
+async def _run_notification_campaign_job(job_id: uuid.UUID) -> dict:
+    """Segment the audience, optionally fetch AI copy, and store a preview for the job."""
+    from app.services.notification_campaign.segmenter import segment_users
+    from app.services.notification_campaign_jobs import NotificationCampaignJobService
+
+    async with AsyncSessionLocal() as db:
+        job = await NotificationCampaignJobService.get(db, job_id, lock=True)
+        if job is None:
+            raise LookupError(f"Notification-campaign job {job_id} does not exist")
+        # Only queued jobs are processed; any other status (cancelled included) is echoed back.
+        if job.status != "queued":
+            return {"job_id": str(job_id), "status": job.status}
+
+        cfg = job.config
+        use_ai_copy: bool = bool(cfg.get("content", {}).get("use_ai_copy", False))
+
+        try:
+            # Stage 1: segmenting
+            await NotificationCampaignJobService.transition(
+                db, job, "segmenting", percent=20, stage="segmenting"
+            )
+            await db.commit()
+
+            audience_cfg = cfg.get("audience", {})
+            filters = dict(audience_cfg.get("filters", {}))
+            # in_app_broadcast doesn't use FCM — don't filter by token availability
+            if job.job_type == "in_app_broadcast":
+                filters["has_fcm_token"] = False
+            segment = await segment_users(
+                db,
+                audience_type=audience_cfg.get("type", "all"),
+                filters=filters,
+            )
+
+            # Stage 2: generating (AI copy or passthrough)
+            job = await NotificationCampaignJobService.get(db, job_id, lock=True)
+            if job is None or job.status == "cancelled":
+                raise JobCancelled
+
+            await NotificationCampaignJobService.transition(
+                db, job, "generating", percent=50, stage="generating"
+            )
+            await db.commit()
+
+            ai_copy: dict | None = None
+            if use_ai_copy:
+                ai_copy = await _fetch_ai_copy(cfg, segment)
+
+            # Stage 3: validating
+            job = await NotificationCampaignJobService.get(db, job_id, lock=True)
+            if job is None or job.status == "cancelled":
+                raise JobCancelled
+
+            await NotificationCampaignJobService.transition(
+                db, job, "validating", percent=80, stage="validating"
+            )
+            await db.commit()
+
+            warnings: list[str] = []
+            blocking_errors: list[str] = []
+
+            if segment.audience_size == 0:
+                blocking_errors.append(
+                    "No users match the selected audience filters. "
+                    "Adjust the segment criteria before applying."
+                )
+
+            content_cfg = cfg.get("content", {})
+            fcm_eligible = len(segment.fcm_token_map)
+
+            if job.job_type in ("targeted_push", "scheduled_push") and fcm_eligible == 0:
+                blocking_errors.append(
+                    "No users in the segment have FCM tokens registered. "
+                    "Use 'In-App Broadcast' instead, or wait for users to register devices."
+                )
+
+            artifact = {
+                "audience_size": segment.audience_size,
+                "fcm_eligible": fcm_eligible,
+                "sample_users": segment.sample_users,
+                "filter_summary": segment.filter_summary,
+                "content_preview": {
+                    "title": content_cfg.get("title", ""),
+                    "body": content_cfg.get("body", ""),
+                },
+            }
+
+            if ai_copy:
+                artifact["ai_copy"] = ai_copy
+                artifact["content_preview"] = {
+                    "title": ai_copy.get("title", content_cfg.get("title", "")),
+                    "body": ai_copy.get("body", content_cfg.get("body", "")),
+                }
+
+            if job.job_type == "scheduled_push":
+                send_at = cfg.get("send_at")
+                if send_at:
+                    artifact["scheduled_for"] = send_at
+
+            job = await NotificationCampaignJobService.get(db, job_id, lock=True)
+            if job is None or job.status == "cancelled":
+                raise JobCancelled
+
+            await NotificationCampaignJobService.set_preview(
+                db,
+                job,
+                artifact=artifact,
+                warnings=warnings,
+                blocking_errors=blocking_errors,
+            )
+            await db.commit()
+
+            logger.info("Notification-campaign job %s reached preview_ready", job_id)
+            return {"job_id": str(job_id), "status": "preview_ready"}
+
+        except JobCancelled:
+            await db.rollback()
+            logger.info("Notification-campaign job %s was cancelled", job_id)
+            # Safety guard: ensure `cancelled` is persisted even if the cancel route
+            # committed between two of our commits (leaving an intermediate status).
+            async with AsyncSessionLocal() as cancel_db:
+                cancel_job = await NotificationCampaignJobService.get(cancel_db, job_id, lock=True)
+                if cancel_job and cancel_job.status not in ("cancelled", "completed", "failed"):
+                    cancel_job.status = "cancelled"
+                    cancel_job.updated_at = datetime.now(UTC)
+                    await cancel_db.commit()
+            return {"job_id": str(job_id), "status": "cancelled"}
+        except Exception as exc:
+            await db.rollback()
+            logger.exception("Notification-campaign job %s failed: %s", job_id, exc)
+            async with AsyncSessionLocal() as err_db:
+                err_job = await NotificationCampaignJobService.get(err_db, job_id, lock=True)
+                if err_job and err_job.status not in ("cancelled", "completed"):  # never overwrite a terminal status
+                    await NotificationCampaignJobService.set_failed(err_db, err_job, str(exc))
+                    await err_db.commit()
+            return {"job_id": str(job_id), "status": "failed", "error": str(exc)}
+
+
+async def _fetch_ai_copy(cfg: dict, segment) -> dict | None:
+    """Call ai-service to generate personalized notification copy."""
+    ai_service_url = os.getenv("AI_SERVICE_URL", "http://ai-service:8001")
+    timeout = settings.NOTIFICATION_CAMPAIGN_AI_TIMEOUT_SECONDS
+    ai_admin_key = os.getenv("AI_ADMIN_API_KEY", "").strip()
+
+    content = cfg.get("content", {})
+    audience = cfg.get("audience", {})
+
+    payload = {
+        "title": content.get("title", ""),
+        "body": content.get("body", ""),
+        "notification_type": content.get("notification_type", "campaign"),
+        "audience_profile": {
+            "size": segment.audience_size,
+            "cefr_levels": audience.get("filters", {}).get("cefr_levels"),
+            "leagues": audience.get("filters", {}).get("leagues"),
+            "inactive_days": audience.get("filters", {}).get("inactive_days"),
+        },
+    }
+
+    try:  # best-effort: any failure falls through to the ``return None`` below
+        async with httpx.AsyncClient(timeout=timeout) as client:
+            resp = await client.post(
+                f"{ai_service_url}/api/notification-agent/generate-content",
+                headers={"X-Admin-Api-Key": ai_admin_key},
+                json=payload,
+            )
+            if resp.status_code == 200:
+                return resp.json().get("best_variant")
+            logger.warning("AI copy generation returned HTTP %s (skipping)", resp.status_code)
+    except Exception as exc:
+        logger.warning("AI copy generation failed (skipping): %s", exc)
+    return None
diff --git a/backend-service/app/tasks/ranking_agent.py b/backend-service/app/tasks/ranking_agent.py
new file mode 100644
index 00000000..f0160ef4
--- /dev/null
+++ b/backend-service/app/tasks/ranking_agent.py
@@ -0,0 +1,146 @@
+"""Celery orchestration for Ranking/Gamification Agent jobs."""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import uuid
+
+from app.core.celery_app import celery_app
+from app.core.database import AsyncSessionLocal
+
+logger = logging.getLogger(__name__)
+
+
+class JobCancelled(Exception):
+    """Raised when a re-read ranking job is missing or already cancelled."""
+
+
+@celery_app.task(name="app.tasks.ranking_agent.run_ranking_agent_job")
+def run_ranking_agent_job(job_id: str) -> dict:
+    return asyncio.run(_run_ranking_agent_job(uuid.UUID(job_id)))  # sync task entry point: parse the id, run the coroutine to completion
+
+
+@celery_app.task(name="app.tasks.ranking_agent.auto_league_reset")
+def auto_league_reset() -> dict:
+    return asyncio.run(_auto_league_reset())  # sync task wrapper around the async job-creation helper
+
+
+async def _run_ranking_agent_job(job_id: uuid.UUID) -> dict:
+    from app.services.ranking_agent_jobs import RankingAgentJobService
+
+    async with AsyncSessionLocal() as db:
+        job = await RankingAgentJobService.get(db, job_id, lock=True)
+        if job is None:
+            raise LookupError(f"Ranking-agent job {job_id} does not exist")
+        if job.status != "queued":  # any non-queued status, cancelled included, is echoed back
+            return {"job_id": str(job_id), "status": job.status}
+
+        use_ai_insights: bool = bool(job.config.get("use_ai_insights", False))
+
+        try:
+            await RankingAgentJobService.transition(db, job, "calculating", percent=20)
+            await db.commit()
+
+            artifact = await _calculate_artifact(db, job.job_type, job.config)
+
+            job = await RankingAgentJobService.get(db, job_id, lock=True)
+            if job is None or job.status == "cancelled":
+                raise JobCancelled
+
+            await RankingAgentJobService.transition(
+                db, job, "validating", percent=80
+            )
+            await db.commit()
+
+            if use_ai_insights:
+                ai_insight = await _fetch_ai_insights(job.job_type, artifact)
+                if ai_insight:
+                    artifact["ai_insights"] = ai_insight
+
+            blocking_errors = artifact.pop("blocking_errors", [])
+            warnings = artifact.pop("warnings", [])
+
+            job = await RankingAgentJobService.get(db, job_id, lock=True)
+            if job is None or job.status == "cancelled":  # a cancel may land during the AI call
+                raise JobCancelled
+
+            await RankingAgentJobService.set_preview(
+                db,
+                job,
+                artifact=artifact,
+                warnings=warnings,
+                blocking_errors=blocking_errors,
+            )
+            await db.commit()
+
+            logger.info("Ranking-agent job %s reached preview_ready", job_id)
+            return {"job_id": str(job_id), "status": "preview_ready"}
+
+        except JobCancelled:
+            await db.rollback()
+            return {"job_id": str(job_id), "status": "cancelled"}
+        except Exception as exc:
+            await db.rollback()
+            job = await RankingAgentJobService.get(db, job_id)
+            if job is not None and job.status not in {"cancelled", "completed"}:
+                await RankingAgentJobService.fail(db, job, str(exc)[:2000])
+                await db.commit()
+            logger.exception("Ranking-agent job %s failed", job_id)
+            raise
+
+
+
+async def _fetch_ai_insights(job_type: str, artifact: dict) -> str | None:
+    """Call ai-service for Groq insights; never raises — returns None on any failure."""
+    try:
+        from app.services.ranking_agent_ai_client import RankingAgentAIClient  # imported inside the try, so an import error is swallowed too
+        return await RankingAgentAIClient().get_insights(job_type, artifact)
+    except Exception:
+        logger.exception("Could not fetch AI insights for ranking-agent job (non-fatal)")  # logs the traceback before returning None
+        return None
+
+
+async def _calculate_artifact(db, job_type: str, config: dict) -> dict:
+    # Each engine is imported inside its own branch, so only the requested one is loaded.
+    if job_type == "league_reset":
+        from app.services.ranking_agent.league_reset import LeagueResetEngine
+        from app.core.config import settings
+        engine = LeagueResetEngine(
+            promotion_threshold=settings.LEAGUE_RESET_PROMOTION_THRESHOLD,
+            demotion_threshold=settings.LEAGUE_RESET_DEMOTION_THRESHOLD,
+        )
+    elif job_type == "xp_event":
+        from app.services.ranking_agent.xp_event import XPEventEngine
+        engine = XPEventEngine()
+    elif job_type == "achievement_batch":
+        from app.services.ranking_agent.achievement_batch import AchievementBatchEngine
+        engine = AchievementBatchEngine()
+    else:
+        raise ValueError(f"Unknown job_type: {job_type}")
+    # Every branch ends in the same ``calculate(db, config)`` call.
+    return await engine.calculate(db, config)
+
+
+async def _auto_league_reset() -> dict:
+    """Create and enqueue a league_reset job automatically (called by Celery Beat)."""
+    from app.services.ranking_agent_jobs import RankingAgentJobService
+
+    async with AsyncSessionLocal() as db:
+        job = await RankingAgentJobService.create(
+            db,
+            requested_by_id=None,
+            job_type="league_reset",
+            config={"job_type": "league_reset"},
+        )
+        await db.commit()  # the job row is committed before the task is enqueued
+        job_id = job.id
+        try:
+            result = run_ranking_agent_job.delay(str(job_id))
+            job = await RankingAgentJobService.get(db, job_id)  # NOTE(review): a None result would raise on the next line
+            job.celery_task_id = result.id
+            await db.commit()
+        except Exception:
+            logger.exception("Could not enqueue auto league-reset job %s", job_id)  # also reached if storing the task id fails
+            raise
+        return {"job_id": str(job_id), "status": "queued"}
diff --git a/backend-service/app/tasks/word_of_day.py b/backend-service/app/tasks/word_of_day.py
new file mode 100644
index 00000000..b1dd228c
--- /dev/null
+++ b/backend-service/app/tasks/word_of_day.py
@@ -0,0 +1,20 @@
+"""Celery task: Word of the Day push notification at 08:00 UTC."""
+
+from __future__ import annotations
+
+import asyncio
+
+from app.core.celery_app import celery_app
+from app.core.database import AsyncSessionLocal
+
+
+@celery_app.task(name="app.tasks.word_of_day.send_word_of_day")
+def send_word_of_day() -> dict:
+    return asyncio.run(_run())  # sync task wrapper: run the async helper to completion
+
+
+async def _run() -> dict:
+    from app.tasks.content_prefetch import send_word_of_day_notification  # imported at call time, not at module import
+
+    async with AsyncSessionLocal() as db:  # one session for the whole notification run
+        return await send_word_of_day_notification(db)
diff --git a/backend-service/app/test_vocab_definitions.py b/backend-service/app/test_vocab_definitions.py
new file mode 100644
index 00000000..0ceec92e
--- /dev/null
+++ b/backend-service/app/test_vocab_definitions.py
@@ -0,0 +1,18 @@
+import asyncio
+import sys
+sys.path.insert(0, "/app")
+from sqlalchemy import select
+from app.core.database import engine
+from app.models.vocabulary import VocabularyItem
+
+async def main():
+    async with engine.connect() as conn:
+        for target in ('idiom', 'lexicon'):
+            rows = await conn.execute(select(VocabularyItem.word, VocabularyItem.definition, VocabularyItem.part_of_speech).where(VocabularyItem.word == target))
+            for word, definition, pos in rows.all():  # unpack in SELECT column order
+                print(f"Word: {word}, POS: {pos}")
+                print(f"Def: {definition}")
+                print("-" * 30)
+
+if __name__ == "__main__":
+ asyncio.run(main())
diff --git a/backend-service/render.yaml b/backend-service/render.yaml
index 45914c26..56230e70 100644
--- a/backend-service/render.yaml
+++ b/backend-service/render.yaml
@@ -50,8 +50,10 @@ services:
sync: false
# CORS Origins
- - key: ENABLE_APP_CORS
- value: "false"
+ # GATEWAY_HANDLES_CORS is unset (defaults to false) — backend handles CORS
+ # directly on Render since there is no Nginx gateway in the request path.
+ # Set GATEWAY_HANDLES_CORS=true only if you add an Nginx/Kong gateway
+ # in front of this service (gateway must use proxy_hide_header to avoid duplicates).
- key: ALLOWED_ORIGINS
value: https://lexilingo.me,https://www.lexilingo.me,https://admin.lexilingo.me
- key: CORS_ALLOW_ORIGIN_REGEX
@@ -68,7 +70,30 @@ services:
- key: LOG_LEVEL
value: INFO
-
+
+ # Email (SMTP) — required for forgot-password / verify-email to actually send.
+ # Without these, EmailService logs a warning and skips sending, but the
+ # /forgot-password endpoint still returns success (by design, to avoid
+ # leaking which emails are registered).
+ - key: SMTP_HOST
+ value: smtp.gmail.com
+ - key: SMTP_PORT
+ value: "465"
+ - key: SMTP_USE_TLS
+ value: "false"
+ - key: SMTP_USE_SSL
+ value: "true"
+ - key: SMTP_USERNAME
+        sync: false  # Enter manually: Gmail address used to send
+ - key: SMTP_PASSWORD
+        sync: false  # Enter manually: Gmail App Password (not the account password)
+ - key: EMAIL_FROM
+ value: thefirestar312@gmail.com
+ - key: PASSWORD_RESET_URL_BASE_PRODUCTION
+ value: lexilingo-app://reset-password
+ - key: EMAIL_VERIFICATION_URL_BASE_PRODUCTION
+ value: https://lexilingo.me/verify-email
+
# Redis (optional - dùng Upstash free tier)
- key: REDIS_URL
sync: false # redis://default:pass@host:port
diff --git a/backend-service/requirements.txt b/backend-service/requirements.txt
index 3b1e32a7..47859cd3 100644
--- a/backend-service/requirements.txt
+++ b/backend-service/requirements.txt
@@ -1,5 +1,5 @@
# FastAPI and Server
-fastapi>=0.136.3
+fastapi>=0.138.0
uvicorn[standard]>=0.49.0
python-multipart>=0.0.32
@@ -11,12 +11,12 @@ psycopg2-binary>=2.9.12
aiosqlite>=0.22.1
# SQLAlchemy (Async ORM)
-sqlalchemy[asyncio]>=2.0.50
+sqlalchemy[asyncio]>=2.0.51
alembic>=1.18.4
# Data Validation
pydantic>=2.13.4
-pydantic-settings>=2.14.1
+pydantic-settings>=2.14.2
email-validator>=2.3.0
# Security
@@ -24,7 +24,7 @@ python-jose[cryptography]>=3.5.0
passlib[bcrypt]>=1.7.4
bcrypt>=5.0.0
firebase-admin>=7.4.0
-google-auth>=2.53.0
+google-auth>=2.55.0
google-auth-oauthlib>=1.4.0
# Environment
@@ -32,7 +32,7 @@ python-dotenv>=1.2.2
# Logging and Error Tracking
loguru>=0.7.3
-sentry-sdk[fastapi,sqlalchemy,loguru]>=2.20.0
+sentry-sdk[fastapi,sqlalchemy,loguru]>=2.63.0
# HTTP Client (optional - for calling AI service)
httpx>=0.28.1
@@ -47,11 +47,12 @@ trafilatura>=2.1.0
youtube-transcript-api>=0.6.3
# Redis (async client — used by quota manager and rate limiting)
-redis>=6.4.0,<7.0.0
+redis>=8.0.0,<9.0.0
-# Background jobs
-celery[redis]>=5.6.3,<6
+# Background jobs — install without redis extras; redis-py is a direct dep and satisfies the transport at runtime
+kombu>=5.6.2,<5.7
+celery>=5.5.3,<5.6
# System metrics (used by admin monitoring endpoints)
psutil>=6.1.1
@@ -60,7 +61,7 @@ psutil>=6.1.1
prometheus-fastapi-instrumentator>=7.1.0
# Testing
-pytest>=9.0.3
+pytest>=9.1.0
pytest-asyncio>=1.4.0
httpx>=0.27.0
diff --git a/backend-service/scripts/check_import_status.py b/backend-service/scripts/check_import_status.py
index f52b3e3c..28df07aa 100644
--- a/backend-service/scripts/check_import_status.py
+++ b/backend-service/scripts/check_import_status.py
@@ -12,10 +12,10 @@
async def main():
try:
- with open("/app/vocabulary_import.json", "r", encoding="utf-8") as f:
+ with open("/app/data/vocabulary_import.json", "r", encoding="utf-8") as f:
data = json.load(f)
except FileNotFoundError:
- print("Error: /app/vocabulary_import.json not found in the container.")
+ print("Error: /app/data/vocabulary_import.json not found in the container.")
return
except Exception as e:
print(f"Error reading JSON: {e}")
diff --git a/backend-service/scripts/expand_vocabulary.py b/backend-service/scripts/expand_vocabulary.py
new file mode 100644
index 00000000..c7084328
--- /dev/null
+++ b/backend-service/scripts/expand_vocabulary.py
@@ -0,0 +1,338 @@
+import json
+import os
+import urllib.request
+import urllib.parse
+import urllib.error
+import ssl
+import time
+import re
+
+JSON_PATH = "/opt/lexilingo/backend-service/data/vocabulary_import.json"
+MEDIA_DIR = "/opt/lexilingo/backend-service/data/media"
+
+from dotenv import load_dotenv
+from pathlib import Path
+
+# Load environment variables.
+# PROJECT_ROOT is the grandparent of the directory that contains this script;
+# the .env files are looked up there.
+PROJECT_ROOT = Path(__file__).parent.parent.parent
+load_dotenv(PROJECT_ROOT / ".env")
+if os.getenv("APP_ENV", "").lower() == "production":
+ # override=False is passed so that, presumably, variables that are already
+ # set are kept and .env.production only fills gaps (confirm in python-dotenv docs).
+ load_dotenv(PROJECT_ROOT / ".env.production", override=False)
+
+# Groq credentials: GROQ_API_KEYS is read as a comma-separated list; empty
+# entries are discarded. If that yields nothing, fall back to GROQ_API_KEY.
+raw_keys = os.getenv("GROQ_API_KEYS", "").strip()
+API_KEYS = [k.strip() for k in raw_keys.split(",") if k.strip()] if raw_keys else []
+if not API_KEYS:
+ single = os.getenv("GROQ_API_KEY", "").strip()
+ if single:
+ API_KEYS = [single]
+
+# Fail at import time when no key is available.
+if not API_KEYS:
+ raise ValueError("Neither GROQ_API_KEYS nor GROQ_API_KEY is configured in the environment.")
+
+# Cursor into API_KEYS used by get_next_api_key() for round-robin rotation.
+current_key_index = 0
+
+def get_next_api_key():
+    """Return the next Groq API key, cycling through API_KEYS round-robin."""
+    global current_key_index
+    # Remember which slot to hand out, then advance the cursor (wrapping at the end).
+    slot = current_key_index
+    current_key_index = (slot + 1) % len(API_KEYS)
+    return API_KEYS[slot]
+
+def clean_filename(word):
+    """Build a lowercase file-name stem from *word*.
+
+    Every character that is not a word character (letter, digit, underscore)
+    or a dash is dropped; the remainder is lowercased.
+    """
+    return re.sub(r"[^\w\-_]", "", word).lower()
+
+def download_audio(url, dest_path, timeout=30):
+    """Download the audio file at *url* and store it at *dest_path*.
+
+    Args:
+        url: Source URL; a protocol-relative URL ("//host/...") is fetched over https.
+        dest_path: File path the downloaded bytes are written to.
+        timeout: Seconds to wait on the network before giving up.
+
+    Returns:
+        True when the file was written, False on any failure (best effort).
+    """
+    if url.startswith("//"):
+        url = "https:" + url
+    headers = {"User-Agent": "Mozilla/5.0"}
+    try:
+        # Request() validates the URL, so it belongs inside the best-effort guard.
+        req = urllib.request.Request(url, headers=headers)
+        # Verify the server certificate; an unverified context would accept any peer.
+        context = ssl.create_default_context()
+        with urllib.request.urlopen(req, context=context, timeout=timeout) as response:
+            payload = response.read()
+        # Open the destination only after the body arrived, so a failed
+        # transfer never leaves an empty file behind.
+        with open(dest_path, "wb") as f:
+            f.write(payload)
+        return True
+    except Exception as e:
+        print(f" Failed to download audio from {url}: {e}")
+        return False
+
+def get_audio_and_phonetic_from_api(word, timeout=15):
+    """Look up *word* on dictionaryapi.dev and return ``(phonetic, audio_url)``.
+
+    The phonetic text comes from the entry's top-level "phonetic" field, or
+    else from the first "phonetics" element that has "text". For audio, the
+    first available URL is taken and then replaced by any later URL that looks
+    like a US recording ("-us" or "us.mp3" in the URL).
+
+    Returns:
+        ``(phonetic_text, audio_url)`` from the first entry, or ``(None, None)``
+        when the lookup fails or the response is not a non-empty list.
+    """
+    url = f"https://api.dictionaryapi.dev/api/v2/entries/en/{urllib.parse.quote(word)}"
+    headers = {"User-Agent": "Mozilla/5.0"}
+    req = urllib.request.Request(url, headers=headers)
+    try:
+        # Verify the server certificate instead of disabling TLS checks.
+        context = ssl.create_default_context()
+        with urllib.request.urlopen(req, context=context, timeout=timeout) as response:
+            data = json.loads(response.read().decode("utf-8"))
+        if data and isinstance(data, list):
+            entry = data[0]
+            audio_url = None
+            phonetic_text = entry.get("phonetic", "")
+            for p in entry.get("phonetics", []):
+                if not phonetic_text and p.get("text"):
+                    phonetic_text = p.get("text")
+                audio = p.get("audio")
+                if audio and (not audio_url or "-us" in audio or "us.mp3" in audio):
+                    audio_url = audio
+            return phonetic_text, audio_url
+    except urllib.error.HTTPError as e:
+        # 404 just means the dictionary has no entry; anything else is worth seeing.
+        if e.code != 404:
+            print(f" Dictionary API HTTP error {e.code} for '{word}'")
+    except Exception as e:
+        # Still best effort, but no longer silent.
+        print(f" Dictionary API lookup failed for '{word}': {e}")
+    return None, None
+
+def call_groq(payload, timeout=60):
+    """POST *payload* to the Groq chat-completions endpoint and return the reply text.
+
+    Keys are rotated with get_next_api_key(); up to two full passes over
+    API_KEYS are attempted, pausing 0.5s after each failure.
+
+    Args:
+        payload: JSON-serialisable request body.
+        timeout: Seconds to wait for each HTTP request.
+
+    Returns:
+        The content of the first choice's message, or None when every attempt failed.
+    """
+    url = "https://api.groq.com/openai/v1/chat/completions"
+    body = json.dumps(payload).encode("utf-8")
+    # The request carries a bearer token, so the server certificate must be verified.
+    context = ssl.create_default_context()
+    for _ in range(len(API_KEYS) * 2):
+        api_key = get_next_api_key()
+        headers = {
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/json",
+            "User-Agent": "Mozilla/5.0"
+        }
+        req = urllib.request.Request(url, data=body, headers=headers, method="POST")
+        try:
+            with urllib.request.urlopen(req, context=context, timeout=timeout) as response:
+                res_data = json.loads(response.read().decode("utf-8"))
+            return res_data["choices"][0]["message"]["content"]
+        except urllib.error.HTTPError as e:
+            print(f" Groq request failed with HTTP {e.code}; trying next key...")
+            time.sleep(0.5)
+        except Exception as e:
+            print(f" Groq request failed: {e}; trying next key...")
+            time.sleep(0.5)
+    # Every attempt failed: pause briefly before handing control back to the caller.
+    time.sleep(2.0)
+    return None
+
+def generate_words_for_level(level, level_type, count, existing_words):
+    """Ask the LLM for new words at a given level and return up to *count* of them.
+
+    Twice as many words as needed are requested so that duplicates can be
+    discarded and *count* words still remain.
+
+    Args:
+        level: Level label, e.g. "B1" or "6.0".
+        level_type: Scale name inserted in the prompt, e.g. "CEFR" or "IELTS".
+        count: Maximum number of words to return.
+        existing_words: Lowercased words that must not be returned again.
+
+    Returns:
+        A list of lowercased, de-duplicated words (possibly empty).
+    """
+    print(f"Generating list of {count} words for {level_type} level {level}...")
+    prompt = (
+        f"Generate a JSON list of exactly {count * 2} common, high-quality, practical English words "
+        f"suitable for {level_type} level {level}. "
+        f"Return ONLY a raw JSON list of strings, e.g. [\"word1\", \"word2\"]. No extra markdown, explanation, or tags."
+    )
+    payload = {
+        "model": "llama-3.3-70b-versatile",
+        "messages": [
+            {"role": "system", "content": "You are a vocabulary builder. Output ONLY raw JSON lists of strings."},
+            {"role": "user", "content": prompt}
+        ],
+        "temperature": 0.7
+    }
+
+    res = call_groq(payload)
+    if not res:
+        return []
+
+    try:
+        # Strip any markdown code block wraps
+        res_clean = res.strip()
+        if res_clean.startswith("```json"):
+            res_clean = res_clean[7:]
+        if res_clean.startswith("```"):
+            res_clean = res_clean[3:]
+        if res_clean.endswith("```"):
+            res_clean = res_clean[:-3]
+        words = json.loads(res_clean.strip())
+    except Exception as e:
+        print(f"Failed to parse word list for level {level}: {e}. Response was: {res}")
+        return []
+
+    if not isinstance(words, list):
+        print(f"Failed to parse word list for level {level}: expected a JSON list. Response was: {res}")
+        return []
+
+    # Filter duplicates; a stray non-string entry is skipped instead of
+    # discarding the whole batch.
+    seen = set()
+    filtered = []
+    for w in words:
+        if not isinstance(w, str):
+            continue
+        w_clean = w.strip().lower()
+        if w_clean and w_clean not in existing_words and w_clean not in seen:
+            seen.add(w_clean)
+            filtered.append(w_clean)
+    return filtered[:count]
+
+def fetch_details_for_word(word, level, ielts_band=None):
+    """Ask the LLM for definition, example, phonetic, tags and translations of *word*.
+
+    Args:
+        word: English word to describe.
+        level: Accepted for the caller's convenience; not used in the prompt.
+        ielts_band: Accepted for the caller's convenience; not used in the prompt.
+
+    Returns:
+        The parsed JSON object as a dict, or None when the request failed or
+        the reply was not a JSON object.
+    """
+    print(f"Fetching translations and details for word '{word}'...")
+    prompt = (
+        f"Provide translation and example details for the English word '{word}'.\n"
+        f"Format your response as a strict JSON object with the following fields:\n"
+        f"{{\n"
+        f" \"definition\": \"A concise, clear English definition suitable for language learners\",\n"
+        f" \"example\": \"A natural, practical English example sentence using the word '{word}'\",\n"
+        f" \"phonetic\": \"IPA phonetic spelling, e.g. /fəˈnɛtɪk/\",\n"
+        f" \"part_of_speech\": \"noun/verb/adjective/adverb/pronoun/preposition/conjunction/interjection/phrase\",\n"
+        f" \"tags\": \"one relevant thematic category like technology, business, food, health, travel, daily_life, science\",\n"
+        f" \"translation\": {{\n"
+        f" \"en\": \"synonym or simple English translation\",\n"
+        f" \"vi\": \"Vietnamese translation\",\n"
+        f" \"ja\": \"Japanese translation\",\n"
+        f" \"ko\": \"Korean translation\",\n"
+        f" \"zh\": \"Chinese translation\",\n"
+        f" \"fr\": \"French translation\",\n"
+        f" \"es\": \"Spanish translation\"\n"
+        f" }}\n"
+        f"}}\n"
+        f"Return ONLY the raw JSON object. No explanation, quotes, or markdown wrappers."
+    )
+
+    payload = {
+        "model": "llama-3.3-70b-versatile",
+        "messages": [
+            {"role": "system", "content": "You are a lexicographer. Output ONLY raw JSON objects matching the schema."},
+            {"role": "user", "content": prompt}
+        ],
+        "temperature": 0.0
+    }
+
+    res = call_groq(payload)
+    if not res:
+        return None
+
+    try:
+        # Remove an optional markdown code fence around the JSON.
+        res_clean = res.strip()
+        if res_clean.startswith("```json"):
+            res_clean = res_clean[7:]
+        if res_clean.startswith("```"):
+            res_clean = res_clean[3:]
+        if res_clean.endswith("```"):
+            res_clean = res_clean[:-3]
+        details = json.loads(res_clean.strip())
+    except Exception as e:
+        print(f"Failed to parse details for '{word}': {e}. Response: {res}")
+        return None
+
+    # Callers use dict access on the result, so a JSON list/string is a failure.
+    if not isinstance(details, dict):
+        print(f"Failed to parse details for '{word}': expected a JSON object. Response: {res}")
+        return None
+    return details
+
+def _ielts_to_cefr(band):
+    """Map an IELTS band string (e.g. "6.0") to the closest CEFR level."""
+    val = float(band)
+    if val <= 2.0:
+        return "A1"
+    if val <= 3.5:
+        return "A2"
+    if val <= 4.5:
+        return "B1"
+    if val <= 6.0:
+        return "B2"
+    if val <= 7.5:
+        return "C1"
+    return "C2"
+
+
+def _build_vocab_item(word, details, difficulty, level_tags, index, download_msg):
+    """Assemble one vocabulary record, downloading its pronunciation when available."""
+    api_phonetic, api_audio_url = get_audio_and_phonetic_from_api(word)
+    # Prefer the dictionary's phonetic transcription over the LLM's.
+    phonetic = api_phonetic if api_phonetic else details.get("phonetic", "")
+    audio_filename = ""
+
+    if api_audio_url:
+        ext = ".wav" if ".wav" in api_audio_url.lower() else ".mp3"
+        filename = f"{clean_filename(word)}{ext}"
+        dest_path = os.path.join(MEDIA_DIR, filename)
+        print(f"{download_msg}: {api_audio_url}")
+        if download_audio(api_audio_url, dest_path):
+            audio_filename = filename
+
+    return {
+        "word": word,
+        "definition": details.get("definition", ""),
+        "example": details.get("example", ""),
+        "phonetic": phonetic,
+        "audios": {"pronunciation": audio_filename} if audio_filename else {},
+        "images": "",
+        "index": index,
+        # Thematic tag from the LLM followed by the level tag(s).
+        "tags": f"{details.get('tags', 'general')},{level_tags}",
+        "difficulty_level": difficulty,
+        "translation": details.get("translation", {})
+    }
+
+
+def _generate_items(level, level_type, count, difficulty, level_tags, label,
+                    download_msg, existing_words, next_index, ielts_band=None):
+    """Generate, enrich and return the new vocabulary records for one level."""
+    items = []
+    for w in generate_words_for_level(level, level_type, count, existing_words):
+        existing_words.add(w)  # prevent duplicates in same run
+
+        details = fetch_details_for_word(w, difficulty, ielts_band)
+        if not details or not isinstance(details, dict):
+            continue
+
+        items.append(_build_vocab_item(
+            w, details, difficulty, level_tags, next_index + len(items), download_msg
+        ))
+        print(f" Successfully added {label} word '{w}'")
+        time.sleep(0.5)
+    return items
+
+
+def main():
+    """Extend the vocabulary JSON with LLM-generated words for each CEFR and IELTS level.
+
+    New records continue the existing "index" numbering and are written back
+    to JSON_PATH in a single save at the end of the run.
+    """
+    os.makedirs(MEDIA_DIR, exist_ok=True)
+
+    print("Loading existing vocabulary...")
+    with open(JSON_PATH, "r", encoding="utf-8") as f:
+        data = json.load(f)
+
+    # Records without a word are ignored rather than raising KeyError.
+    existing_words = {item["word"].lower().strip() for item in data if item.get("word")}
+    # default=0 keeps an empty vocabulary file from raising ValueError.
+    max_index = max((item.get("index", 0) for item in data), default=0)
+
+    # Levels configuration
+    cefr_levels = ["A1", "A2", "B1", "B2", "C1", "C2"]
+    ielts_levels = ["1.0", "2.0", "3.0", "4.0", "5.0", "6.0", "7.0", "8.0", "9.0"]
+
+    words_to_generate_cefr = 10  # words per CEFR level
+    words_to_generate_ielts = 5  # words per IELTS level
+
+    new_items = []
+
+    # 1. Generate CEFR words
+    for level in cefr_levels:
+        new_items.extend(_generate_items(
+            level, "CEFR", words_to_generate_cefr,
+            difficulty=level,
+            level_tags=f"cefr_{level}",
+            label=f"CEFR {level}",
+            download_msg=" Downloading pronunciation from Dictionary API",
+            existing_words=existing_words,
+            next_index=max_index + len(new_items) + 1,
+        ))
+
+    # 2. Generate IELTS words, stored under the closest CEFR difficulty level
+    for ielts in ielts_levels:
+        cefr_mapped = _ielts_to_cefr(ielts)
+        new_items.extend(_generate_items(
+            ielts, "IELTS", words_to_generate_ielts,
+            difficulty=cefr_mapped,
+            level_tags=f"cefr_{cefr_mapped},ielts_{ielts}",
+            label=f"IELTS {ielts} (CEFR {cefr_mapped})",
+            download_msg=" Downloading pronunciation",
+            existing_words=existing_words,
+            next_index=max_index + len(new_items) + 1,
+            ielts_band=ielts,
+        ))
+
+    if new_items:
+        data.extend(new_items)
+        with open(JSON_PATH, "w", encoding="utf-8") as f:
+            json.dump(data, f, ensure_ascii=False, indent=2)
+        print(f"Successfully added {len(new_items)} new vocabulary items to {JSON_PATH}!")
+    else:
+        print("No new vocabulary items were generated.")
+
+if __name__ == "__main__":
+ main()
diff --git a/backend-service/scripts/fetch_audios.py b/backend-service/scripts/fetch_audios.py
new file mode 100644
index 00000000..801d7b9a
--- /dev/null
+++ b/backend-service/scripts/fetch_audios.py
@@ -0,0 +1,176 @@
+import json
+import os
+import urllib.request
+import urllib.parse
+import urllib.error
+import ssl
+import time
+import re
+
+JSON_PATH = "/opt/lexilingo/backend-service/data/vocabulary_import.json"
+MEDIA_DIR = "/opt/lexilingo/backend-service/data/media"
+
+def clean_filename(word):
+    """Return *word* lowercased, keeping only word characters and dashes."""
+    # Anything outside [A-Za-z0-9_], other Unicode word characters and "-" is removed.
+    return re.sub(r"[^\w\-_]", "", word).lower()
+
+def download_audio_from_url(url, dest_path, timeout=30):
+    """Download *url* to *dest_path*, retrying on rate limiting and transient errors.
+
+    Up to three attempts are made. HTTP 429 waits 2s, 4s, 8s (exponential
+    back-off) before the next attempt; any other HTTP error aborts at once;
+    other failures wait one second and retry.
+
+    Args:
+        url: Source URL; a protocol-relative URL ("//host/...") is fetched over https.
+        dest_path: File path the downloaded bytes are written to.
+        timeout: Seconds to wait on the network per attempt.
+
+    Returns:
+        True when the file was written, False otherwise.
+    """
+    if url.startswith("//"):
+        url = "https:" + url
+
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+    }
+    req = urllib.request.Request(url, headers=headers)
+
+    max_retries = 3
+    base_delay = 2.0
+
+    for attempt in range(max_retries):
+        try:
+            # Verify the server certificate instead of disabling TLS checks.
+            context = ssl.create_default_context()
+            with urllib.request.urlopen(req, context=context, timeout=timeout) as response:
+                payload = response.read()
+            # Write only after the full body arrived: main() treats an existing
+            # file as "audio present", so an empty leftover would be wrong.
+            with open(dest_path, "wb") as f:
+                f.write(payload)
+            return True
+        except urllib.error.HTTPError as e:
+            if e.code == 429:
+                delay = base_delay * (2 ** attempt)
+                print(f"Rate limited (429) downloading audio. Retrying in {delay} seconds...")
+                time.sleep(delay)
+            else:
+                print(f"HTTP error downloading audio from {url}: {e.code}")
+                return False
+        except Exception as e:
+            print(f"Error downloading audio from {url}: {e}")
+            time.sleep(1.0)
+
+    return False
+
+def get_audio_url_from_api(word, timeout=15):
+    """Return a pronunciation audio URL for *word* from dictionaryapi.dev, or None.
+
+    A URL that looks like a US recording ("-us" or "us.mp3" in the URL) is
+    preferred; otherwise the first audio URL of the first entry is returned.
+    A 404 (word unknown) returns None silently; other errors are printed.
+    """
+    url = f"https://api.dictionaryapi.dev/api/v2/entries/en/{urllib.parse.quote(word)}"
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+    }
+    req = urllib.request.Request(url, headers=headers)
+
+    try:
+        # Verify the server certificate instead of disabling TLS checks.
+        context = ssl.create_default_context()
+        with urllib.request.urlopen(req, context=context, timeout=timeout) as response:
+            data = json.loads(response.read().decode("utf-8"))
+        if data and isinstance(data, list):
+            # Collect every non-empty audio link of the first entry, in order.
+            audio_urls = [
+                p.get("audio") for p in data[0].get("phonetics", []) if p.get("audio")
+            ]
+            # First try finding US audio, then any audio
+            for audio_url in audio_urls:
+                if "-us" in audio_url or "us.mp3" in audio_url:
+                    return audio_url
+            if audio_urls:
+                return audio_urls[0]
+    except urllib.error.HTTPError as e:
+        if e.code == 404:
+            # Word not found
+            return None
+        print(f"API HTTP Error {e.code} for word '{word}'")
+    except Exception as e:
+        print(f"API Error fetching word '{word}': {e}")
+
+    return None
+
+def main():
+ """Download missing pronunciation audio for the vocabulary JSON.
+
+ Loads JSON_PATH, finds items whose pronunciation file is unset or absent
+ from MEDIA_DIR, fetches an audio URL per word, saves the file and records
+ its name under item["audios"]["pronunciation"]. The JSON is rewritten
+ periodically and once more at the end when anything was downloaded.
+ """
+ if not os.path.exists(MEDIA_DIR):
+ os.makedirs(MEDIA_DIR, exist_ok=True)
+ print(f"Created media directory: {MEDIA_DIR}")
+
+ print("Loading vocabulary JSON...")
+ with open(JSON_PATH, "r", encoding="utf-8") as f:
+ data = json.load(f)
+
+ # Collect the indices of items that still need an audio file
+ to_download = []
+ for idx, item in enumerate(data):
+ word = item.get("word")
+ if not word:
+ continue
+
+ # "audios" is accepted either as a dict with a "pronunciation" key or as
+ # a list whose first element is the file name.
+ audios = item.get("audios", {})
+ pronunciation_file = None
+ if isinstance(audios, dict):
+ pronunciation_file = audios.get("pronunciation")
+ elif isinstance(audios, list) and audios:
+ pronunciation_file = audios[0]
+
+ audio_path = os.path.join(MEDIA_DIR, pronunciation_file) if pronunciation_file else None
+
+ # If pronunciation is not configured, or file does not exist on disk
+ if not pronunciation_file or not os.path.exists(audio_path):
+ to_download.append(idx)
+
+ print(f"Total items in JSON: {len(data)}")
+ print(f"Total items needing audio download: {len(to_download)}")
+
+ if not to_download:
+ print("All vocabulary audio files are already present on disk!")
+ return
+
+ downloaded_count = 0
+ failed_count = 0
+
+ # No per-run cap is applied: every item in to_download is processed and
+ # progress is printed for each one. The dictionary API needs no key, but
+ # rate limits may apply, so the loop sleeps 0.5s between requests.
+
+ for count, idx in enumerate(to_download):
+ item = data[idx]
+ word = item.get("word")
+
+ print(f"[{count+1}/{len(to_download)}] Fetching audio for '{word}'...")
+ audio_url = get_audio_url_from_api(word)
+
+ if audio_url:
+ # Determine extension
+ ext = ".mp3"
+ if ".wav" in audio_url.lower():
+ ext = ".wav"
+
+ filename = f"{clean_filename(word)}{ext}"
+ dest_path = os.path.join(MEDIA_DIR, filename)
+
+ print(f" Downloading from: {audio_url}")
+ success = download_audio_from_url(audio_url, dest_path)
+
+ if success:
+ # Replaces whatever "audios" held before with a single-key dict.
+ item["audios"] = {"pronunciation": filename}
+ downloaded_count += 1
+ print(f" Successfully saved audio as '{filename}'")
+
+ # Checkpoint save
+ if downloaded_count % 10 == 0:
+ with open(JSON_PATH, "w", encoding="utf-8") as f:
+ json.dump(data, f, ensure_ascii=False, indent=2)
+ print(" Progress checkpoint saved.")
+ else:
+ failed_count += 1
+ print(f" Failed to download audio file.")
+ else:
+ failed_count += 1
+ print(f" No audio URL found in dictionary API.")
+
+ time.sleep(0.5) # respectful delay
+
+ # Final save
+ if downloaded_count > 0:
+ with open(JSON_PATH, "w", encoding="utf-8") as f:
+ json.dump(data, f, ensure_ascii=False, indent=2)
+ print("Final updates saved.")
+
+ print(f"Completed audio download task. Successes: {downloaded_count}, Failures/Not Found: {failed_count}")
+
+if __name__ == "__main__":
+ main()
diff --git a/backend-service/scripts/fetch_definitions.py b/backend-service/scripts/fetch_definitions.py
new file mode 100644
index 00000000..896d1733
--- /dev/null
+++ b/backend-service/scripts/fetch_definitions.py
@@ -0,0 +1,100 @@
+import json
+import urllib.request
+import urllib.parse
+import re
+import time
+
+FILE_PATH = "/opt/lexilingo/backend-service/data/vocabulary_import.json"
+
+def strip_html(text):
+    """Return *text* with every ``<...>`` tag removed (non-greedy match)."""
+    return re.sub('<.*?>', '', text)
+
+def get_wiktionary_definition(word, timeout=30):
+    """Return the first English definition of *word* from Wiktionary, or "".
+
+    HTTP 429 is retried up to five times with exponential back-off starting
+    at 5 seconds. A 404, any other HTTP error, or any other failure yields "".
+
+    Args:
+        word: Word to look up.
+        timeout: Seconds to wait for each HTTP request.
+
+    Returns:
+        The definition with HTML tags removed, or "" when none was found.
+    """
+    # Imported explicitly: the handler below references urllib.error, which is
+    # otherwise only reachable as a side effect of importing urllib.request.
+    import urllib.error
+
+    url = f"https://en.wiktionary.org/api/rest_v1/page/definition/{urllib.parse.quote(word)}"
+    req = urllib.request.Request(url, headers={
+        'User-Agent': 'LexiLingo-VocabBot/1.0 (contact@lexilingo.com)',
+        'Accept': 'application/json'
+    })
+
+    max_retries = 5
+    base_delay = 5.0
+
+    for attempt in range(max_retries):
+        try:
+            with urllib.request.urlopen(req, timeout=timeout) as response:
+                data = json.loads(response.read().decode())
+
+            # data format: {"en": [{"partOfSpeech": "Noun", "definitions": [{"definition": "..."}]}]}
+            for pos_block in data.get("en") or []:
+                definitions = pos_block.get("definitions") or []
+                if definitions:
+                    # Only the very first definition of each part of speech is considered.
+                    raw_def = definitions[0].get("definition", "")
+                    if raw_def:
+                        return strip_html(raw_def)
+            return ""
+        except urllib.error.HTTPError as e:
+            if e.code == 429:
+                delay = base_delay * (2 ** attempt)
+                print(f"Rate limited (429) for {word}. Retrying in {delay} seconds...")
+                time.sleep(delay)
+            elif e.code == 404:
+                # Word not found on wiktionary
+                return ""
+            else:
+                print(f"HTTP Error fetching {word}: {e}")
+                return ""
+        except Exception as e:
+            print(f"Error fetching {word}: {e}")
+            return ""
+
+    print(f"Failed to fetch {word} after {max_retries} retries.")
+    return ""
+
+def main():
+ """Fill empty or "#N/A yet" definitions in the vocabulary JSON from Wiktionary.
+
+ Loads FILE_PATH, looks up a definition for items that lack one, and writes
+ the file back at checkpoints and once at the end when anything changed.
+ """
+ print("Loading vocabulary...")
+ with open(FILE_PATH, 'r', encoding='utf-8') as f:
+ data = json.load(f)
+
+ # count drives the progress label and the checkpoint cadence;
+ # updated is the number of definitions actually replaced.
+ count = 0
+ updated = 0
+
+ print(f"Fetching missing definitions...")
+
+ for item in data:
+ word = item.get("word")
+ definition = item.get("definition", "")
+
+ if not word:
+ continue
+
+ # "#N/A yet" is treated as a placeholder for a missing definition.
+ if definition == "#N/A yet" or definition == "":
+ print(f"[{count+1}] Fetching definition for: {word}")
+
+ new_def = get_wiktionary_definition(word)
+ if new_def:
+ item['definition'] = new_def
+ updated += 1
+
+ time.sleep(2.0) # Polite delay
+
+ count += 1
+
+ # Checkpoint save
+ # NOTE(review): 'updated' is cumulative, so once one definition has been
+ # filled this condition rewrites the file at every 500th count even when
+ # nothing changed since the previous checkpoint.
+ if count % 500 == 0 and updated > 0:
+ print(f"Checkpoint: Saving {updated} new definitions...")
+ with open(FILE_PATH, 'w', encoding='utf-8') as f:
+ json.dump(data, f, ensure_ascii=False, indent=2)
+
+ if updated > 0:
+ print(f"Saving {updated} final definitions...")
+ with open(FILE_PATH, 'w', encoding='utf-8') as f:
+ json.dump(data, f, ensure_ascii=False, indent=2)
+ print("Save completed.")
+ else:
+ print("No new definitions were updated.")
+
+if __name__ == "__main__":
+ main()
diff --git a/backend-service/scripts/fill_with_groq.py b/backend-service/scripts/fill_with_groq.py
new file mode 100644
index 00000000..2e2b2f0d
--- /dev/null
+++ b/backend-service/scripts/fill_with_groq.py
@@ -0,0 +1,197 @@
+import json
+import urllib.request
+import urllib.parse
+import urllib.error
+import ssl
+import time
+import os
+import re
+
+FILE_PATH = "/opt/lexilingo/backend-service/data/vocabulary_import.json"
+
+from dotenv import load_dotenv
+from pathlib import Path
+
+# Load environment variables.
+# PROJECT_ROOT is the grandparent of the directory that contains this script;
+# the .env files are looked up there.
+PROJECT_ROOT = Path(__file__).parent.parent.parent
+load_dotenv(PROJECT_ROOT / ".env")
+if os.getenv("APP_ENV", "").lower() == "production":
+ # override=False is passed so that, presumably, variables that are already
+ # set are kept and .env.production only fills gaps (confirm in python-dotenv docs).
+ load_dotenv(PROJECT_ROOT / ".env.production", override=False)
+
+# Groq credentials: GROQ_API_KEYS is read as a comma-separated list; empty
+# entries are discarded. If that yields nothing, fall back to GROQ_API_KEY.
+raw_keys = os.getenv("GROQ_API_KEYS", "").strip()
+API_KEYS = [k.strip() for k in raw_keys.split(",") if k.strip()] if raw_keys else []
+if not API_KEYS:
+ single = os.getenv("GROQ_API_KEY", "").strip()
+ if single:
+ API_KEYS = [single]
+
+# Fail at import time when no key is available.
+if not API_KEYS:
+ raise ValueError("Neither GROQ_API_KEYS nor GROQ_API_KEY is configured in the environment.")
+
+# Cursor into API_KEYS used by get_next_api_key() for round-robin rotation.
+current_key_index = 0
+
+def get_next_api_key():
+    """Hand out API keys in rotation: each call returns the key after the previous one."""
+    global current_key_index
+    position = current_key_index
+    # Advance for the next caller, wrapping back to the first key at the end.
+    current_key_index = (position + 1) % len(API_KEYS)
+    return API_KEYS[position]
+
+def clean_definition(text):
+    """Normalise a raw LLM answer into a tidy one-sentence definition.
+
+    Steps: trim whitespace, unwrap surrounding quotes, drop one known lead-in
+    phrase (e.g. "definition:"), capitalise the first letter and make sure the
+    text ends with ".", "!" or "?". An empty result is returned unchanged.
+    """
+    text = text.strip()
+    # Unwrap double quotes first, then single quotes.
+    for quote in ('"', "'"):
+        if text.startswith(quote) and text.endswith(quote):
+            text = text[1:-1].strip()
+
+    # Lead-ins the model sometimes adds despite instructions; the first match wins.
+    lead_ins = (
+        "definition:", "definition is:", "the definition is:", "refers to:",
+        "meaning:", "a definition of", "frankly means",
+    )
+    lowered = text.lower()
+    matched = next((p for p in lead_ins if lowered.startswith(p)), None)
+    if matched is not None:
+        text = text[len(matched):].strip()
+        # Tidy up punctuation or quotes left behind by the removed lead-in.
+        if text.startswith((':', '-')):
+            text = text[1:].strip()
+        if text.startswith('"') and text.endswith('"'):
+            text = text[1:-1].strip()
+
+    if not text:
+        return text
+
+    text = text[0].upper() + text[1:]
+    if not text.endswith(('.', '!', '?')):
+        text += '.'
+    return text
+
+def get_groq_definition(word, example, translation_en, translation_vi, tags, timeout=60):
+    """Ask the Groq LLM for a learner-friendly English definition of *word*.
+
+    The prompt carries whatever context is available (example sentence,
+    translations, tags). Keys are rotated with get_next_api_key(); up to two
+    full passes over API_KEYS are attempted.
+
+    Args:
+        word: Word to define.
+        example: Example sentence, or a falsy value to omit it.
+        translation_en: English synonym/translation, or falsy to omit.
+        translation_vi: Vietnamese translation, or falsy to omit.
+        tags: Category/tags text, or falsy to omit.
+        timeout: Seconds to wait for each HTTP request.
+
+    Returns:
+        The cleaned definition, or "" when every attempt failed.
+    """
+    url = "https://api.groq.com/openai/v1/chat/completions"
+
+    prompt = f"Word: {word}\n"
+    if example:
+        prompt += f"Example Sentence: {example}\n"
+    if translation_en:
+        prompt += f"English Translation/Synonym: {translation_en}\n"
+    if translation_vi:
+        prompt += f"Vietnamese Translation: {translation_vi}\n"
+    if tags:
+        prompt += f"Category/Tags: {tags}\n"
+
+    system_msg = (
+        "You are an expert lexicographer writing definitions for language learners. "
+        "Provide a clear, concise definition of the requested word in English. "
+        "The definition must be suitable for intermediate language learners and match the meaning of the word as used in the given example sentence and translations.\n"
+        "Rules:\n"
+        "1. Output ONLY the definition itself (e.g. 'In a straightforward, honest, and direct manner').\n"
+        "2. Do NOT include the word being defined, do NOT include quotes, do NOT include any introductory or explanatory text (e.g. do not say 'Here is the definition' or 'Definition:').\n"
+        "3. Keep it to one concise sentence or phrase."
+    )
+
+    payload = {
+        "model": "llama-3.3-70b-versatile",
+        "messages": [
+            {"role": "system", "content": system_msg},
+            {"role": "user", "content": prompt}
+        ],
+        "temperature": 0.0,
+        "max_tokens": 150
+    }
+    body = json.dumps(payload).encode("utf-8")
+    # The request carries a bearer token, so the server certificate must be verified.
+    context = ssl.create_default_context()
+
+    for _ in range(len(API_KEYS) * 2):  # Try rotating keys up to 2 full cycles
+        # Capture the index before rotating so failures name the key actually used.
+        key_index = current_key_index
+        api_key = get_next_api_key()
+        headers = {
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/json",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+        }
+        req = urllib.request.Request(url, data=body, headers=headers, method="POST")
+
+        try:
+            with urllib.request.urlopen(req, context=context, timeout=timeout) as response:
+                res_data = json.loads(response.read().decode("utf-8"))
+            raw_content = res_data["choices"][0]["message"]["content"]
+            return clean_definition(raw_content)
+        except urllib.error.HTTPError as e:
+            # Read error body if possible
+            try:
+                err_body = e.read().decode("utf-8")
+            except Exception:
+                err_body = ""
+            print(f"API key index {key_index} failed with HTTP {e.code} for word '{word}'. Error: {err_body[:200]}")
+
+            # If rate limit or other error, try the next key after a short pause
+            time.sleep(0.5)
+        except Exception as e:
+            print(f"API key index {key_index} failed with generic error for word '{word}': {e}")
+            time.sleep(0.5)
+
+    # Every key failed twice: cool down so the caller's next word does not hit
+    # the API immediately, then report failure.
+    print("All API keys failed. Cooling down for 5 seconds before moving on...")
+    time.sleep(5.0)
+    return ""
+
+def main():
+    """Fill empty or "#N/A yet" definitions in the vocabulary JSON using the Groq LLM.
+
+    The file is rewritten after every successful definition so an interrupted
+    run loses no completed work.
+    """
+    print("Loading vocabulary JSON...")
+    with open(FILE_PATH, 'r', encoding='utf-8') as f:
+        data = json.load(f)
+
+    print(f"Total vocabulary items: {len(data)}")
+
+    # Identify items to fill; items without a word cannot be defined and are skipped.
+    to_fill = []
+    for idx, item in enumerate(data):
+        if not item.get("word"):
+            continue
+        definition = item.get("definition", "")
+        if definition == "#N/A yet" or not definition:
+            to_fill.append(idx)
+
+    print(f"Found {len(to_fill)} items needing definition updates.")
+
+    if not to_fill:
+        print("No items to fill!")
+        return
+
+    success_count = 0
+
+    for count, idx in enumerate(to_fill):
+        item = data[idx]
+        word = item.get("word")
+        example = item.get("example", "")
+        tags = item.get("tags", "")
+        translation_block = item.get("translation", {})
+        # A malformed record may hold a string or null here instead of a dict.
+        if not isinstance(translation_block, dict):
+            translation_block = {}
+        translation_en = translation_block.get("en", "")
+        translation_vi = translation_block.get("vi", "")
+
+        print(f"[{count+1}/{len(to_fill)}] Fetching definition for '{word}'...")
+
+        definition = get_groq_definition(word, example, translation_en, translation_vi, tags)
+
+        if definition:
+            print(f" Word: '{word}'")
+            print(f" Definition: {definition}")
+            item["definition"] = definition
+            success_count += 1
+
+            # Save progressively
+            with open(FILE_PATH, 'w', encoding='utf-8') as f:
+                json.dump(data, f, ensure_ascii=False, indent=2)
+            print(" Progress saved.")
+        else:
+            print(f" Warning: Failed to fetch definition for '{word}' after trying all API keys.")
+
+        # Small delay between requests to be polite
+        time.sleep(0.5)
+
+    print(f"Processing complete. Filled {success_count}/{len(to_fill)} missing definitions.")
+
+if __name__ == "__main__":
+ main()
diff --git a/backend-service/scripts/fix_vocabulary_import.py b/backend-service/scripts/fix_vocabulary_import.py
new file mode 100755
index 00000000..c2366bb7
--- /dev/null
+++ b/backend-service/scripts/fix_vocabulary_import.py
@@ -0,0 +1,276 @@
+#!/usr/bin/env python3
+import json
+import os
+import re
+import ssl
+import time
+import urllib.request
+import urllib.parse
+import urllib.error
+
+FILE_PATH = "/opt/lexilingo/backend-service/data/vocabulary_import.json"
+GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
+MODEL = "llama-3.3-70b-versatile"
+
+from dotenv import load_dotenv
+from pathlib import Path
+
+# Load environment variables.
+# PROJECT_ROOT is the grandparent of the directory that contains this script;
+# the .env files are looked up there.
+PROJECT_ROOT = Path(__file__).parent.parent.parent
+load_dotenv(PROJECT_ROOT / ".env")
+if os.getenv("APP_ENV", "").lower() == "production":
+ # override=False is passed so that, presumably, variables that are already
+ # set are kept and .env.production only fills gaps (confirm in python-dotenv docs).
+ load_dotenv(PROJECT_ROOT / ".env.production", override=False)
+
+# Groq credentials: GROQ_API_KEYS is read as a comma-separated list; empty
+# entries are discarded. If that yields nothing, fall back to GROQ_API_KEY.
+raw_keys = os.getenv("GROQ_API_KEYS", "").strip()
+API_KEYS = [k.strip() for k in raw_keys.split(",") if k.strip()] if raw_keys else []
+if not API_KEYS:
+ single = os.getenv("GROQ_API_KEY", "").strip()
+ if single:
+ API_KEYS = [single]
+
+# Fail at import time when no key is available.
+if not API_KEYS:
+ raise ValueError("Neither GROQ_API_KEYS nor GROQ_API_KEY is configured in the environment.")
+
+# Cursor into API_KEYS used by get_next_api_key() for round-robin rotation.
+current_key_idx = 0
+
+def get_next_api_key():
+    """Return the API key at the rotation cursor and move the cursor forward by one."""
+    global current_key_idx
+    chosen = current_key_idx
+    # Wrap around so the rotation restarts at the first key.
+    current_key_idx = (chosen + 1) % len(API_KEYS)
+    return API_KEYS[chosen]
+
+def clean_wiki_text(text):
+    """Strip wiki markup from *text*; non-string values are returned untouched.
+
+    ``[[target|label]]`` becomes ``label``, ``[[target]]`` becomes ``target``,
+    italic markers (``''``) and any leftover ``[[`` / ``]]`` are removed, and
+    the result is trimmed.
+    """
+    if not isinstance(text, str):
+        return text
+    # Piped links first, so the plain-link rule never sees the "target|" part.
+    without_piped = re.sub(r'\[\[[^|\]]+\|([^\]]+)\]\]', r'\1', text)
+    without_links = re.sub(r'\[\[([^\]]+)\]\]', r'\1', without_piped)
+    # Drop italic quotes and any unmatched brackets that survived.
+    for leftover in ("''", "[[", "]]"):
+        without_links = without_links.replace(leftover, "")
+    return without_links.strip()
+
+def fix_audio_path(v):
+    """Drop a leading ``extracted_media/`` directory from a media path.
+
+    Only the prefix is removed; later occurrences of the same text inside the
+    path are left alone. Non-string values and paths without the prefix are
+    returned unchanged.
+    """
+    prefix = "extracted_media/"
+    if isinstance(v, str) and v.startswith(prefix):
+        # Slice instead of str.replace, which would also rewrite nested matches.
+        return v[len(prefix):]
+    return v
+
+def clean_json_wrapper(text):
+    """Remove a surrounding markdown code fence (``` or ```json) from *text*."""
+    body = text.strip()
+    json_fence = "```json"
+    fence = "```"
+    # The two opening checks run in sequence, as do the strip calls around them.
+    if body.startswith(json_fence):
+        body = body[len(json_fence):]
+    if body.startswith(fence):
+        body = body[len(fence):]
+    if body.endswith(fence):
+        body = body[:-len(fence)]
+    return body.strip()
+
+def fetch_difficulty_batch(batch_words, timeout=60):
+    """Ask the Groq LLM to classify a batch of vocabulary items by CEFR level.
+
+    Args:
+        batch_words: Vocabulary items (dicts); each item's "word" and
+            "definition" are sent to the model.
+        timeout: Seconds to wait for each HTTP request.
+
+    Returns:
+        A dict mapping words to the levels returned by the model, or None when
+        every attempt (two passes over API_KEYS) failed.
+    """
+    prompt = (
+        "You are an expert lexicographer. Classify the following list of English words into their most appropriate CEFR difficulty levels: A1, A2, B1, B2, C1, or C2. "
+        "Use the provided definitions for context.\n"
+        "Return ONLY a valid JSON object where keys are words and values are their CEFR levels (e.g. {\"apple\": \"A1\", \"paradigm\": \"C1\"}). "
+        "Do NOT return any other text or explanation."
+    )
+
+    user_payload = [
+        {"word": item.get("word"), "definition": item.get("definition", "")}
+        for item in batch_words
+    ]
+
+    payload = {
+        "model": MODEL,
+        "messages": [
+            {"role": "system", "content": prompt},
+            {"role": "user", "content": json.dumps(user_payload, ensure_ascii=False)}
+        ],
+        "temperature": 0.1,
+        "response_format": {"type": "json_object"}
+    }
+    body = json.dumps(payload).encode("utf-8")
+    # The request carries a bearer token, so the server certificate must be verified.
+    context = ssl.create_default_context()
+
+    # Try multiple API keys
+    for _ in range(len(API_KEYS) * 2):
+        # Capture the index before rotating so failures name the key actually used.
+        key_idx = current_key_idx
+        api_key = get_next_api_key()
+        headers = {
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/json",
+            "User-Agent": "Mozilla/5.0"
+        }
+
+        req = urllib.request.Request(
+            GROQ_URL,
+            data=body,
+            headers=headers,
+            method="POST"
+        )
+
+        try:
+            with urllib.request.urlopen(req, context=context, timeout=timeout) as response:
+                res_data = json.loads(response.read().decode("utf-8"))
+            raw_content = res_data["choices"][0]["message"]["content"]
+            result = json.loads(clean_json_wrapper(raw_content))
+            # Callers iterate over .items(); anything but an object is a failed attempt.
+            if not isinstance(result, dict):
+                raise ValueError("model reply is not a JSON object")
+            return result
+        except urllib.error.HTTPError as e:
+            try:
+                err_msg = e.read().decode("utf-8")
+            except Exception:
+                err_msg = ""
+            print(f"Key index {key_idx} failed (HTTP {e.code}). Msg: {err_msg[:100]}...")
+            time.sleep(1.0)
+        except Exception as e:
+            print(f"Key index {key_idx} failed (Generic error): {e}")
+            time.sleep(1.0)
+
+    print("All keys failed for this batch.")
+    return None
+
+def _fallback_level(index):
+    """Guess a CEFR level from an item's index.
+
+    Heuristic carried over from the original code, which assumes the first
+    ~1500 words are usually A1/A2, the next are B1/B2, and so on.
+    """
+    if index <= 1500:
+        return "A1"
+    if index <= 3000:
+        return "A2"
+    if index <= 4500:
+        return "B1"
+    if index <= 5500:
+        return "B2"
+    return "C1"
+
+
+def main():
+    """Clean the vocabulary JSON in place and fill in missing CEFR difficulty levels.
+
+    Steps: back up the file, strip the ``extracted_media/`` prefix from media
+    paths, remove wiki markup from translations, then classify items without
+    a difficulty level in LLM batches. The file is saved after the local
+    clean-up and after every batch.
+    """
+    import shutil  # local import: only needed for the backup below
+
+    print("Step 1: Reading and backup JSON...")
+    if not os.path.exists(FILE_PATH):
+        print(f"Error: {FILE_PATH} does not exist.")
+        return
+
+    # The file is overwritten in place several times below, so keep a
+    # timestamped copy of the untouched input first.
+    backup_path = f"{FILE_PATH}.{time.strftime('%Y%m%d-%H%M%S')}.bak"
+    shutil.copy2(FILE_PATH, backup_path)
+    print(f"Backup written to {backup_path}")
+
+    with open(FILE_PATH, "r", encoding="utf-8") as f:
+        data = json.load(f)
+
+    print(f"Loaded {len(data)} items.")
+
+    # Local cleanups
+    print("Step 2: Performing syntax cleanup and media path fixes...")
+    cleaned_translations_count = 0
+    fixed_paths_count = 0
+
+    for item in data:
+        # Fix media paths
+        audios = item.get("audios", {})
+        if isinstance(audios, dict):
+            for k, v in list(audios.items()):
+                new_v = fix_audio_path(v)
+                if new_v != v:
+                    audios[k] = new_v
+                    fixed_paths_count += 1
+
+        images = item.get("images", "")
+        new_images = fix_audio_path(images)
+        if new_images != images:
+            item["images"] = new_images
+            fixed_paths_count += 1
+
+        # Clean wiktionary syntax in translations
+        trans = item.get("translation", {})
+        if isinstance(trans, dict):
+            for lang, text in list(trans.items()):
+                if isinstance(text, str):
+                    new_text = clean_wiki_text(text)
+                elif isinstance(text, list):
+                    # For examples list or similar
+                    new_text = [clean_wiki_text(x) if isinstance(x, str) else x for x in text]
+                else:
+                    continue
+                if new_text != text:
+                    trans[lang] = new_text
+                    cleaned_translations_count += 1
+
+    print(f"-> Fixed {fixed_paths_count} media paths.")
+    print(f"-> Cleaned {cleaned_translations_count} translation fields.")
+
+    # Checkpoint local fixes
+    with open(FILE_PATH, "w", encoding="utf-8") as f:
+        json.dump(data, f, ensure_ascii=False, indent=2)
+    print("Progress checkpoint saved.")
+
+    # Classify difficulty levels
+    print("Step 3: Finding items missing difficulty levels...")
+    to_classify_indices = [
+        idx for idx, item in enumerate(data) if not item.get("difficulty_level")
+    ]
+
+    print(f"-> Found {len(to_classify_indices)} items needing difficulty level classification.")
+
+    if not to_classify_indices:
+        print("No items need difficulty level classification!")
+        return
+
+    # Process in batches
+    batch_size = 50
+    total_batches = (len(to_classify_indices) + batch_size - 1) // batch_size
+    valid_levels = {"A1", "A2", "B1", "B2", "C1", "C2"}
+
+    for i in range(0, len(to_classify_indices), batch_size):
+        batch_idxs = to_classify_indices[i:i+batch_size]
+        batch_words = [data[idx] for idx in batch_idxs]
+
+        print(f"Processing batch {i//batch_size + 1}/{total_batches} ({len(batch_words)} words)...")
+
+        levels_map = None
+        retries = 3
+        while retries > 0:
+            levels_map = fetch_difficulty_batch(batch_words)
+            # Only a non-empty JSON object is usable below.
+            if isinstance(levels_map, dict) and levels_map:
+                break
+            levels_map = None
+            retries -= 1
+            print(f"Retrying batch... ({retries} retries left)")
+            time.sleep(2.0)
+
+        if not levels_map:
+            print("Skipping batch because of repeated API failures.")
+            continue
+
+        # Standardize keys to lowercase for matching
+        levels_map_lower = {
+            k.lower().strip(): v.upper().strip()
+            for k, v in levels_map.items()
+            if isinstance(k, str) and isinstance(v, str)
+        }
+
+        updated_in_batch = 0
+        for idx in batch_idxs:
+            item = data[idx]
+            # "word" may be missing or null in a malformed record.
+            w = (item.get("word") or "").lower().strip()
+
+            level = levels_map_lower.get(w)
+            if level not in valid_levels:
+                # Fallback: check if the word matches once punctuation is removed
+                cleaned_word = re.sub(r"[^\w\s-]", "", w).strip()
+                level = levels_map_lower.get(cleaned_word)
+            if level not in valid_levels:
+                # Generic fallback based on the item's index
+                level = _fallback_level(item.get("index") or 0)
+                # We print warning but set a reasonable fallback
+                print(f" Fallback level {level} assigned for '{item.get('word')}'")
+
+            item["difficulty_level"] = level
+            updated_in_batch += 1
+
+        print(f"-> Successfully classified {updated_in_batch}/{len(batch_words)} words.")
+
+        # Save every batch
+        with open(FILE_PATH, "w", encoding="utf-8") as f:
+            json.dump(data, f, ensure_ascii=False, indent=2)
+        print(" Saved batch updates.")
+
+        # Polite delay
+        time.sleep(1.0)
+
+    print("Done! Standardizing and cleaning complete.")
+
+if __name__ == "__main__":
+ main()
diff --git a/backend-service/scripts/import_json_to_db.py b/backend-service/scripts/import_json_to_db.py
index 91230313..8f9eaf11 100644
--- a/backend-service/scripts/import_json_to_db.py
+++ b/backend-service/scripts/import_json_to_db.py
@@ -5,7 +5,7 @@
from datetime import datetime, timezone
# Add parent directory to Python path
-sys.path.append("/opt/lexilingo/backend-service")
+sys.path.append("/app")
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
from sqlalchemy.orm import sessionmaker
@@ -13,17 +13,18 @@
from app.models.vocabulary import VocabularyItem, PartOfSpeech, DifficultyLevel
from app.core.config import settings
-INPUT_FILE = "/app/data/categorized_words_final.json"
+INPUT_FILE = "/app/data/vocabulary_import.json"
def guess_pos(word, defn):
- # Basic guessing from Anki info
- if " v." in defn or " verb" in defn or word.startswith("to "):
+ # Remove Vietnamese "v.v." / "v. v." to prevent false verb matching on " v."
+ clean_defn = defn.replace("v.v.", "").replace("v. v.", "")
+ if " v." in clean_defn or " verb" in clean_defn or word.startswith("to "):
return PartOfSpeech.VERB
- if " adj." in defn or " adj " in defn:
+ if " adj." in clean_defn or " adj " in clean_defn:
return PartOfSpeech.ADJECTIVE
- if " adv." in defn or " adv " in defn:
+ if " adv." in clean_defn or " adv " in clean_defn:
return PartOfSpeech.ADVERB
- if " phrase" in defn or " idiom" in defn or " " in word:
+ if " phrase" in clean_defn or " idiom" in clean_defn or " " in word:
return PartOfSpeech.PHRASE
return PartOfSpeech.NOUN
@@ -33,8 +34,7 @@ async def main():
# Database setup
# PostgreSQL URI from settings or .env
- # Let's read MONGODB_URI/Postgres URI
- engine = create_async_engine(settings.SQLALCHEMY_DATABASE_URI, echo=False)
+ engine = create_async_engine(settings.DATABASE_URL, echo=False)
AsyncSessionLocal = sessionmaker(
engine, class_=AsyncSession, expire_on_commit=False
)
@@ -54,19 +54,46 @@ async def main():
phonetic = item.get('phonetic', '')
# Parse additional info
+ audios = item.get('audios', {})
+ images = item.get('images', '')
+
+ # Get the existing translations dictionary from the JSON item if it exists
+ trans_dict = item.get('translation', {})
+ if not isinstance(trans_dict, dict):
+ trans_dict = {}
+ if "vi" not in trans_dict or not trans_dict["vi"]:
+ trans_dict["vi"] = defn
+
translation = {
- "vi": defn,
+ **trans_dict,
"examples": [example] if example else [],
- "images": item.get('images', []),
- "audios": item.get('audios', [])
+ "images": images if images else [],
+ "audios": audios if audios else {}
}
-
+
audio_url = None
- if item.get('audios'):
- audio_url = f"/media/{item['audios'][0]}"
+ if isinstance(audios, dict):
+ pronunciation = audios.get('pronunciation')
+ if pronunciation:
+ audio_url = f"/media/{pronunciation}"
+ elif isinstance(audios, list) and audios:
+ audio_url = f"/media/{audios[0]}"
- # Create the DB object
- db_item = VocabularyItem(
+ # Get difficulty level from JSON or fall back to A1
+ level_str = item.get('difficulty_level', 'A1')
+ try:
+ difficulty_level = DifficultyLevel(level_str)
+ except ValueError:
+ difficulty_level = DifficultyLevel.A1
+
+ # Parse tags
+ tags_raw = item.get('tags', "general")
+ if isinstance(tags_raw, str):
+ tags = [t.strip() for t in tags_raw.split(',') if t.strip()]
+ else:
+ tags = tags_raw if isinstance(tags_raw, list) else ["general"]
+
+ db_item = dict(
id=uuid.uuid4(),
word=word,
definition=defn,
@@ -74,13 +101,29 @@ async def main():
pronunciation=phonetic[:100] if phonetic else None,
audio_url=audio_url,
part_of_speech=guess_pos(word, defn),
- difficulty_level=DifficultyLevel.A1, # default placeholder
- tags=item.get('tags', ["general"])
+ difficulty_level=difficulty_level,
+ tags=tags
)
items.append(db_item)
- session.add_all(items)
- await session.commit()
+ from sqlalchemy.dialects.postgresql import insert
+ if items:
+ stmt = insert(VocabularyItem).values(items)
+ # Update existing items with refined definitions, translations, levels, tags, etc.
+ stmt = stmt.on_conflict_do_update(
+ index_elements=['word', 'part_of_speech'],
+ set_={
+ 'definition': stmt.excluded.definition,
+ 'translation': stmt.excluded.translation,
+ 'pronunciation': stmt.excluded.pronunciation,
+ 'audio_url': stmt.excluded.audio_url,
+ 'difficulty_level': stmt.excluded.difficulty_level,
+ 'tags': stmt.excluded.tags
+ }
+ )
+ await session.execute(stmt)
+ await session.commit()
+
total += len(items)
print(f"Imported {total} / {len(data)}")
diff --git a/backend-service/scripts/refine_vocabulary.py b/backend-service/scripts/refine_vocabulary.py
new file mode 100755
index 00000000..a67c46ca
--- /dev/null
+++ b/backend-service/scripts/refine_vocabulary.py
@@ -0,0 +1,257 @@
+#!/usr/bin/env python3
+import json
+import os
+import re
+import ssl
+import time
+import urllib.request
+import urllib.parse
+import urllib.error
+
+FILE_PATH = "/opt/lexilingo/backend-service/data/vocabulary_import.json"
+GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
+MODEL = "llama-3.3-70b-versatile"
+
+from dotenv import load_dotenv
+from pathlib import Path
+
+# Load env variables from the directory three levels above this script.
+# When APP_ENV is "production", .env.production is loaded as well; it is read
+# with override=False, so variables that are already set are left untouched.
+PROJECT_ROOT = Path(__file__).parent.parent.parent
+load_dotenv(PROJECT_ROOT / ".env")
+if os.getenv("APP_ENV", "").lower() == "production":
+ load_dotenv(PROJECT_ROOT / ".env.production", override=False)
+
+# GROQ_API_KEYS is a comma-separated list of keys; blank entries are dropped.
+# GROQ_API_KEY is only consulted when that list ends up empty.
+raw_keys = os.getenv("GROQ_API_KEYS", "").strip()
+API_KEYS = [k.strip() for k in raw_keys.split(",") if k.strip()] if raw_keys else []
+if not API_KEYS:
+ single = os.getenv("GROQ_API_KEY", "").strip()
+ if single:
+ API_KEYS = [single]
+
+# Fail while the module is loading, before any request is attempted.
+if not API_KEYS:
+ raise ValueError("Neither GROQ_API_KEYS nor GROQ_API_KEY is configured in the environment.")
+
+# Position in API_KEYS of the key that get_next_api_key() returns next.
+current_key_idx = 0
+
+def get_next_api_key():
+    """Return the API key under the rotation cursor, then advance the cursor.
+
+    The cursor (module-level ``current_key_idx``) wraps around to the first
+    key after the last one, so successive calls cycle through ``API_KEYS``.
+    """
+    global current_key_idx
+    selected, current_key_idx = (
+        API_KEYS[current_key_idx],
+        (current_key_idx + 1) % len(API_KEYS),
+    )
+    return selected
+
+def is_cjk(c):
+    """Return True when the single character *c* lies in one of the CJK
+    ideograph code-point ranges listed below (bounds are inclusive)."""
+    cjk_ranges = (
+        (0x3400, 0x4DBF),
+        (0x4E00, 0x9FFF),
+        (0xF900, 0xFAFF),
+        (0x20000, 0x2A6DF),
+        (0x2A700, 0x2B73F),
+        (0x2B740, 0x2B81F),
+        (0x2B820, 0x2CEAF),
+    )
+    value = ord(c)
+    return any(low <= value <= high for low, high in cjk_ranges)
+
+def clean_vietnamese_translation(text):
+    """Remove CJK characters from a translation string and tidy the separators.
+
+    Non-string input is returned unchanged. For a string, every character for
+    which ``is_cjk`` is true is dropped, whitespace runs become a single
+    space, each run of two or more commas becomes one ``", "``, and a leading
+    or trailing comma is removed.
+    """
+    if not isinstance(text, str):
+        return text
+    # Remove any CJK characters
+    text = "".join(c for c in text if not is_cjk(c))
+    text = re.sub(r'\s+', ' ', text)
+    # Collapse a comma run of any length in one pass. Substituting pair by
+    # pair left ", ," behind (or a leading comma) once five or more adjacent
+    # entries had been emptied by the CJK removal above.
+    text = re.sub(r',(?:\s*,)+\s*', ', ', text)
+    text = re.sub(r'^\s*,\s*|\s*,\s*$', '', text)
+    return text.strip()
+
+def needs_refinement(item):
+    """Decide whether an item's definition/translation should be regenerated.
+
+    Returns True when any of these holds:
+      * the definition is missing, empty or not a string;
+      * the definition contains one of the common English function words
+        below (it is probably written in English);
+      * the definition is shorter than 15 characters or contains ``,`` / ``;``
+        (it looks like a direct translation rather than an explanation);
+      * ``translation`` is not a dict, its ``"vi"`` entry is empty, or that
+        entry still contains CJK characters.
+    Otherwise returns False.
+    """
+    raw_defn = item.get("definition")
+    # A null/non-string definition is treated like a missing one instead of
+    # raising AttributeError on .strip().
+    defn = raw_defn.strip() if isinstance(raw_defn, str) else ""
+    if not defn:
+        return True
+
+    # Heuristic for English definitions
+    english_words = {'is', 'a', 'to', 'of', 'and', 'the', 'it', 'or', 'in', 'with', 'if', 'something', 'describes', 'someone', 'by', 'for', 'from', 'an'}
+    words = set(re.findall(r'\b\w+\b', defn.lower()))
+    if not english_words.isdisjoint(words):
+        return True
+
+    # Heuristic for short/direct translation definitions (e.g. 'đội, nhóm')
+    if len(defn) < 15 or ',' in defn or ';' in defn:
+        return True
+
+    # main() tolerates a non-dict translation during its local pass, so this
+    # check must not assume a dict either.
+    translation = item.get("translation")
+    trans_vi = translation.get("vi", "") if isinstance(translation, dict) else ""
+    if not trans_vi or any(is_cjk(c) for c in str(trans_vi)):
+        return True
+
+    return False
+
+def clean_json_wrapper(text):
+    """Strip a Markdown code fence (``` or ```json) from around *text*.
+
+    Surrounding whitespace is removed before and after the fences are cut.
+    """
+    body = text.strip()
+    # Order matters: the longer opening fence is tried before the bare one.
+    for fence in ("```json", "```"):
+        if body.startswith(fence):
+            body = body[len(fence):]
+    if body.endswith("```"):
+        body = body[:-3]
+    return body.strip()
+
+def fetch_refinements_batch(batch_items):
+    """Ask the Groq chat-completions endpoint to refine one batch of items.
+
+    Args:
+        batch_items: Item dicts; each item's ``word``, ``definition`` and
+            ``translation["vi"]`` are sent to the model.
+
+    Returns:
+        The JSON object decoded from the model's reply (the prompt asks for
+        word -> {"definition", "translation_vi"}), or None when every attempt
+        failed. Up to ``2 * len(API_KEYS)`` attempts are made, rotating keys.
+    """
+    prompt = (
+        "You are an expert bilingual lexicographer. I will provide a JSON list of English words, their definition (which might be in English or a short translation), and their current Vietnamese translation.\n"
+        "For each word, you must return a JSON object with two fields:\n"
+        "1. \"definition\": A concise, natural Vietnamese explanation/definition of the word's meaning (suitable for language learners, e.g. \"Một nhóm người hợp tác cùng nhau để làm việc hoặc chơi thể thao\" for \"team\"). It must be a full explanation, NOT a direct 1-3 word translation.\n"
+        "2. \"translation_vi\": A clean Vietnamese direct translation (synonym or equivalent words, e.g., \"đội, nhóm\" for \"team\"), with NO CJK/Chinese/Hán/Nom characters (e.g. remove characters like 學, 實, 體).\n"
+        "\n"
+        "Return ONLY a valid JSON object where keys are the words and values are their corresponding objects containing \"definition\" and \"translation_vi\". "
+        "Do NOT return any other text or explanation."
+    )
+
+    user_payload = []
+    for item in batch_items:
+        translation = item.get("translation")
+        user_payload.append({
+            "word": item.get("word"),
+            "definition": item.get("definition", ""),
+            # A missing or non-dict translation is sent as an empty string.
+            "translation_vi": translation.get("vi", "") if isinstance(translation, dict) else "",
+        })
+
+    payload = {
+        "model": MODEL,
+        "messages": [
+            {"role": "system", "content": prompt},
+            {"role": "user", "content": json.dumps(user_payload, ensure_ascii=False)}
+        ],
+        "temperature": 0.1,
+        "response_format": {"type": "json_object"}
+    }
+    request_body = json.dumps(payload).encode("utf-8")
+
+    # The request carries a bearer API key, so the server certificate must be
+    # verified; an unverified context would expose the key to interception.
+    context = ssl.create_default_context()
+
+    # Try multiple API keys
+    for _ in range(len(API_KEYS) * 2):
+        api_key = get_next_api_key()
+        # The cursor has already moved on; step back with wraparound so the
+        # log shows the index that was really used (never -1).
+        used_key_idx = (current_key_idx - 1) % len(API_KEYS)
+        headers = {
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/json",
+            "User-Agent": "Mozilla/5.0"
+        }
+
+        req = urllib.request.Request(
+            GROQ_URL,
+            data=request_body,
+            headers=headers,
+            method="POST"
+        )
+
+        try:
+            with urllib.request.urlopen(req, context=context) as response:
+                res_data = json.loads(response.read().decode("utf-8"))
+            raw_content = res_data["choices"][0]["message"]["content"]
+            result = json.loads(clean_json_wrapper(raw_content))
+            # Callers iterate result.items(); anything else counts as a
+            # failed attempt and is retried.
+            if not isinstance(result, dict):
+                raise ValueError("model reply is not a JSON object")
+            return result
+        except urllib.error.HTTPError as e:
+            try:
+                err_msg = e.read().decode("utf-8")
+            except Exception:
+                err_msg = ""
+            print(f"Key index {used_key_idx} failed (HTTP {e.code}). Msg: {err_msg[:100]}...")
+            time.sleep(1.0)
+        except Exception as e:
+            print(f"Key index {used_key_idx} failed (Generic error): {e}")
+            time.sleep(1.0)
+
+    return None
+
+def main():
+    """Sanitize and refine the vocabulary JSON file at FILE_PATH in place.
+
+    1. Load the file (a list of item dicts).
+    2. Strip CJK characters from every ``translation["vi"]`` locally and save.
+    3. Send the items flagged by ``needs_refinement`` to the model in batches
+       of 50, apply the returned definition/translation, and rewrite the file
+       after each successful batch. A batch that keeps failing is skipped.
+    """
+    print("Step 1: Reading JSON...")
+    if not os.path.exists(FILE_PATH):
+        print(f"Error: {FILE_PATH} does not exist.")
+        return
+
+    with open(FILE_PATH, "r", encoding="utf-8") as f:
+        data = json.load(f)
+
+    print(f"Loaded {len(data)} items.")
+
+    # Local passes: Clean CJK characters from translation['vi'] immediately
+    print("Step 2: Performing local Hán/Nom character sanitization...")
+    local_cleaned_count = 0
+    for item in data:
+        trans = item.get("translation", {})
+        if isinstance(trans, dict):
+            vi = trans.get("vi", "")
+            if isinstance(vi, str) and any(is_cjk(c) for c in vi):
+                trans["vi"] = clean_vietnamese_translation(vi)
+                local_cleaned_count += 1
+
+    print(f"-> Sanitized Hán/Nom characters locally for {local_cleaned_count} items.")
+
+    # Persist the local cleanup now. Otherwise it is lost whenever nothing
+    # needs refinement or every API batch below fails.
+    if local_cleaned_count:
+        with open(FILE_PATH, "w", encoding="utf-8") as f:
+            json.dump(data, f, ensure_ascii=False, indent=2)
+
+    # Scan for items needing semantic refinement
+    to_refine_indices = [idx for idx, item in enumerate(data) if needs_refinement(item)]
+    print(f"Step 3: Found {len(to_refine_indices)} items needing explanation and translation refinement.")
+
+    if not to_refine_indices:
+        print("No items need refinement!")
+        return
+
+    batch_size = 50
+    total_batches = (len(to_refine_indices) + batch_size - 1) // batch_size
+
+    for i in range(0, len(to_refine_indices), batch_size):
+        batch_idxs = to_refine_indices[i:i+batch_size]
+        batch_items = [data[idx] for idx in batch_idxs]
+
+        print(f"Refining batch {i//batch_size + 1}/{total_batches} ({len(batch_items)} words)...")
+
+        refinements_map = None
+        retries = 3
+        while retries > 0:
+            refinements_map = fetch_refinements_batch(batch_items)
+            if refinements_map:
+                break
+            retries -= 1
+            print(f"Retrying batch... ({retries} retries left)")
+            time.sleep(2.0)
+
+        if not refinements_map:
+            print("Skipping batch because of repeated API failures.")
+            continue
+
+        # Standardize keys to lowercase
+        refinements_map_lower = {k.lower().strip(): v for k, v in refinements_map.items() if isinstance(v, dict)}
+
+        updated_in_batch = 0
+        for idx in batch_idxs:
+            item = data[idx]
+            w = str(item.get("word") or "").lower().strip()
+
+            ref = refinements_map_lower.get(w)
+            if not ref:
+                # Fallback: check stripped word
+                cleaned_word = re.sub(r"[^\w\s-]", "", w).strip()
+                ref = refinements_map_lower.get(cleaned_word)
+
+            if ref and isinstance(ref, dict):
+                # The model's reply is untrusted: a null or non-string field
+                # is ignored rather than crashing on .strip().
+                raw_def = ref.get("definition")
+                raw_trans_vi = ref.get("translation_vi")
+                new_def = raw_def.strip() if isinstance(raw_def, str) else ""
+                new_trans_vi = raw_trans_vi.strip() if isinstance(raw_trans_vi, str) else ""
+
+                if new_def:
+                    item["definition"] = new_def
+                if new_trans_vi:
+                    # Replace a missing or non-dict translation with a dict
+                    # so the assignment below cannot fail.
+                    if not isinstance(item.get("translation"), dict):
+                        item["translation"] = {}
+                    item["translation"]["vi"] = clean_vietnamese_translation(new_trans_vi)
+
+                updated_in_batch += 1
+
+        print(f"-> Successfully refined {updated_in_batch}/{len(batch_items)} words.")
+
+        # Progressive save
+        with open(FILE_PATH, "w", encoding="utf-8") as f:
+            json.dump(data, f, ensure_ascii=False, indent=2)
+        print(" Saved batch updates.")
+
+        time.sleep(1.0)
+
+    print("Refinement process complete!")
+
+if __name__ == "__main__":
+ main()
diff --git a/backend-service/scripts/restore_vietnamese_accents.py b/backend-service/scripts/restore_vietnamese_accents.py
new file mode 100755
index 00000000..af868518
--- /dev/null
+++ b/backend-service/scripts/restore_vietnamese_accents.py
@@ -0,0 +1,207 @@
+#!/usr/bin/env python3
+import json
+import os
+import re
+import ssl
+import time
+import urllib.request
+import urllib.parse
+import urllib.error
+
+FILE_PATH = "/opt/lexilingo/backend-service/data/vocabulary_import.json"
+GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
+MODEL = "llama-3.1-8b-instant"
+
+from dotenv import load_dotenv
+from pathlib import Path
+
+# Load env variables from the directory three levels above this script.
+# When APP_ENV is "production", .env.production is loaded as well; it is read
+# with override=False, so variables that are already set are left untouched.
+PROJECT_ROOT = Path(__file__).parent.parent.parent
+load_dotenv(PROJECT_ROOT / ".env")
+if os.getenv("APP_ENV", "").lower() == "production":
+ load_dotenv(PROJECT_ROOT / ".env.production", override=False)
+
+# GROQ_API_KEYS is a comma-separated list of keys; blank entries are dropped.
+# GROQ_API_KEY is only consulted when that list ends up empty.
+raw_keys = os.getenv("GROQ_API_KEYS", "").strip()
+API_KEYS = [k.strip() for k in raw_keys.split(",") if k.strip()] if raw_keys else []
+if not API_KEYS:
+ single = os.getenv("GROQ_API_KEY", "").strip()
+ if single:
+ API_KEYS = [single]
+
+# Fail while the module is loading, before any request is attempted.
+if not API_KEYS:
+ raise ValueError("Neither GROQ_API_KEYS nor GROQ_API_KEY is configured in the environment.")
+
+# Position in API_KEYS of the key that get_next_api_key() returns next.
+current_key_idx = 0
+
+def get_next_api_key():
+    """Hand out the API keys in round-robin order.
+
+    Returns the key at the module-level cursor ``current_key_idx`` and moves
+    the cursor one step forward, wrapping to 0 after the last key.
+    """
+    global current_key_idx
+    selected, current_key_idx = (
+        API_KEYS[current_key_idx],
+        (current_key_idx + 1) % len(API_KEYS),
+    )
+    return selected
+
+# Lowercase Vietnamese letters that carry a diacritic, plus "đ".
+ACCENT_CHARS = set('áàảãạăắằẳẵặâấầẩẫậéèẻẽẹêếềểễệíìỉĩịóòỏõọôốồổỗộơớờởỡợúùủũụưứừửữựýỳỷỹỵđ')
+
+def has_accents(text):
+    """Return True when *text* contains at least one character of ACCENT_CHARS.
+
+    The comparison is case-insensitive (the text is lowercased first).
+    Non-string input is reported as True.
+    """
+    if isinstance(text, str):
+        return not ACCENT_CHARS.isdisjoint(text.lower())
+    return True
+
+def clean_json_wrapper(text):
+    """Return *text* without a surrounding Markdown code fence.
+
+    Handles an opening ```json or ``` and a closing ```; whitespace around the
+    input and around the remaining payload is stripped.
+    """
+    body = text.strip()
+    # Order matters: the longer opening fence is tried before the bare one.
+    for fence in ("```json", "```"):
+        if body.startswith(fence):
+            body = body[len(fence):]
+    if body.endswith("```"):
+        body = body[:-3]
+    return body.strip()
+
+def fetch_accents_batch(batch_items):
+    """Ask the Groq chat-completions endpoint to restore accents for a batch.
+
+    Args:
+        batch_items: Item dicts; each item's ``word`` and
+            ``translation["vi"]`` are sent to the model.
+
+    Returns:
+        The JSON object decoded from the model's reply (the prompt asks for
+        word -> corrected translation string), or None when every attempt
+        failed. Up to ``2 * len(API_KEYS)`` attempts are made, rotating keys,
+        with a longer pause after an HTTP 429.
+    """
+    prompt = (
+        "You are an expert Vietnamese linguist. I will provide a JSON list of English words and their current Vietnamese translation (which is missing accents/diacritics, e.g., \"hoc\" for \"learn\", \"chinh sach\" for \"policy\", \"nuoc\" for \"water\").\n"
+        "For each word, you must correct the Vietnamese translation by adding the proper Vietnamese accents (dấu tiếng Việt) so it is grammatically correct and matches the meaning (e.g. \"hoc\" -> \"học\", \"chinh sach\" -> \"chính sách\", \"nuoc\" -> \"nước\", \"tuoi\" -> \"tuổi\").\n"
+        "If the current translation is already correct and naturally does not need accents (e.g. \"cho\" for \"give\", \"kinh doanh\" for \"business\"), keep it as is.\n"
+        "\n"
+        "Return ONLY a valid JSON object where keys are words and values are the corrected Vietnamese translation strings. "
+        "Do NOT return any other text or explanation."
+    )
+
+    user_payload = []
+    for item in batch_items:
+        translation = item.get("translation")
+        user_payload.append({
+            "word": item.get("word"),
+            # A missing or non-dict translation is sent as an empty string.
+            "current_translation_vi": translation.get("vi", "") if isinstance(translation, dict) else "",
+        })
+
+    payload = {
+        "model": MODEL,
+        "messages": [
+            {"role": "system", "content": prompt},
+            {"role": "user", "content": json.dumps(user_payload, ensure_ascii=False)}
+        ],
+        "temperature": 0.1,
+        "response_format": {"type": "json_object"}
+    }
+    request_body = json.dumps(payload).encode("utf-8")
+
+    # The request carries a bearer API key, so the server certificate must be
+    # verified; an unverified context would expose the key to interception.
+    context = ssl.create_default_context()
+
+    # Try multiple API keys
+    for _ in range(len(API_KEYS) * 2):
+        api_key = get_next_api_key()
+        # The cursor has already advanced; step back with wraparound to get
+        # the index of the key used for this attempt.
+        actual_key_idx = (current_key_idx - 1) % len(API_KEYS)
+        headers = {
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/json",
+            "User-Agent": "Mozilla/5.0"
+        }
+
+        req = urllib.request.Request(
+            GROQ_URL,
+            data=request_body,
+            headers=headers,
+            method="POST"
+        )
+
+        try:
+            with urllib.request.urlopen(req, context=context) as response:
+                res_data = json.loads(response.read().decode("utf-8"))
+            raw_content = res_data["choices"][0]["message"]["content"]
+            result = json.loads(clean_json_wrapper(raw_content))
+            # Callers iterate result.items(); anything else counts as a
+            # failed attempt and is retried.
+            if not isinstance(result, dict):
+                raise ValueError("model reply is not a JSON object")
+            return result
+        except urllib.error.HTTPError as e:
+            try:
+                err_msg = e.read().decode("utf-8")
+            except Exception:
+                err_msg = ""
+            print(f"Key index {actual_key_idx} failed (HTTP {e.code}). Msg: {err_msg[:100]}...")
+            if e.code == 429:
+                print("Rate limit (429) hit. Waiting 6.0 seconds before rotating to the next key...")
+                time.sleep(6.0)
+            else:
+                time.sleep(1.5)
+        except Exception as e:
+            print(f"Key index {actual_key_idx} failed (Generic error): {e}")
+            time.sleep(1.5)
+
+    return None
+
+def main():
+    """Restore Vietnamese diacritics in the vocabulary JSON file at FILE_PATH.
+
+    Items whose ``translation["vi"]`` is a string without any accented
+    character are sent to the model in batches of 30; each returned string
+    replaces the stored one, and the file is rewritten after every successful
+    batch. A batch that keeps failing is skipped.
+    """
+    print("Step 1: Reading JSON...")
+    if not os.path.exists(FILE_PATH):
+        print(f"Error: {FILE_PATH} does not exist.")
+        return
+
+    with open(FILE_PATH, "r", encoding="utf-8") as f:
+        data = json.load(f)
+
+    print(f"Loaded {len(data)} items.")
+
+    # Identify items needing accents
+    to_correct_indices = []
+    for idx, item in enumerate(data):
+        translation = item.get("translation", {})
+        # A malformed (non-dict) translation cannot be updated in place;
+        # leave such items alone instead of crashing on .get().
+        if not isinstance(translation, dict):
+            continue
+        vi = translation.get("vi", "")
+        if isinstance(vi, str) and not has_accents(vi):
+            to_correct_indices.append(idx)
+
+    print(f"Step 2: Found {len(to_correct_indices)} items needing Vietnamese accent correction.")
+
+    if not to_correct_indices:
+        print("No items need accent correction!")
+        return
+
+    batch_size = 30
+    total_batches = (len(to_correct_indices) + batch_size - 1) // batch_size
+
+    for i in range(0, len(to_correct_indices), batch_size):
+        batch_idxs = to_correct_indices[i:i+batch_size]
+        batch_items = [data[idx] for idx in batch_idxs]
+
+        print(f"Correcting batch {i//batch_size + 1}/{total_batches} ({len(batch_items)} words)...")
+
+        corrections_map = None
+        retries = 5
+        while retries > 0:
+            corrections_map = fetch_accents_batch(batch_items)
+            if corrections_map:
+                break
+            retries -= 1
+            print(f"Retrying batch... ({retries} retries left)")
+            time.sleep(4.0)
+
+        if not corrections_map:
+            print("Skipping batch because of repeated API failures.")
+            continue
+
+        # Standardize keys to lowercase
+        corrections_map_lower = {k.lower().strip(): v for k, v in corrections_map.items() if isinstance(v, str)}
+
+        updated_in_batch = 0
+        for idx in batch_idxs:
+            item = data[idx]
+            w = str(item.get("word") or "").lower().strip()
+
+            corrected_vi = corrections_map_lower.get(w)
+            if not corrected_vi:
+                # Fallback check stripped word
+                cleaned_word = re.sub(r"[^\w\s-]", "", w).strip()
+                corrected_vi = corrections_map_lower.get(cleaned_word)
+
+            if corrected_vi and isinstance(corrected_vi, str):
+                # Items selected above may have no "translation" key at all;
+                # setdefault avoids the KeyError a plain index would raise.
+                item.setdefault("translation", {})["vi"] = corrected_vi.strip()
+                updated_in_batch += 1
+
+        print(f"-> Successfully restored accents for {updated_in_batch}/{len(batch_items)} words.")
+
+        # Progressive save
+        with open(FILE_PATH, "w", encoding="utf-8") as f:
+            json.dump(data, f, ensure_ascii=False, indent=2)
+        print(" Saved batch updates.")
+
+        # Protect against Groq rate limit (TPM/RPM)
+        time.sleep(2.5)
+
+    print("Accent restoration process complete!")
+
+if __name__ == "__main__":
+ main()
diff --git a/backend-service/scripts/run_tasks.sh b/backend-service/scripts/run_tasks.sh
new file mode 100755
index 00000000..69b9efd6
--- /dev/null
+++ b/backend-service/scripts/run_tasks.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# run_tasks.sh
+# Coordinates the vocabulary audio downloading and vocabulary database expansion.
+
+# -e stops at the first failing command. pipefail is required as well: each
+# task is piped into tee, and without it the pipeline's status is tee's, so a
+# failed Python task would go unnoticed and the success banner would print.
+set -eo pipefail
+
+SCRIPT_DIR="/opt/lexilingo/backend-service/scripts"
+LOG_DIR="/opt/lexilingo/backend-service/logs"
+
+mkdir -p "$LOG_DIR"
+
+echo "=== STARTING VOCABULARY AND AUDIO TASKS ==="
+echo "Logs will be stored in $LOG_DIR"
+
+# Task 1: Fetch audios for existing words
+echo ""
+echo "[Task 1/2] Fetching and downloading missing audios for existing vocabulary..."
+python3 -u "$SCRIPT_DIR/fetch_audios.py" 2>&1 | tee "$LOG_DIR/fetch_audios.log"
+
+# Task 2: Expand vocabulary with CEFR and IELTS levels
+echo ""
+echo "[Task 2/2] Expanding vocabulary JSON with CEFR and IELTS words using Groq API..."
+python3 -u "$SCRIPT_DIR/expand_vocabulary.py" 2>&1 | tee "$LOG_DIR/expand_vocabulary.log"
+
+echo ""
+echo "=== ALL TASKS COMPLETED SUCCESSFULLY ==="
diff --git a/backend-service/scripts/test_normalize_answer.py b/backend-service/scripts/test_normalize_answer.py
index 4b64f774..43487b93 100644
--- a/backend-service/scripts/test_normalize_answer.py
+++ b/backend-service/scripts/test_normalize_answer.py
@@ -43,7 +43,7 @@ def run_tests():
print(f"PASS [Case {idx+1}]: input={repr(ans)} -> {repr(result)}")
if failed == 0:
- print("\nAll normalize_answer tests passed successfully! 🎉")
+ print("\nAll normalize_answer tests passed successfully! ")
sys.exit(0)
else:
print(f"\n{failed} tests failed.")
diff --git a/backend-service/scripts/test_notifications.py b/backend-service/scripts/test_notifications.py
index bba6a86f..91fd85ca 100644
--- a/backend-service/scripts/test_notifications.py
+++ b/backend-service/scripts/test_notifications.py
@@ -305,7 +305,10 @@ async def main(args: argparse.Namespace) -> None:
email_ok = push_ok = True
if not args.push_only and not args.reminder_only:
- to = args.to_email or settings.EMAIL_FROM or "thefirestar312@gmail.com"
+ to = args.to_email or settings.EMAIL_FROM
+ if not to:
+ print("Error: provide --to-email or set EMAIL_FROM in config")
+ return
email_ok = await test_email(to)
if not args.email_only and not args.reminder_only:
diff --git a/backend-service/scripts/test_wiktionary.py b/backend-service/scripts/test_wiktionary.py
new file mode 100644
index 00000000..73001c77
--- /dev/null
+++ b/backend-service/scripts/test_wiktionary.py
@@ -0,0 +1,35 @@
+import json
+import urllib.request
+import urllib.parse
+import re
+
+def get_wiktionary_translations(word):
+    """Fetch translations of *word* from the English Wiktionary page wikitext.
+
+    Returns a dict mapping each language code among ja, ko, zh, fr, es, vi
+    that has at least one ``{{t|code|...}}``-style entry to a string of up to
+    three unique translations joined by ", ". Returns an empty dict when no
+    page revision is found or when any error occurs (the error is printed).
+    """
+    url = f"https://en.wiktionary.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=main&titles={urllib.parse.quote(word)}&format=json"
+    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
+    try:
+        with urllib.request.urlopen(req) as response:
+            data = json.loads(response.read().decode())
+        pages = data.get('query', {}).get('pages', {})
+        for page_info in pages.values():
+            if 'revisions' in page_info:
+                content = page_info['revisions'][0]['slots']['main']['*']
+
+                # Look for translation tags like {{t|vi|quả táo}} or {{t+|fr|pomme}}
+                # The format is typically {{t[+-ø]?|lang_code|word|...}}
+                translations = {}
+                for lang_code in ['ja', 'ko', 'zh', 'fr', 'es', 'vi']:
+                    # Built by concatenating raw strings: inside an rf-string
+                    # "\{" opens a replacement field, which made the previous
+                    # pattern a SyntaxError.
+                    pattern = r'\{\{t[+ø-]?\|' + lang_code + r'\|([^}|]+)'
+                    matches = re.findall(pattern, content)
+                    if matches:
+                        # Clean up and get unique translations
+                        unique_matches = list(dict.fromkeys([m.strip() for m in matches]))
+                        translations[lang_code] = ", ".join(unique_matches[:3])
+                return translations
+        return {}
+    except Exception as e:
+        print(f"Error: {e}")
+        return {}
+
+if __name__ == "__main__":
+ print(get_wiktionary_translations("apple"))
diff --git a/backend-service/scripts/translate_vocabulary.py b/backend-service/scripts/translate_vocabulary.py
new file mode 100644
index 00000000..34181f25
--- /dev/null
+++ b/backend-service/scripts/translate_vocabulary.py
@@ -0,0 +1,96 @@
+import json
+import urllib.request
+import urllib.parse
+import re
+import time
+import os
+
+FILE_PATH = "/opt/lexilingo/backend-service/data/vocabulary_import.json"
+
+def get_wiktionary_translations(word):
+ """Fetch translations of *word* from the English Wiktionary page wikitext.
+
+ Looks for translation templates for ja, ko, zh, fr, es and vi and maps each
+ language code that has matches to up to three unique translations joined by
+ ", ". An HTTP 429 is retried with exponential backoff, at most max_retries
+ times; any other error is printed and an empty dict is returned.
+ NOTE(review): the nesting of the return statements is not visible in this
+ view; presumably the first page that has revisions decides the result.
+ """
+ url = f"https://en.wiktionary.org/w/api.php?action=query&prop=revisions&rvprop=content&rvslots=main&titles={urllib.parse.quote(word)}&format=json"
+ req = urllib.request.Request(url, headers={'User-Agent': 'LexiLingo-VocabBot/1.0 (contact@lexilingo.com)'})
+
+ max_retries = 5
+ base_delay = 5.0
+
+ for attempt in range(max_retries):
+ try:
+ with urllib.request.urlopen(req) as response:
+ data = json.loads(response.read().decode())
+ pages = data.get('query', {}).get('pages', {})
+ for page_id, page_info in pages.items():
+ if 'revisions' in page_info:
+ content = page_info['revisions'][0]['slots']['main']['*']
+
+ translations = {}
+ for lang_code in ['ja', 'ko', 'zh', 'fr', 'es', 'vi']:
+ # Matches {{t|code|text, {{t+|code|text, {{t-|code|text and {{tø|code|text;
+ # the captured text stops at the next "|" or "}".
+ pattern = r'\{\{t[+ø-]?\|' + lang_code + r'\|([^}|]+)'
+ matches = re.findall(pattern, content)
+ if matches:
+ # dict.fromkeys removes duplicates while keeping first-seen order.
+ unique_matches = list(dict.fromkeys([m.strip() for m in matches]))
+ translations[lang_code] = ", ".join(unique_matches[:3])
+ return translations
+ return {}
+ except urllib.error.HTTPError as e:
+ if e.code == 429:
+ # Exponential backoff: base_delay * 2**attempt seconds (5, 10, 20, ...).
+ delay = base_delay * (2 ** attempt)
+ print(f"Rate limited (429) for {word}. Retrying in {delay} seconds...")
+ time.sleep(delay)
+ else:
+ print(f"HTTP Error fetching {word}: {e}")
+ return {}
+ except Exception as e:
+ print(f"Error fetching {word}: {e}")
+ return {}
+
+ print(f"Failed to fetch {word} after {max_retries} retries.")
+ return {}
+
+def main():
+ """Overwrite per-language translations in FILE_PATH with Wiktionary data.
+
+ Every item that has a "word" is looked up with get_wiktionary_translations();
+ each non-empty result replaces the item's entry for that language. The file
+ is rewritten at checkpoints and once more at the end when anything changed.
+ """
+ print("Loading vocabulary...")
+ with open(FILE_PATH, 'r', encoding='utf-8') as f:
+ data = json.load(f)
+
+ # count: words processed so far; updated: individual language entries written
+ # (one item can add several).
+ count = 0
+ updated = 0
+
+ print(f"Translating all words and overwriting existing translations...")
+
+ for item in data:
+ word = item.get("word")
+ # Items without a word are skipped and do not advance count.
+ if not word:
+ continue
+
+ print(f"[{count+1}] Fetching translations for: {word}")
+
+ new_trans = get_wiktionary_translations(word)
+ if new_trans:
+ # Overwrite existing translations with Wiktionary ones
+ translation = item.get("translation", {})
+ for lang, text in new_trans.items():
+ if text: # only if we found a translation
+ translation[lang] = text
+ updated += 1
+ item['translation'] = translation
+
+ # NOTE(review): indentation is not visible in this view; presumably the
+ # sleep and the counter run once per word, not only after a successful fetch.
+ time.sleep(2.0) # Sleep 2 seconds to respect Wiktionary API rate limits
+ count += 1
+
+
+ # Save every 50 items to avoid losing data on crash
+ if count % 50 == 0 and updated > 0:
+ print(f"Checkpoint: Saving {updated} new translations...")
+ with open(FILE_PATH, 'w', encoding='utf-8') as f:
+ json.dump(data, f, ensure_ascii=False, indent=2)
+
+ if updated > 0:
+ print(f"Saving {updated} new translations...")
+ with open(FILE_PATH, 'w', encoding='utf-8') as f:
+ json.dump(data, f, ensure_ascii=False, indent=2)
+ print("Save completed.")
+ else:
+ print("No updates needed for the scanned words.")
+
+if __name__ == "__main__":
+ main()
diff --git a/backend-service/tests/conftest.py b/backend-service/tests/conftest.py
index d3ff3535..344fa5b9 100644
--- a/backend-service/tests/conftest.py
+++ b/backend-service/tests/conftest.py
@@ -10,7 +10,7 @@
from typing import AsyncGenerator
from pathlib import Path
from httpx import AsyncClient
-from sqlalchemy import text
+from sqlalchemy import text, select
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker
from sqlalchemy.pool import NullPool
from uuid import uuid4
@@ -19,10 +19,14 @@
if str(BACKEND_SERVICE_ROOT) not in sys.path:
sys.path.insert(0, str(BACKEND_SERVICE_ROOT))
+# Force DEBUG=False before app settings are imported so that production-only
+# guards do not activate in tests and settings validation always uses safe defaults.
+os.environ["DEBUG"] = "false"
+
# Keep tests isolated from local/prod env-switch state.
# Using APP_ENV=testing ensures production-only middleware (e.g., TrustedHost)
# does not break ASGI test client requests to http://test.
-os.environ.setdefault("APP_ENV", "testing")
+os.environ["APP_ENV"] = "testing"
from app.main import app
from app.core.database import Base, get_db
@@ -51,6 +55,17 @@ def disable_rate_limiting(monkeypatch):
"postgresql+asyncpg://lexilingo:lexilingo_pass@localhost:5432/lexilingo_test"
)
+# Safety guard: refuse to wipe a database whose name does not end with _test.
+# This prevents an accidental misconfigured TEST_DATABASE_URL from destroying
+# a production or development database.
+_db_name = TEST_DATABASE_URL.rstrip("/").rsplit("/", 1)[-1]
+if not _db_name.endswith("_test"):
+ raise RuntimeError(
+ f"TEST_DATABASE_URL database name '{_db_name}' does not end with '_test'. "
+ "Refusing to drop/recreate schema to protect non-test databases. "
+ "Set TEST_DATABASE_URL to a database whose name ends with '_test'."
+ )
+
async def _reset_public_schema(engine) -> None:
"""Fully reset public schema to keep enum/table state deterministic between tests."""
@@ -142,32 +157,43 @@ def auth_headers(test_user: User) -> dict:
@pytest.fixture
async def admin_user(db_session: AsyncSession) -> User:
"""Create a test admin user with admin role for testing admin endpoints."""
- admin_role = Role(
- name="Admin", slug="admin", level=1,
- description="Admin role", is_system=True, is_active=True
- )
- db_session.add(admin_role)
- await db_session.commit()
- await db_session.refresh(admin_role)
+ # Check if Admin role already exists to avoid unique constraint violations
+ res = await db_session.execute(select(Role).where(Role.name == "Admin"))
+ admin_role = res.scalar_one_or_none()
+
+ if not admin_role:
+ admin_role = Role(
+ name="Admin", slug="admin", level=1,
+ description="Admin role", is_system=True, is_active=True
+ )
+ db_session.add(admin_role)
+ await db_session.commit()
+ await db_session.refresh(admin_role)
- user = User(
- email="admin@example.com",
- username="adminuser",
- hashed_password="$2b$12$LQv3c1yqBWVHxkd0LHAkCOYz6TtxMQJqhN8/LewY5GyYzS6NzE3Fu",
- display_name="Admin User",
- is_active=True,
- is_verified=True,
- native_language="vi",
- target_language="en",
- level="beginner",
- role_id=admin_role.id,
- )
- db_session.add(user)
- await db_session.commit()
- await db_session.refresh(user)
+ # Check if admin user already exists to avoid unique constraint violations
+ res_user = await db_session.execute(select(User).where(User.email == "admin@example.com"))
+ user = res_user.scalar_one_or_none()
+
+ if not user:
+ user = User(
+ email="admin@example.com",
+ username="adminuser",
+ hashed_password="$2b$12$LQv3c1yqBWVHxkd0LHAkCOYz6TtxMQJqhN8/LewY5GyYzS6NzE3Fu",
+ display_name="Admin User",
+ is_active=True,
+ is_verified=True,
+ native_language="vi",
+ target_language="en",
+ level="beginner",
+ role_id=admin_role.id,
+ )
+ db_session.add(user)
+ await db_session.commit()
+ await db_session.refresh(user)
return user
+
@pytest.fixture
def admin_headers(admin_user: User) -> dict:
"""Create authentication headers with JWT token for an admin user."""
diff --git a/backend-service/tests/integration/test_content_agent_licensed_etl_flow.py b/backend-service/tests/integration/test_content_agent_licensed_etl_flow.py
new file mode 100644
index 00000000..7bcd9a17
--- /dev/null
+++ b/backend-service/tests/integration/test_content_agent_licensed_etl_flow.py
@@ -0,0 +1,126 @@
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import pytest
+from sqlalchemy import func, select
+from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
+from sqlalchemy.orm import sessionmaker
+
+from app.core.database import Base
+from app.models.content_agent import (
+ ContentAgentJob,
+ ContentAgentUpload,
+ ContentProvenance,
+ LessonVocabularyItem,
+)
+from app.models.course import Course, Lesson, Unit
+from app.models.vocabulary import VocabularyItem
+from app.services.content_agent_apply import ContentAgentApplyService
+from app.services.content_agent_validation import validate_artifact
+
+REPO_ROOT = Path(__file__).resolve().parents[3]
+SHARED_FIXTURE = (
+ REPO_ROOT
+ / "contracts"
+ / "content-agent"
+ / "fixtures"
+ / "licensed-etl-artifact-v2.json"
+)
+
+
+@pytest.fixture
+async def licensed_flow_db():
+ engine = create_async_engine("sqlite+aiosqlite:///:memory:")
+ tables = [
+ ContentAgentUpload.__table__,
+ ContentAgentJob.__table__,
+ Course.__table__,
+ Unit.__table__,
+ Lesson.__table__,
+ VocabularyItem.__table__,
+ LessonVocabularyItem.__table__,
+ ContentProvenance.__table__,
+ ]
+ async with engine.begin() as connection:
+ await connection.run_sync(
+ lambda sync_connection: Base.metadata.create_all(
+ sync_connection,
+ tables=tables,
+ )
+ )
+ factory = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
+ async with factory() as session:
+ yield session
+ await engine.dispose()
+
+
+async def test_pinned_licensed_artifact_applies_with_complete_provenance(
+ licensed_flow_db,
+):
+ artifact = json.loads(SHARED_FIXTURE.read_text(encoding="utf-8"))
+ manifest = artifact["source_manifest"][0]
+ pin = {
+ "source_id": manifest["source_name"],
+ **manifest,
+ "status": "active",
+ "enabled": True,
+ }
+ report = validate_artifact(artifact, pinned_snapshots=[pin])
+ assert not report.is_blocking
+
+ job = ContentAgentJob(
+ requested_by_id=None,
+ status="preview_ready",
+ request_hash="d" * 64,
+ revision=1,
+ config={
+ "sources": ["oewn"],
+ "pinned_snapshots": [pin],
+ },
+ progress={"stage": "preview_ready", "percent": 100},
+ artifact=artifact,
+ )
+ licensed_flow_db.add(job)
+ await licensed_flow_db.commit()
+
+ applied, course_ids = await ContentAgentApplyService.apply(
+ licensed_flow_db,
+ job.id,
+ )
+ await licensed_flow_db.commit()
+
+ assert applied.status == "completed"
+ assert len(course_ids) == 1
+ assert await licensed_flow_db.scalar(select(func.count(Course.id))) == 1
+ assert (
+ await licensed_flow_db.scalar(select(func.count(VocabularyItem.id)))
+ == 8
+ )
+ provenance_rows = list(
+ (
+ await licensed_flow_db.execute(
+ select(ContentProvenance).where(
+ ContentProvenance.entity_type == "vocabulary"
+ )
+ )
+ )
+ .scalars()
+ .all()
+ )
+ assert len(provenance_rows) == 8
+ assert all(row.source_version == "2025" for row in provenance_rows)
+ assert all(row.license_id == "CC-BY-4.0" for row in provenance_rows)
+ assert all(row.raw_checksum == "b" * 64 for row in provenance_rows)
+ assert all(row.record_checksum for row in provenance_rows)
+ assert all(row.lineage and row.lineage["adapter"] == "oewn" for row in provenance_rows)
+ assert all(row.content_usage == "lexical" for row in provenance_rows)
+
+ repeated, repeated_ids = await ContentAgentApplyService.apply(
+ licensed_flow_db,
+ job.id,
+ )
+ assert repeated.id == job.id
+ assert repeated_ids == course_ids
+ assert await licensed_flow_db.scalar(select(func.count(Course.id))) == 1
diff --git a/backend-service/tests/test_auth_routes.py b/backend-service/tests/test_auth_routes.py
index fa555031..c1f940fc 100644
--- a/backend-service/tests/test_auth_routes.py
+++ b/backend-service/tests/test_auth_routes.py
@@ -337,7 +337,7 @@ async def mock_get_db():
app.dependency_overrides[get_db] = mock_get_db
transport = ASGITransport(app=app)
- with patch("app.routes.auth.verify_password_async", new=AsyncMock(return_value=False)):
+ with patch("app.services.auth_service.verify_password_async", new=AsyncMock(return_value=False)):
async with AsyncClient(transport=transport, base_url="http://test") as c:
response = await c.post(
f"{BASE}/login",
@@ -362,7 +362,7 @@ async def mock_get_db():
app.dependency_overrides[get_db] = mock_get_db
transport = ASGITransport(app=app)
- with patch("app.routes.auth.verify_password_async", new=AsyncMock(return_value=True)):
+ with patch("app.services.auth_service.verify_password_async", new=AsyncMock(return_value=True)):
async with AsyncClient(transport=transport, base_url="http://test") as c:
response = await c.post(
f"{BASE}/login",
@@ -390,7 +390,7 @@ async def mock_get_db():
app.dependency_overrides[get_db] = mock_get_db
transport = ASGITransport(app=app)
- with patch("app.routes.auth.verify_password_async", new=AsyncMock(return_value=True)):
+ with patch("app.services.auth_service.verify_password_async", new=AsyncMock(return_value=True)):
async with AsyncClient(transport=transport, base_url="http://test") as c:
response = await c.post(
f"{BASE}/login",
diff --git a/backend-service/tests/test_container_hardening.py b/backend-service/tests/test_container_hardening.py
new file mode 100644
index 00000000..4bb7b977
--- /dev/null
+++ b/backend-service/tests/test_container_hardening.py
@@ -0,0 +1,51 @@
+"""Unit tests to verify container hardening and network port isolation configurations."""
+
+from pathlib import Path
+import re
+
+ROOT = Path(__file__).resolve().parents[2]
+
+
+def _read(name: str) -> str:  # read a file under ROOT as UTF-8 text
+    return ROOT.joinpath(name).read_text(encoding="utf-8")
+
+
+def test_production_image_uses_pinned_multi_stage_runtime():
+    """The backend production Dockerfile must contain each marker below."""
+    contents = _read("backend-service/Dockerfile.prod")
+
+    # Checked in this order: USER directive, HEALTHCHECK, base image tag.
+    for marker in ("USER appuser", "HEALTHCHECK", "python:3.13-slim"):
+        assert marker in contents
+
+
+def test_compose_ports_restricted_to_loopback():
+    """Only 80:80 and 443:443 may be published without a 127.0.0.1 bind."""
+    compose = _read("docker-compose.yml")
+
+    # Short-syntax entries under each `ports:` key; quotes are optional in YAML.
+    port_entry = re.compile(r"""^\s+-\s*["']?([\d$\[][^"'\s#]*)""")
+    all_mapped_ports = []
+    in_ports = False
+    for line in compose.splitlines():
+        if line.strip().startswith("ports:"):
+            in_ports = True
+            continue
+        if in_ports:
+            m = port_entry.match(line)
+            if m:
+                all_mapped_ports.append(m.group(1))
+            else:
+                in_ports = False
+
+    # Ensure postgres, redis, prometheus, and grafana bind strictly to 127.0.0.1
+    assert "127.0.0.1:5432:5432" in all_mapped_ports
+    assert "127.0.0.1:6379:6379" in all_mapped_ports
+    assert "127.0.0.1:9090:9090" in all_mapped_ports
+    assert "127.0.0.1:3001:3000" in all_mapped_ports
+
+    # Ensure only gateway ports are publicly exposed (0.0.0.0)
+    public_ports = [p for p in all_mapped_ports if "127.0.0.1" not in p]
+    assert len(public_ports) == 2
+    assert "80:80" in public_ports
+    assert "443:443" in public_ports
diff --git a/backend-service/tests/test_content_agent_apply.py b/backend-service/tests/test_content_agent_apply.py
new file mode 100644
index 00000000..c7a34671
--- /dev/null
+++ b/backend-service/tests/test_content_agent_apply.py
@@ -0,0 +1,258 @@
+import uuid
+from datetime import UTC, datetime, timedelta
+
+import pytest
+from sqlalchemy import func, select
+from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
+from sqlalchemy.orm import sessionmaker
+
+from app.core.database import Base
+from app.crud.vocabulary import vocabulary_crud
+from app.models.content_agent import (
+ ContentAgentJob,
+ ContentAgentUpload,
+ ContentProvenance,
+ LessonVocabularyItem,
+)
+from app.models.course import Course, Lesson, Unit
+from app.models.vocabulary import VocabularyItem
+from app.services.content_agent_apply import ContentAgentApplyService
+from app.services.vocabulary_catalog import normalize_word
+
+
+@pytest.fixture
+async def content_agent_db():
+ engine = create_async_engine("sqlite+aiosqlite:///:memory:")
+ tables = [
+ ContentAgentUpload.__table__,
+ ContentAgentJob.__table__,
+ Course.__table__,
+ Unit.__table__,
+ Lesson.__table__,
+ VocabularyItem.__table__,
+ LessonVocabularyItem.__table__,
+ ContentProvenance.__table__,
+ ]
+ async with engine.begin() as connection:
+ await connection.run_sync(
+ lambda sync_connection: Base.metadata.create_all(
+ sync_connection, tables=tables
+ )
+ )
+ factory = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
+ async with factory() as session:
+ yield session
+ await engine.dispose()
+
+
+def _artifact() -> dict:
+ vocabulary = [
+ {
+ "word": "HELLO" if index == 0 else f"word{index}",
+ "definition": f"Generated definition {index}",
+ "part_of_speech": "noun",
+ "difficulty_level": "A1",
+ "topic": "daily_life",
+ "source_name": "admin_upload",
+ "license_mode": "admin_owned",
+ "source_version": "job-upload-v1",
+ "source_record_id": f"admin_upload:{index}",
+ "license_id": "LicenseRef-Admin-Owned",
+ "license_url": "https://lexilingo.me/legal/content-upload-rights",
+ "attribution_text": "Administrator-owned or licensed upload",
+ "raw_checksum": "b" * 64,
+ "record_checksum": f"{index:064x}",
+ "source_checksum": f"{index:064x}",
+ "lineage": {
+ "adapter": "admin_upload",
+ "adapter_version": 1,
+ "raw_path": "content-agent-upload/test",
+ "source_location": f"row:{index}",
+ },
+ "content_usage": "full_text",
+ }
+ for index in range(8)
+ ]
+ exercises = [
+ {
+ "id": f"exercise-{index}",
+ "type": "translate",
+ "ui_type": "speaking_repeat",
+ "question": f"Question {index}",
+ "correct_answer": f"Answer {index}",
+ }
+ for index in range(4)
+ ]
+ return {
+ "schema_version": 2,
+ "prompt_version": "cefr-course-v2",
+ "generation_key": "a" * 64,
+ "source_manifest": [
+ {
+ "snapshot_id": f"admin_upload:job-upload-v1:{'b' * 64}",
+ "source_name": "admin_upload",
+ "source_version": "job-upload-v1",
+ "official_url": "https://lexilingo.me/admin/content-agent/uploads",
+ "license_id": "LicenseRef-Admin-Owned",
+ "license_url": "https://lexilingo.me/legal/content-upload-rights",
+ "attribution_text": "Administrator-owned or licensed upload",
+ "retrieved_at": "2026-06-15T00:00:00Z",
+ "raw_checksum": "b" * 64,
+ "normalized_sha256": "c" * 64,
+ "normalized_bytes": 128,
+ "record_checksum_root": "d" * 64,
+ "adapter_version": 1,
+ "record_count": 8,
+ }
+ ],
+ "courses": [
+ {
+ "title": "English A1 Foundations",
+ "level": "A1",
+ "units": [
+ {
+ "title": "Daily Life",
+ "order_index": 0,
+ "lessons": [
+ {
+ "title": "Greetings",
+ "order_index": 0,
+ "vocabulary": vocabulary,
+ "exercises": exercises,
+ }
+ ],
+ }
+ ],
+ }
+ ],
+ }
+
+
+def _upload() -> ContentAgentUpload:
+ return ContentAgentUpload(
+ id=uuid.uuid4(),
+ uploaded_by_id=uuid.uuid4(),
+ filename="admin.csv",
+ checksum="b" * 64,
+ row_count=8,
+ schema_version=1,
+ records=[],
+ expires_at=datetime.now(UTC) + timedelta(days=1),
+ rights_confirmed=True,
+ rights_confirmed_at=datetime.now(UTC),
+ uploader_id=uuid.uuid4(),
+ )
+
+
+async def test_apply_reuses_vocabulary_and_is_idempotent(content_agent_db):
+ existing = VocabularyItem(
+ word="hello",
+ definition="Curated definition",
+ part_of_speech="noun",
+ difficulty_level="A1",
+ )
+ upload = _upload()
+ job = ContentAgentJob(
+ requested_by_id=None,
+ upload_id=upload.id,
+ status="preview_ready",
+ request_hash="a" * 64,
+ revision=1,
+ config={},
+ progress={"stage": "preview_ready", "percent": 100},
+ artifact=_artifact(),
+ )
+ content_agent_db.add_all([existing, upload, job])
+ await content_agent_db.commit()
+
+ applied_job, course_ids = await ContentAgentApplyService.apply(
+ content_agent_db, job.id
+ )
+ await content_agent_db.commit()
+
+ assert applied_job.status == "completed"
+ assert len(course_ids) == 1
+ assert (
+ await content_agent_db.scalar(
+ select(func.count(Course.id))
+ )
+ == 1
+ )
+ assert (
+ await content_agent_db.scalar(
+ select(func.count(VocabularyItem.id))
+ )
+ == 8
+ )
+ assert (
+ await content_agent_db.scalar(
+ select(func.count(LessonVocabularyItem.id))
+ )
+ == 8
+ )
+ await content_agent_db.refresh(existing)
+ assert existing.definition == "Curated definition"
+ lesson_id = await content_agent_db.scalar(select(Lesson.id))
+ lesson_items = await vocabulary_crud.get_vocabulary_items(
+ content_agent_db,
+ course_id=course_ids[0],
+ lesson_id=lesson_id,
+ limit=20,
+ )
+ assert len(lesson_items) == 8
+ assert existing in lesson_items
+ provenance = await content_agent_db.scalar(
+ select(ContentProvenance).where(
+ ContentProvenance.entity_type == "vocabulary",
+ ContentProvenance.entity_id == existing.id,
+ )
+ )
+ assert provenance is not None
+ assert provenance.source_version == "job-upload-v1"
+ assert provenance.license_id == "LicenseRef-Admin-Owned"
+ assert provenance.raw_checksum == "b" * 64
+ assert provenance.lineage["adapter"] == "admin_upload"
+
+ repeated_job, repeated_ids = await ContentAgentApplyService.apply(
+ content_agent_db, job.id
+ )
+
+ assert repeated_job.id == job.id
+ assert repeated_ids == course_ids
+ assert repeated_job.created_entity_ids == {
+ "course_ids": [str(course_ids[0])]
+ }
+
+
+async def test_apply_deduplicates_unicode_normalized_vocabulary(content_agent_db):
+ raw_word = "Café’s—Menu"
+ artifact = _artifact()
+ artifact["courses"][0]["units"][0]["lessons"][0]["vocabulary"][0]["word"] = raw_word
+ existing = VocabularyItem(
+ word=normalize_word(raw_word),
+ definition="Curated definition",
+ part_of_speech="noun",
+ difficulty_level="A1",
+ )
+ upload = _upload()
+ job = ContentAgentJob(
+ requested_by_id=None,
+ upload_id=upload.id,
+ status="preview_ready",
+ request_hash="b" * 64,
+ revision=1,
+ config={},
+ progress={"stage": "preview_ready", "percent": 100},
+ artifact=artifact,
+ )
+ content_agent_db.add_all([existing, upload, job])
+ await content_agent_db.commit()
+
+ await ContentAgentApplyService.apply(content_agent_db, job.id)
+ await content_agent_db.commit()
+
+ assert await content_agent_db.scalar(
+ select(func.count(VocabularyItem.id))
+ ) == 8
+ await content_agent_db.refresh(existing)
+ assert existing.definition == "Curated definition"
diff --git a/backend-service/tests/test_content_agent_contract.py b/backend-service/tests/test_content_agent_contract.py
new file mode 100644
index 00000000..3ec67b52
--- /dev/null
+++ b/backend-service/tests/test_content_agent_contract.py
@@ -0,0 +1,72 @@
+import uuid
+
+import pytest
+from pydantic import ValidationError
+
+from app.schemas.content_agent import ContentAgentJobCreate
+from app.services.vocabulary_catalog import normalize_word
+
+
+def test_rollout_stage_one_accepts_internal_and_uploaded_sources() -> None:
+    """A valid request keeps its levels; words_per_lesson comes back as 10."""
+    payload = {
+        "levels": ["A1", "A2"],
+        "sources": ["existing_cefr"],
+        "exercise_mix": {"speaking": 2, "listening": 2},
+    }
+    job_request = ContentAgentJobCreate(**payload)
+    assert (job_request.levels, job_request.words_per_lesson) == (["A1", "A2"], 10)
+
+
+def test_accepts_approved_dataset_source_ids() -> None:
+    """The four dataset source ids validate and come back in the same order."""
+    dataset_ids = ["oewn", "cmudict", "cefr_j", "wikidata"]
+
+    job_request = ContentAgentJobCreate(levels=["A1"], sources=list(dataset_ids))
+
+    assert job_request.sources == dataset_ids
+
+
+@pytest.mark.parametrize(
+ "source_name",
+ ["bbc", "british_council", "cambridge_dictionary", "oxford", "voa"],
+)
+def test_rejects_removed_web_source_ids(source_name: str) -> None:
+ with pytest.raises(ValidationError, match="unsupported sources"):
+ ContentAgentJobCreate(levels=["A1"], sources=[source_name])
+
+
+def test_exercise_mix_must_fit_total() -> None:
+ with pytest.raises(ValidationError, match="must fit"):
+ ContentAgentJobCreate(
+ levels=["A1"],
+ sources=["existing_cefr"],
+ exercises_per_lesson=4,
+ exercise_mix={"speaking": 3, "listening": 2},
+ )
+
+
+def test_upload_id_and_admin_upload_source_must_be_selected_together() -> None:
+ upload_id = uuid.uuid4()
+ with pytest.raises(ValidationError, match="admin_upload must be selected"):
+ ContentAgentJobCreate(
+ levels=["A1"],
+ sources=["existing_cefr"],
+ upload_id=upload_id,
+ )
+ with pytest.raises(ValidationError, match="upload_id is required"):
+ ContentAgentJobCreate(levels=["A1"], sources=["admin_upload"])
+
+
+def test_vocabulary_normalization_handles_em_dash_and_casefold() -> None:
+    """Padding is stripped, the em dash becomes "-", and case is lowered."""
+    padded = " Café—Menu "
+    normalized = normalize_word(padded)
+    assert normalized == "café-menu"
+
+
+def test_vocabulary_normalization_converts_curly_apostrophe() -> None:
+    """U+2019 (right single quotation mark) normalizes to U+0027."""
+    curly = "it’s"
+    straight = "it's"
+    assert normalize_word(curly) == straight
diff --git a/backend-service/tests/test_content_agent_jobs.py b/backend-service/tests/test_content_agent_jobs.py
new file mode 100644
index 00000000..5bd3eee0
--- /dev/null
+++ b/backend-service/tests/test_content_agent_jobs.py
@@ -0,0 +1,101 @@
+import uuid
+
+import pytest
+from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
+from sqlalchemy.orm import sessionmaker
+
+from app.core.database import Base
+from app.models.content_agent import ContentAgentJob, ContentAgentUpload
+from app.schemas.content_agent import ContentAgentJobCreate
+from app.services.content_agent_jobs import ContentAgentJobService, request_hash
+
+
+@pytest.fixture
+async def content_agent_db():
+ engine = create_async_engine("sqlite+aiosqlite:///:memory:")
+ async with engine.begin() as connection:
+ await connection.run_sync(
+ lambda sync_connection: Base.metadata.create_all(
+ sync_connection,
+ tables=[
+ ContentAgentUpload.__table__,
+ ContentAgentJob.__table__,
+ ],
+ )
+ )
+ factory = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
+ async with factory() as session:
+ yield session
+ await engine.dispose()
+
+
+async def test_duplicate_active_job_requires_revision(content_agent_db):
+ config = ContentAgentJobCreate(levels=["A1"], sources=["existing_cefr"])
+ requester = uuid.uuid4()
+
+ first = await ContentAgentJobService.create(
+ content_agent_db,
+ requested_by_id=requester,
+ config=config,
+ )
+ await content_agent_db.commit()
+
+ with pytest.raises(ValueError, match="active job"):
+ await ContentAgentJobService.create(
+ content_agent_db,
+ requested_by_id=requester,
+ config=config,
+ )
+
+ revised = await ContentAgentJobService.create(
+ content_agent_db,
+ requested_by_id=requester,
+ config=config.model_copy(update={"revision": True}),
+ )
+
+ assert first.revision == 1
+ assert revised.revision == 2
+
+
+async def test_job_state_machine_rejects_skipped_stages(content_agent_db):
+ job = await ContentAgentJobService.create(
+ content_agent_db,
+ requested_by_id=uuid.uuid4(),
+ config=ContentAgentJobCreate(levels=["A1"], sources=["existing_cefr"]),
+ )
+
+ with pytest.raises(ValueError, match="Invalid job transition"):
+ await ContentAgentJobService.transition(
+ content_agent_db, job, "generating"
+ )
+
+
+def test_request_hash_changes_when_snapshot_pin_changes():
+ def config(snapshot_id: str) -> ContentAgentJobCreate:
+ return ContentAgentJobCreate(
+ levels=["A1"],
+ sources=["oewn"],
+ pinned_snapshots=[
+ {
+ "source_id": "oewn",
+ "source_name": "oewn",
+ "source_version": "2025",
+ "snapshot_id": snapshot_id,
+ "official_url": "https://en-word.net/static/english-wordnet-2025.xml.gz",
+ "license_id": "CC-BY-4.0",
+ "license_url": "https://creativecommons.org/licenses/by/4.0/",
+ "attribution_text": "Open English WordNet 2025",
+ "retrieved_at": "2026-06-15T00:00:00Z",
+ "raw_checksum": "a" * 64,
+ "normalized_sha256": "b" * 64,
+ "normalized_bytes": 100,
+ "record_checksum_root": "c" * 64,
+ "adapter_version": 1,
+ "record_count": 100,
+ "status": "active",
+ "enabled": True,
+ }
+ ],
+ )
+
+ assert request_hash(config("snapshot-a")) != request_hash(config("snapshot-b"))
diff --git a/backend-service/tests/test_content_agent_routes.py b/backend-service/tests/test_content_agent_routes.py
new file mode 100644
index 00000000..403b41ba
--- /dev/null
+++ b/backend-service/tests/test_content_agent_routes.py
@@ -0,0 +1,123 @@
+import uuid
+from types import SimpleNamespace
+
+from fastapi.routing import APIRoute
+
+from app.core.dependencies import get_current_admin
+from app.routes import content_agent as content_agent_routes
+
+router = content_agent_routes.router
+
+
+def _dependency_calls(route: APIRoute) -> set:
+    """Return every non-None `call` found in the route's dependency tree."""
+    stack = [*route.dependant.dependencies]
+    found = set()
+    while stack:
+        node = stack.pop()
+        stack += node.dependencies
+        found.add(node.call)
+    return found - {None}
+
+
+def test_every_content_agent_route_requires_admin():
+    """Every APIRoute on the router must depend on get_current_admin."""
+    api_routes = list(filter(lambda r: isinstance(r, APIRoute), router.routes))
+    assert api_routes
+    for api_route in api_routes:
+        assert get_current_admin in _dependency_calls(api_route), api_route.path
+
+
+async def test_cancel_locks_job_commits_and_revokes_worker(monkeypatch):
+ job = SimpleNamespace(
+ id=uuid.uuid4(),
+ status="generating",
+ celery_task_id="celery-task-1",
+ )
+ admin = SimpleNamespace(id=uuid.uuid4(), role_level=2)
+ calls = {"locked": False, "commits": 0, "revoked": None}
+
+ class FakeSession:
+ async def commit(self):
+ calls["commits"] += 1
+
+ async def fake_get_job(_db, job_id, *, lock=False):
+ assert job_id == job.id
+ calls["locked"] = lock
+ return job
+
+ async def fake_cancel(_db, target):
+ target.status = "cancelled"
+ return target
+
+ def fake_revoke(task_id, *, terminate):
+ calls["revoked"] = (task_id, terminate)
+
+ monkeypatch.setattr(content_agent_routes, "_get_job_or_404", fake_get_job)
+ monkeypatch.setattr(
+ content_agent_routes.ContentAgentJobService,
+ "cancel",
+ fake_cancel,
+ )
+ monkeypatch.setattr(content_agent_routes, "_audit", lambda *args, **kwargs: None)
+ monkeypatch.setattr(
+ content_agent_routes,
+ "_job_response",
+ lambda target: {"id": str(target.id), "status": target.status},
+ )
+ monkeypatch.setattr(
+ content_agent_routes.celery_app,
+ "control",
+ SimpleNamespace(revoke=fake_revoke),
+ raising=False,
+ )
+
+ response = await content_agent_routes.cancel_job(
+ job.id,
+ db=FakeSession(),
+ admin=admin,
+ )
+
+ assert response.data["status"] == "cancelled"
+ assert calls == {
+ "locked": True,
+ "commits": 1,
+ "revoked": ("celery-task-1", False),
+ }
+
+
+async def test_source_catalog_route_returns_validated_active_snapshots(monkeypatch):
+ monkeypatch.setattr(content_agent_routes, "_require_enabled", lambda: None)
+ monkeypatch.setattr(content_agent_routes, "ContentAgentClient", lambda: object())
+
+ async def fake_catalog(_client):
+ return [
+ {
+ "source_id": "oewn",
+ "source_name": "oewn",
+ "source_version": "2025",
+ "snapshot_id": "oewn:2025:" + ("a" * 64),
+ "official_url": "https://en-word.net/static/english-wordnet-2025.xml.gz",
+ "license_id": "CC-BY-4.0",
+ "license_url": "https://creativecommons.org/licenses/by/4.0/",
+ "attribution_text": "Open English WordNet 2025",
+ "retrieved_at": "2026-06-15T00:00:00Z",
+ "raw_checksum": "a" * 64,
+ "normalized_sha256": "b" * 64,
+ "normalized_bytes": 100,
+ "record_checksum_root": "c" * 64,
+ "adapter_version": 1,
+ "record_count": 100,
+ "status": "active",
+ "enabled": True,
+ }
+ ]
+
+ monkeypatch.setattr(content_agent_routes, "get_source_catalog", fake_catalog)
+
+ response = await content_agent_routes.list_sources(
+ _=SimpleNamespace(id=uuid.uuid4())
+ )
+
+ assert response.data[0].source_id == "oewn"
+ assert response.data[0].status == "active"
diff --git a/backend-service/tests/test_content_agent_sources.py b/backend-service/tests/test_content_agent_sources.py
new file mode 100644
index 00000000..e7d9f1d8
--- /dev/null
+++ b/backend-service/tests/test_content_agent_sources.py
@@ -0,0 +1,195 @@
+"""Unit tests for content_agent_sources: snapshot resolution logic."""
+
+from __future__ import annotations
+
+import pytest
+
+from app.services.content_agent_sources import (
+ SourceResolutionError,
+ canonicalize_sources,
+ resolve_snapshots,
+)
+
+
+def _catalog(*entries: dict) -> list[dict]:  # positional entries -> new list
+    return [*entries]
+
+
+def _source(
+ source_id: str,
+ *,
+ snapshot_id: str | None = None,
+ status: str = "active",
+ license_id: str = "CC-BY-4.0",
+ license_url: str = "https://creativecommons.org/licenses/by/4.0/",
+ attribution_text: str = "Test attribution",
+ content_usage: str = "full_text",
+ enabled: bool = True,
+) -> dict:
+ return {
+ "source_id": source_id,
+ "source_name": source_id,
+ "source_version": "2025",
+ "snapshot_id": snapshot_id or f"{source_id}-snap-001",
+ "official_url": "https://example.com/source",
+ "status": status,
+ "license_id": license_id,
+ "license_url": license_url,
+ "attribution_text": attribution_text,
+ "retrieved_at": "2026-06-15T00:00:00Z",
+ "raw_checksum": "a" * 64,
+ "normalized_sha256": "b" * 64,
+ "normalized_bytes": 100,
+ "record_checksum_root": "c" * 64,
+ "adapter_version": 1,
+ "record_count": 10,
+ "enabled": enabled,
+ }
+
+
+# ---------------------------------------------------------------------------
+# Happy path
+# ---------------------------------------------------------------------------
+
+
+def test_admin_upload_is_the_only_virtual_source() -> None:
+ assert resolve_snapshots(["admin_upload"], catalog=[]) == []
+ with pytest.raises(SourceResolutionError, match="cefr_j"):
+ resolve_snapshots(["existing_cefr"], catalog=[])
+
+
+def test_existing_cefr_alias_resolves_to_canonical_cefr_j_snapshot() -> None:
+ catalog = _catalog(
+ _source(
+ "cefr_j",
+ snapshot_id="cefr-j-snap",
+ license_id="LicenseRef-CEFR-J-Commercial",
+ )
+ )
+ resolved = resolve_snapshots(["existing_cefr"], catalog)
+
+ assert resolved[0]["source_id"] == "cefr_j"
+ assert canonicalize_sources(["existing_cefr", "admin_upload"]) == [
+ "cefr_j",
+ "admin_upload",
+ ]
+
+
+def test_exact_snapshot_pinning_captures_descriptor_fields() -> None:
+ catalog = _catalog(_source("oewn", snapshot_id="oewn-20240601"))
+ resolved = resolve_snapshots(["oewn"], catalog=catalog)
+ assert len(resolved) == 1
+ r = resolved[0]
+ assert r["snapshot_id"] == "oewn-20240601"
+ assert r["license_id"] == "CC-BY-4.0"
+
+
+def test_mixed_virtual_and_real_sources_resolved_in_order() -> None:
+ catalog = _catalog(
+ _source(
+ "cefr_j",
+ license_id="LicenseRef-CEFR-J-Commercial",
+ ),
+ _source("oewn"),
+ _source("tatoeba"),
+ )
+ resolved = resolve_snapshots(
+ ["existing_cefr", "oewn", "tatoeba"], catalog=catalog
+ )
+ assert [r["source_id"] for r in resolved] == ["cefr_j", "oewn", "tatoeba"]
+
+
+# ---------------------------------------------------------------------------
+# Blocking error cases
+# ---------------------------------------------------------------------------
+
+
+def test_unavailable_source_raises_resolution_error() -> None:
+ with pytest.raises(SourceResolutionError, match="not found in catalog"):
+ resolve_snapshots(["oewn"], catalog=[])
+
+
+def test_inactive_snapshot_raises_resolution_error() -> None:
+ catalog = _catalog(_source("oewn", status="archived"))
+ with pytest.raises(SourceResolutionError, match="archived"):
+ resolve_snapshots(["oewn"], catalog=catalog)
+
+
+def test_pending_snapshot_also_rejected() -> None:
+ catalog = _catalog(_source("cmudict", status="pending"))
+ with pytest.raises(SourceResolutionError, match="pending"):
+ resolve_snapshots(["cmudict"], catalog=catalog)
+
+
+def test_disabled_active_snapshot_is_rejected() -> None:
+ catalog = _catalog(_source("oewn", enabled=False))
+ with pytest.raises(SourceResolutionError, match="enabled false"):
+ resolve_snapshots(["oewn"], catalog=catalog)
+
+
+def test_missing_snapshot_id_raises_resolution_error() -> None:
+ entry = {
+ "source_id": "cefr_j",
+ "snapshot_id": "",
+ "status": "active",
+ }
+ with pytest.raises(SourceResolutionError, match="snapshot_id"):
+ resolve_snapshots(["cefr_j"], catalog=[entry])
+
+
+def test_multiple_errors_collected_in_single_raise() -> None:
+    """One SourceResolutionError message names every unresolved source."""
+    missing = ["oewn", "cmudict", "tatoeba"]
+    with pytest.raises(SourceResolutionError) as caught:
+        resolve_snapshots(list(missing), catalog=[])
+    for source_id in missing:
+        assert source_id in str(caught.value)
+
+
+def test_stale_catalog_missing_requested_source_rejected() -> None:
+ # Catalog has wikidata but request asks for librispeech
+ catalog = _catalog(_source("wikidata"))
+ with pytest.raises(SourceResolutionError, match="librispeech"):
+ resolve_snapshots(["librispeech"], catalog=catalog)
+
+
+def test_license_mismatch_not_in_catalog_raises_error() -> None:
+ # Source exists but with no license_id and inactive status
+ catalog = _catalog(
+ {"source_id": "common_voice", "snapshot_id": "cv-001", "status": "inactive"}
+ )
+ with pytest.raises(SourceResolutionError):
+ resolve_snapshots(["common_voice"], catalog=catalog)
+
+
+# ---------------------------------------------------------------------------
+# AI-service failure sanitization
+# ---------------------------------------------------------------------------
+
+
+def test_catalog_entry_missing_source_id_is_skipped() -> None:
+ # A malformed entry with no source_id should not cause KeyError
+ catalog = [{"snapshot_id": "bad-entry", "status": "active"}]
+ with pytest.raises(SourceResolutionError, match="oewn"):
+ resolve_snapshots(["oewn"], catalog=catalog)
+
+
+def test_catalog_with_none_values_does_not_crash() -> None:
+ catalog = [
+ {
+ "source_id": "oewn",
+ "snapshot_id": "oewn-snap",
+ "status": "active",
+ "license_id": None,
+ "license_url": None,
+ "attribution_text": None,
+ }
+ ]
+ with pytest.raises(SourceResolutionError, match="oewn"):
+ resolve_snapshots(["oewn"], catalog=catalog)
+
+
+def test_virtual_sources_not_blocked_by_empty_catalog() -> None:
+ # Even when AI service is unavailable, virtual sources should resolve
+ resolved = resolve_snapshots(["admin_upload"], catalog=[])
+ assert resolved == []
diff --git a/backend-service/tests/test_content_agent_tasks.py b/backend-service/tests/test_content_agent_tasks.py
new file mode 100644
index 00000000..627e9042
--- /dev/null
+++ b/backend-service/tests/test_content_agent_tasks.py
@@ -0,0 +1,153 @@
+import httpx
+import pytest
+
+from app.schemas.content_agent import ContentAgentArtifact
+from app.tasks.content_agent import (
+ _attach_pinned_snapshots,
+ _public_error_message,
+ _with_transient_retry,
+)
+
+
+async def test_transient_ai_calls_retry_with_a_bound(monkeypatch):
+ attempts = 0
+
+ async def operation():
+ nonlocal attempts
+ attempts += 1
+ if attempts < 3:
+ raise httpx.ConnectError("temporary secret endpoint failure")
+ return "ok"
+
+ async def no_sleep(_delay):
+ return None
+
+ monkeypatch.setattr("app.tasks.content_agent.asyncio.sleep", no_sleep)
+
+ assert await _with_transient_retry(operation) == "ok"
+ assert attempts == 3
+
+
+async def test_non_transient_ai_calls_are_not_retried(monkeypatch):
+ attempts = 0
+ request = httpx.Request("POST", "https://ai.internal/generate")
+ response = httpx.Response(422, request=request)
+
+ async def operation():
+ nonlocal attempts
+ attempts += 1
+ raise httpx.HTTPStatusError(
+ "payload included a private definition",
+ request=request,
+ response=response,
+ )
+
+ with pytest.raises(httpx.HTTPStatusError):
+ await _with_transient_retry(operation)
+ assert attempts == 1
+
+
+def test_public_task_errors_do_not_expose_exception_payloads():
+ request = httpx.Request("POST", "https://ai.internal/generate")
+ response = httpx.Response(422, request=request)
+ error = httpx.HTTPStatusError(
+ "private uploaded definition: do not expose",
+ request=request,
+ response=response,
+ )
+
+ message = _public_error_message(error)
+
+ assert message == "AI content service request failed with status 422"
+ assert "private uploaded definition" not in message
+
+
+async def test_worker_attaches_exact_pinned_snapshots_before_generation():
+    """The helper calls client.attach_snapshots once with the job id and pins."""
+    from uuid import uuid4  # explicit import instead of __import__("uuid")
+    calls = []
+
+    class FakeClient:
+        async def attach_snapshots(self, job_id, snapshots):
+            calls.append((job_id, snapshots))
+            return {"attached_snapshots": len(snapshots)}
+
+    job_id = uuid4()
+    snapshots = [
+        {
+            "source_id": "oewn",
+            "source_version": "2025",
+            "snapshot_id": "oewn:2025:" + ("a" * 64),
+        }
+    ]
+    await _attach_pinned_snapshots(FakeClient(), job_id, snapshots)
+    assert calls == [(job_id, snapshots)]
+
+
+def test_strict_artifact_manifest_serializes_to_json_primitives():
+ artifact = ContentAgentArtifact.model_validate(
+ {
+ "schema_version": 2,
+ "prompt_version": "cefr-course-v2",
+ "generation_key": "a" * 64,
+ "source_manifest": [
+ {
+ "snapshot_id": f"oewn:2025:{'b' * 64}",
+ "source_name": "oewn",
+ "source_version": "2025",
+ "official_url": "https://en-word.net/static/english-wordnet-2025.xml.gz",
+ "license_id": "CC-BY-4.0",
+ "license_url": "https://creativecommons.org/licenses/by/4.0/",
+ "attribution_text": "Open English WordNet 2025",
+ "retrieved_at": "2026-06-15T00:00:00Z",
+ "raw_checksum": "b" * 64,
+ "normalized_sha256": "c" * 64,
+ "normalized_bytes": 128,
+ "record_checksum_root": "d" * 64,
+ "adapter_version": 1,
+ "record_count": 1,
+ }
+ ],
+ "courses": [
+ {
+ "title": "A1",
+ "level": "A1",
+ "units": [
+ {
+ "title": "Unit",
+ "order_index": 0,
+ "lessons": [
+ {
+ "title": "Lesson",
+ "order_index": 0,
+ "vocabulary": [
+ {
+ "word": "hello",
+ "definition": "A complete greeting definition.",
+ "part_of_speech": "interjection",
+ "difficulty_level": "A1",
+ }
+ ],
+ "exercises": [
+ {
+ "id": "ex-1",
+ "type": "translate",
+ "ui_type": "speaking_repeat",
+ "question": "Repeat hello.",
+ "correct_answer": "hello",
+ }
+ ],
+ }
+ ],
+ }
+ ],
+ }
+ ],
+ }
+ )
+
+ serialized = [
+ item.model_dump(mode="json") for item in artifact.source_manifest
+ ]
+ assert isinstance(serialized[0]["retrieved_at"], str)
+ assert serialized[0]["snapshot_id"].startswith("oewn:2025:")
diff --git a/backend-service/tests/test_content_agent_uploads.py b/backend-service/tests/test_content_agent_uploads.py
new file mode 100644
index 00000000..7c00c997
--- /dev/null
+++ b/backend-service/tests/test_content_agent_uploads.py
@@ -0,0 +1,96 @@
+import json
+
+import pytest
+
+from app.services.content_agent_uploads import (
+ MAX_UPLOAD_BYTES,
+ detect_upload_format,
+ parse_content_upload,
+)
+
+
+def test_csv_upload_normalizes_records_and_reports_row_numbers():
+    """CSV rows parse into records, and the first error is tagged "Row 3:"."""
+    csv_bytes = (
+        b"word,part_of_speech,cefr_level,definition\n"
+        b"Hello,interjection,A1,A greeting\n"
+        b"Broken,noun,Z9,Invalid level\n"
+    )
+    result = parse_content_upload("words.csv", csv_bytes)
+    first = result.records[0]
+
+    assert first["word"] == "Hello"
+    assert first["declared_cefr"] == "A1"
+    assert result.errors and result.errors[0].startswith("Row 3:")
+
+
+def test_json_upload_inherits_admin_owned_source_metadata():
+    """Top-level source_name/license_mode are expected on each parsed record."""
+    payload = {
+        "source_name": "admin_upload",
+        "license_mode": "admin_owned",
+        "records": [
+            {
+                "word": "journey",
+                "part_of_speech": "noun",
+                "cefr_level": "A2",
+            }
+        ],
+    }
+
+    result = parse_content_upload("words.json", json.dumps(payload).encode())
+
+    assert result.errors == []
+    record = result.records[0]
+    assert record["source_name"] == "admin_upload"
+    assert record["license_mode"] == "admin_owned"
+
+
+def test_upload_cannot_forge_trusted_source_provenance():
+    """Provenance fields supplied inside the upload must come back overwritten."""
+    # Every provenance-like key below claims a trusted dataset origin.
+    forged = {
+        "record_id": "existing_cefr:trusted",
+        "source_name": "existing_cefr",
+        "source_url": "https://example.com/forged",
+        "license_mode": "approved_dataset",
+        "content_usage": "label_only",
+        "checksum": "a" * 64,
+        "metadata": {"resource_type": "cefr_label"},
+        "word": "journey",
+        "part_of_speech": "noun",
+        "cefr_level": "A2",
+    }
+
+    parsed = parse_content_upload(
+        "words.json",
+        json.dumps([forged]).encode(),
+    )
+
+    record = parsed.records[0]
+    assert record["record_id"].startswith("admin_upload:")
+    assert record["source_name"] == "admin_upload"
+    assert record["source_url"] is None
+    assert record["license_mode"] == "admin_owned"
+    assert record["content_usage"] == "full_text"
+    assert record["checksum"] is None
+    assert record["metadata"] == {}
+
+
+def test_upload_limits_and_extensions_fail_closed():
+ with pytest.raises(ValueError, match="5 MB"):  # payload is one byte over MAX_UPLOAD_BYTES
+ parse_content_upload("words.csv", b"x" * (MAX_UPLOAD_BYTES + 1))
+ with pytest.raises(ValueError, match="CSV and JSON"):  # .txt name; message should mention CSV and JSON
+ parse_content_upload("words.txt", b"word\nhello\n")
+
+
+def test_invalid_encoding_and_malformed_json_fail_closed():
+ with pytest.raises(ValueError, match="valid UTF-8"):  # b"\xff\xfe" cannot be decoded as UTF-8
+ parse_content_upload("words.csv", b"\xff\xfe")
+ with pytest.raises(ValueError, match="Invalid JSON at line 1"):  # "[}" is malformed JSON
+ parse_content_upload("words.json", b'{"records": [}')
+
+
+def test_upload_format_sniff_rejects_extension_content_mismatch():
+ with pytest.raises(ValueError, match="CSV uploads"):  # JSON-looking bytes under a .csv name
+ detect_upload_format("words.csv", b'{"records":[]}')
diff --git a/backend-service/tests/test_content_agent_validation.py b/backend-service/tests/test_content_agent_validation.py
new file mode 100644
index 00000000..3bb45b64
--- /dev/null
+++ b/backend-service/tests/test_content_agent_validation.py
@@ -0,0 +1,422 @@
+"""One test per blocking validation gate in content_agent_validation."""
+
+from __future__ import annotations
+
+import copy
+
+from app.services.content_agent_validation import validate_artifact
+
+
+def _base_artifact() -> dict:
+ """Return a freshly built minimal artifact dict (one course, unit, lesson, word, exercise) meant to pass all gates."""
+ return {
+ "schema_version": 2,
+ "prompt_version": "cefr-course-v2",
+ "generation_key": "a" * 64,
+ "source_manifest": [
+ {
+ "snapshot_id": f"cefr_j:1.0:{'b' * 64}",
+ "source_name": "cefr_j",
+ "source_version": "1.0",
+ "official_url": "https://github.com/openlanguageprofiles/olp-en-cefrj",
+ "license_id": "LicenseRef-CEFR-J-Commercial",
+ "license_url": "https://lexilingo.me/licenses/cefr-j",
+ "attribution_text": "CEFR-J licensed dataset",
+ "retrieved_at": "2026-06-15T00:00:00Z",
+ "raw_checksum": "b" * 64,
+ "normalized_sha256": "c" * 64,
+ "normalized_bytes": 128,
+ "record_checksum_root": "d" * 64,
+ "adapter_version": 1,
+ "record_count": 1,
+ }
+ ],
+ "courses": [
+ {
+ "title": "Test Course A1",
+ "language": "en",
+ "level": "A1",
+ "tags": [],
+ "units": [
+ {
+ "title": "Unit 1",
+ "order_index": 0,
+ "lessons": [
+ {
+ "title": "Lesson 1",
+ "order_index": 0,
+ "estimated_minutes": 10,
+ "xp_reward": 20,
+ "vocabulary": [
+ {
+ "word": "hello",
+ "definition": "A common greeting used to begin a conversation.",
+ "part_of_speech": "interjection",
+ "difficulty_level": "A1",
+ "license_mode": "generated",
+ "source_name": "generated",
+ "topic": "greetings",
+ }
+ ],
+ "exercises": [
+ {
+ "id": "ex-001",
+ "type": "multiple_choice",
+ "ui_type": "multiple_choice",
+ "question": "What does 'hello' mean?",
+ "options": ["A greeting", "Goodbye", "Thank you"],
+ "correct_answer": "A greeting",
+ "difficulty": 1,
+ "points": 10,
+ }
+ ],
+ }
+ ],
+ }
+ ],
+ }
+ ],
+ "quality": {"blocking_errors": [], "warnings": [], "metrics": {}},
+ }
+
+
+def _pins(artifact: dict) -> list[dict]:
+    return [{**artifact["source_manifest"][0]}]  # shallow copy of the first manifest entry
+
+
+def test_valid_artifact_passes_all_gates() -> None:
+    """The untouched base artifact, pinned to its own manifest entry, must not block."""
+    base = _base_artifact()
+    outcome = validate_artifact(base, pinned_snapshots=_pins(base))
+    assert not outcome.is_blocking
+    assert outcome.metrics["course_count"] == 1 and outcome.metrics["lesson_count"] == 1
+
+
+# --- schema_version gate ---
+
+
+def test_wrong_schema_version_is_blocking() -> None:
+    """schema_version 1 (the base artifact uses 2) must raise SCHEMA_VERSION."""
+    artifact = _base_artifact()
+    artifact["schema_version"] = 1
+    blocking = validate_artifact(artifact).blocking_errors
+    assert "SCHEMA_VERSION" in {issue.code for issue in blocking}
+
+
+def test_missing_schema_version_is_blocking() -> None:
+    """An artifact without any schema_version key must raise SCHEMA_VERSION."""
+    artifact = _base_artifact()
+    artifact.pop("schema_version")
+    blocking = validate_artifact(artifact).blocking_errors
+    assert "SCHEMA_VERSION" in {issue.code for issue in blocking}
+
+
+# --- manifest_coverage gate ---
+
+
+def test_empty_manifest_is_blocking() -> None:
+    """A source_manifest with no entries must raise MANIFEST_EMPTY."""
+    artifact = _base_artifact()
+    artifact["source_manifest"] = []
+    blocking = validate_artifact(artifact).blocking_errors
+    assert "MANIFEST_EMPTY" in {issue.code for issue in blocking}
+
+
+def test_manifest_non_object_entry_is_blocking() -> None:
+    """A manifest entry that is a plain string must raise MANIFEST_ENTRY_TYPE."""
+    artifact = _base_artifact()
+    artifact["source_manifest"] = ["not-an-object"]
+    blocking = validate_artifact(artifact).blocking_errors
+    assert "MANIFEST_ENTRY_TYPE" in {issue.code for issue in blocking}
+
+
+def test_manifest_missing_integrity_fields_is_blocking() -> None:
+    """Dropping raw_checksum from the manifest entry must raise MANIFEST_FIELDS_MISSING."""
+    artifact = _base_artifact()
+    entry = artifact["source_manifest"][0]
+    entry.pop("raw_checksum")
+    codes = [err.code for err in validate_artifact(artifact).blocking_errors]
+    assert "MANIFEST_FIELDS_MISSING" in codes
+
+
+def test_pinned_snapshot_mismatch_is_blocking() -> None:
+    """A pin whose raw_checksum differs from the manifest's is expected to block."""
+    artifact = _base_artifact()
+    # Copy the manifest entry, then alter only its raw checksum.
+    tampered_pin = {**artifact["source_manifest"][0], "raw_checksum": "c" * 64}
+    outcome = validate_artifact(artifact, pinned_snapshots=[tampered_pin])
+    codes = {err.code for err in outcome.blocking_errors}
+    assert "PINNED_SNAPSHOT_MISMATCH" in codes
+
+
+# --- course level gate ---
+
+
+def test_invalid_course_level_is_blocking() -> None:
+    """A course level of "Z9" must raise INVALID_COURSE_LEVEL."""
+    artifact = _base_artifact()
+    artifact["courses"][0]["level"] = "Z9"
+    blocking = validate_artifact(artifact).blocking_errors
+    assert "INVALID_COURSE_LEVEL" in {issue.code for issue in blocking}
+
+
+# --- license gate ---
+
+
+def test_unstorable_license_mode_is_blocking() -> None:
+    artifact = _base_artifact()
+    vocab = artifact["courses"][0]["units"][0]["lessons"][0]["vocabulary"][0]
+    vocab["license_mode"] = "scraped"  # value the license gate is expected to refuse
+    blocking = validate_artifact(artifact).blocking_errors
+    assert "INVALID_LICENSE_MODE" in {issue.code for issue in blocking}
+
+
+def test_generated_vocab_cannot_claim_imported_license_mode() -> None:
+    """Vocabulary whose source_name is "generated" may not use approved_dataset."""
+    artifact = _base_artifact()
+    entry = artifact["courses"][0]["units"][0]["lessons"][0]["vocabulary"][0]
+    entry["license_mode"] = "approved_dataset"
+    outcome = validate_artifact(artifact, pinned_snapshots=_pins(artifact))
+    codes = {err.code for err in outcome.blocking_errors}
+    assert "GENERATED_LICENSE_MODE_INVALID" in codes
+
+
+def test_admin_upload_manifest_must_match_attested_upload() -> None:
+    """An attested upload checksum that differs from the manifest must block."""
+    artifact = _base_artifact()
+    rights_url = "https://lexilingo.me/legal/content-upload-rights"
+    attribution = "Administrator-owned or licensed upload"
+    artifact["source_manifest"] = [
+        {
+            "snapshot_id": f"admin_upload:job:test:{'d' * 64}",
+            "source_name": "admin_upload",
+            "source_version": "job-upload-v1",
+            "official_url": "https://lexilingo.me/admin/content-agent/uploads",
+            "license_id": "LicenseRef-Admin-Owned",
+            "license_url": rights_url,
+            "attribution_text": attribution,
+            "retrieved_at": "2026-06-15T00:00:00Z",
+            "raw_checksum": "b" * 64,
+            "normalized_sha256": "c" * 64,
+            "normalized_bytes": 128,
+            "record_checksum_root": "d" * 64,
+            "adapter_version": 1,
+            "record_count": 1,
+        }
+    ]
+    word = artifact["courses"][0]["units"][0]["lessons"][0]["vocabulary"][0]
+    word.update(
+        source_name="admin_upload",
+        license_mode="admin_owned",
+        source_version="job-upload-v1",
+        source_record_id="admin_upload:1:hello",
+        license_id="LicenseRef-Admin-Owned",
+        license_url=rights_url,
+        attribution_text=attribution,
+        raw_checksum="b" * 64,
+        record_checksum="1" * 64,
+        lineage={
+            "adapter": "admin_upload",
+            "adapter_version": 1,
+            "raw_path": "content-agent-upload/test",
+        },
+        content_usage="full_text",
+    )
+    # The attested checksum ("e" * 64) matches none of the manifest checksums above.
+    attested = {"checksum": "e" * 64, "row_count": 1}
+    outcome = validate_artifact(artifact, admin_upload=attested)
+    codes = {err.code for err in outcome.blocking_errors}
+    assert "ADMIN_UPLOAD_CHECKSUM_MISMATCH" in codes
+
+
+# --- unique lesson orders gate ---
+
+
+def test_duplicate_lesson_order_is_blocking() -> None:
+    """Two lessons sharing order_index 0 in one unit must be rejected."""
+    artifact = _base_artifact()
+    lessons = artifact["courses"][0]["units"][0]["lessons"]
+    clone = copy.deepcopy(lessons[0])  # keeps the same order_index
+    clone["title"] = "Lesson 2 (duplicate order)"
+    lessons.append(clone)
+    blocking = validate_artifact(artifact).blocking_errors
+    assert "DUPLICATE_LESSON_ORDER" in {err.code for err in blocking}
+
+
+# --- definition length gate ---
+
+
+def test_definition_too_short_is_blocking() -> None:
+    """A five-character definition ("short") must raise DEFINITION_TOO_SHORT."""
+    artifact = _base_artifact()
+    artifact["courses"][0]["units"][0]["lessons"][0]["vocabulary"][0]["definition"] = "short"
+    blocking = validate_artifact(artifact).blocking_errors
+    assert "DEFINITION_TOO_SHORT" in {issue.code for issue in blocking}
+
+
+def test_definition_empty_is_blocking() -> None:
+    """An empty definition is reported with the same DEFINITION_TOO_SHORT code."""
+    artifact = _base_artifact()
+    artifact["courses"][0]["units"][0]["lessons"][0]["vocabulary"][0]["definition"] = ""
+    blocking = validate_artifact(artifact).blocking_errors
+    assert "DEFINITION_TOO_SHORT" in {issue.code for issue in blocking}
+
+
+# --- POS enum gate ---
+
+
+def test_invalid_pos_is_blocking() -> None:
+    """A part_of_speech of "gerund" must raise INVALID_POS."""
+    artifact = _base_artifact()
+    artifact["courses"][0]["units"][0]["lessons"][0]["vocabulary"][0]["part_of_speech"] = "gerund"
+    blocking = validate_artifact(artifact).blocking_errors
+    assert "INVALID_POS" in {issue.code for issue in blocking}
+
+
+# --- CEFR enum gate ---
+
+
+def test_invalid_vocab_cefr_is_blocking() -> None:
+    """A vocabulary difficulty_level of "X1" must raise INVALID_VOCAB_CEFR."""
+    artifact = _base_artifact()
+    artifact["courses"][0]["units"][0]["lessons"][0]["vocabulary"][0]["difficulty_level"] = "X1"
+    blocking = validate_artifact(artifact).blocking_errors
+    assert "INVALID_VOCAB_CEFR" in {issue.code for issue in blocking}
+
+
+# --- URL gate ---
+
+
+def test_invalid_source_url_is_blocking() -> None:
+    """A source_url of "not-a-url" must raise INVALID_SOURCE_URL."""
+    artifact = _base_artifact()
+    artifact["courses"][0]["units"][0]["lessons"][0]["vocabulary"][0]["source_url"] = "not-a-url"
+    blocking = validate_artifact(artifact).blocking_errors
+    assert "INVALID_SOURCE_URL" in {issue.code for issue in blocking}
+
+
+def test_valid_source_url_passes() -> None:
+    """An https source_url must not trigger INVALID_SOURCE_URL."""
+    artifact = _base_artifact()
+    vocab = artifact["courses"][0]["units"][0]["lessons"][0]["vocabulary"][0]
+    vocab["source_url"] = "https://example.com/word"
+    codes = {err.code for err in validate_artifact(artifact).blocking_errors}
+    assert "INVALID_SOURCE_URL" not in codes
+
+
+# --- exercise id gate ---
+
+
+def test_missing_exercise_id_is_blocking() -> None:
+    """An exercise whose id is the empty string must raise EXERCISE_MISSING_ID."""
+    artifact = _base_artifact()
+    artifact["courses"][0]["units"][0]["lessons"][0]["exercises"][0]["id"] = ""
+    blocking = validate_artifact(artifact).blocking_errors
+    assert "EXERCISE_MISSING_ID" in {issue.code for issue in blocking}
+
+
+def test_duplicate_exercise_id_is_blocking() -> None:
+    """A second exercise reusing id "ex-001" in the same lesson must be rejected."""
+    artifact = _base_artifact()
+    exercises = artifact["courses"][0]["units"][0]["lessons"][0]["exercises"]
+    twin = copy.deepcopy(exercises[0])  # same id, different question text
+    twin["question"] = "A different question?"
+    exercises.append(twin)
+    blocking = validate_artifact(artifact).blocking_errors
+    assert "DUPLICATE_EXERCISE_ID" in {err.code for err in blocking}
+
+
+# --- type/ui_type gate ---
+
+
+def test_invalid_exercise_type_is_blocking() -> None:
+    """An exercise type of "drag_drop" must raise INVALID_EXERCISE_TYPE."""
+    artifact = _base_artifact()
+    artifact["courses"][0]["units"][0]["lessons"][0]["exercises"][0]["type"] = "drag_drop"
+    blocking = validate_artifact(artifact).blocking_errors
+    assert "INVALID_EXERCISE_TYPE" in {issue.code for issue in blocking}
+
+
+def test_missing_ui_type_is_blocking() -> None:
+    """An exercise with an empty ui_type must raise MISSING_UI_TYPE."""
+    artifact = _base_artifact()
+    artifact["courses"][0]["units"][0]["lessons"][0]["exercises"][0]["ui_type"] = ""
+    blocking = validate_artifact(artifact).blocking_errors
+    assert "MISSING_UI_TYPE" in {issue.code for issue in blocking}
+
+
+def test_ui_type_must_match_base_exercise_type() -> None:
+    """ui_type "dictation" on a multiple_choice exercise must be flagged.
+
+    The base exercise has type and ui_type both set to "multiple_choice".
+    """
+    artifact = _base_artifact()
+    exercise = artifact["courses"][0]["units"][0]["lessons"][0]["exercises"][0]
+    exercise["ui_type"] = "dictation"
+    assert "EXERCISE_UI_TYPE_MISMATCH" in {e.code for e in validate_artifact(artifact).blocking_errors}
+
+
+# --- options gate ---
+
+
+def test_mc_with_only_one_option_is_blocking() -> None:
+    """A multiple-choice exercise offering one option must raise MC_INSUFFICIENT_OPTIONS."""
+    artifact = _base_artifact()
+    artifact["courses"][0]["units"][0]["lessons"][0]["exercises"][0]["options"] = ["Only one"]
+    blocking = validate_artifact(artifact).blocking_errors
+    assert "MC_INSUFFICIENT_OPTIONS" in {issue.code for issue in blocking}
+
+
+# --- audio question gate ---
+
+
+def test_audio_exercise_without_question_is_blocking() -> None:
+    """An exercise carrying audio_url but an empty question must be rejected."""
+    artifact = _base_artifact()
+    exercise = artifact["courses"][0]["units"][0]["lessons"][0]["exercises"][0]
+    exercise["audio_url"] = "https://cdn.example.com/audio.mp3"
+    exercise["question"] = ""
+    blocking = validate_artifact(artifact).blocking_errors
+    assert "MISSING_AUDIO_QUESTION" in {issue.code for issue in blocking}
+
+
+# --- counts gate ---
+
+
+def test_lesson_with_no_vocabulary_is_blocking() -> None:
+    """A lesson with an empty vocabulary list must raise NO_VOCABULARY."""
+    artifact = _base_artifact()
+    artifact["courses"][0]["units"][0]["lessons"][0]["vocabulary"] = []
+    blocking = validate_artifact(artifact).blocking_errors
+    assert "NO_VOCABULARY" in {issue.code for issue in blocking}
+
+
+def test_lesson_with_no_exercises_is_blocking() -> None:
+    """A lesson with an empty exercises list must raise NO_EXERCISES."""
+    artifact = _base_artifact()
+    artifact["courses"][0]["units"][0]["lessons"][0]["exercises"] = []
+    blocking = validate_artifact(artifact).blocking_errors
+    assert "NO_EXERCISES" in {issue.code for issue in blocking}
+
+
+# --- translation shape (warning only) ---
+
+
+def test_empty_translation_vi_is_warning_not_blocking() -> None:
+    """An empty translation_vi shows up under warnings and leaves the report non-blocking."""
+    artifact = _base_artifact()
+    artifact["courses"][0]["units"][0]["lessons"][0]["vocabulary"][0]["translation_vi"] = ""
+    outcome = validate_artifact(artifact, pinned_snapshots=_pins(artifact))
+    assert "EMPTY_TRANSLATION_VI" in {note.code for note in outcome.warnings}
+    assert not outcome.is_blocking
+
+
+# --- metrics ---
+
+
+def test_metrics_counts_are_accurate() -> None:
+    """The base artifact holds exactly one vocabulary item, exercise and unit."""
+    artifact = _base_artifact()
+    metrics = validate_artifact(artifact, pinned_snapshots=_pins(artifact)).metrics
+    for key in ("vocabulary_count", "exercise_count", "unit_count"):
+        assert metrics[key] == 1
diff --git a/backend-service/tests/test_content_contract_parity.py b/backend-service/tests/test_content_contract_parity.py
new file mode 100644
index 00000000..a4a2ea21
--- /dev/null
+++ b/backend-service/tests/test_content_contract_parity.py
@@ -0,0 +1,77 @@
+import json
+from pathlib import Path
+from typing import get_args
+
+from app.schemas.content_agent import (
+ CEFRLevel,
+ ContentAgentArtifact,
+ NormalizedVocabularyRecord,
+ PartOfSpeech,
+)
+
+REPO_ROOT = Path(__file__).resolve().parents[2]  # two levels above this file's directory
+CONTRACT_ROOT = REPO_ROOT / "contracts" / "content-agent"  # directory read by _contract()
+
+
+def _contract(filename: str) -> dict:
+    return json.loads(CONTRACT_ROOT.joinpath(filename).read_text(encoding="utf-8"))  # parsed contract file
+
+
+def test_shared_source_contract_matches_backend_enums_and_strictness():
+    """Contract enums, required keys and strictness must mirror the backend schema."""
+    contract = _contract("source-record-v2.schema.json")
+    defs = contract["$defs"]
+    required_keys = {
+        "schema_version",
+        "record_id",
+        "source_name",
+        "source_version",
+        "source_record_id",
+        "source_url",
+        "license_id",
+        "license_url",
+        "attribution_text",
+        "content_usage",
+        "language",
+        "retrieved_at",
+        "raw_checksum",
+        "record_checksum",
+        "lineage",
+    }
+    assert set(defs["cefrLevel"]["enum"]) == set(get_args(CEFRLevel))
+    assert set(defs["partOfSpeech"]["enum"]) == set(get_args(PartOfSpeech))
+    assert contract["additionalProperties"] is False
+    assert required_keys.issubset(contract["required"])
+    assert NormalizedVocabularyRecord.model_config["extra"] == "forbid"
+
+
+def test_shared_course_artifact_contract_matches_backend_version():
+    """Version constants, strictness and manifest integrity keys must agree."""
+    contract = _contract("course-artifact-v2.schema.json")
+    props = contract["properties"]
+    assert props["schema_version"]["const"] == 2
+    assert props["prompt_version"]["const"] == "cefr-course-v2"
+    assert ContentAgentArtifact.model_fields["schema_version"].default == 2
+    assert ContentAgentArtifact.model_fields["prompt_version"].default == "cefr-course-v2"
+    assert ContentAgentArtifact.model_config["extra"] == "forbid"
+    manifest = props["source_manifest"]
+    assert manifest["minItems"] == 1
+    assert manifest["items"]["$ref"] == "#/$defs/sourceManifest"
+    manifest_def = contract["$defs"]["sourceManifest"]
+    assert manifest_def["additionalProperties"] is False
+    integrity_keys = {
+        "raw_checksum",
+        "normalized_sha256",
+        "normalized_bytes",
+        "record_checksum_root",
+    }
+    assert integrity_keys.issubset(manifest_def["required"])
+
+
+def test_shared_exercise_mapping_contains_every_supported_base_type():
+    """base_types in the exercise mapping must equal the artifact's type enum."""
+    mapping = _contract("exercise-types-v1.json")
+    artifact_schema = _contract("course-artifact-v2.schema.json")
+    type_enum = artifact_schema["$defs"]["exercise"]["properties"]["type"]["enum"]
+
+    assert set(mapping["base_types"]) == set(type_enum)
diff --git a/backend-service/tests/test_fsrs_algorithm_correctness.py b/backend-service/tests/test_fsrs_algorithm_correctness.py
new file mode 100644
index 00000000..8e955f82
--- /dev/null
+++ b/backend-service/tests/test_fsrs_algorithm_correctness.py
@@ -0,0 +1,422 @@
+"""
+FSRS Algorithm Correctness Tests
+
+Verifies both the FSRS-inspired scheduling (calculate_fsrs_review) and
+the SM-2 scheduling (calculate_next_review) against hand-computed reference
+values derived directly from the algorithm formulas.
+
+Notation:
+ S = stability D = difficulty R = retrievability
+ q = quality (0–5) t = elapsed days
+"""
+
+import math
+from datetime import datetime, timedelta, timezone
+
+import pytest
+
+from app.crud.vocabulary import VocabularyCRUD
+
+NOW = datetime(2026, 6, 15, 9, 0, 0, tzinfo=timezone.utc)  # fixed UTC instant passed as now= by fsrs()
+crud = VocabularyCRUD()  # single instance shared by the fsrs()/sm2() helpers and the status tests
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Helper
+# ─────────────────────────────────────────────────────────────────────────────
+
+def fsrs(
+    quality: int,
+    stability: float = 0.0,
+    difficulty: float = 0.0,
+    scheduled_days: int = 0,
+    reps: int = 0,
+    lapses: int = 0,
+    last_review_days_ago: int | None = None,
+) -> dict:
+    """Call ``crud.calculate_fsrs_review`` with ``now`` pinned to NOW.
+
+    ``last_review_days_ago`` is turned into ``fsrs_last_review`` (None stays None).
+    """
+    previous = None
+    if last_review_days_ago is not None:
+        previous = NOW - timedelta(days=last_review_days_ago)
+    return crud.calculate_fsrs_review(
+        quality=quality, stability=stability, difficulty=difficulty,
+        scheduled_days=scheduled_days, reps=reps, lapses=lapses,
+        fsrs_last_review=previous, sm2_last_review=None, now=NOW,
+    )
+
+
+def sm2(quality: int, ease_factor: float = 2.5, interval: int = 1, repetitions: int = 0) -> tuple:
+ return crud.calculate_next_review(quality, ease_factor, interval, repetitions)  # positional pass-through
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# FSRS: Initial card (reps=0) — stability lookup table
+# ─────────────────────────────────────────────────────────────────────────────
+
+INITIAL_STABILITY = {0: 0.4, 1: 0.6, 2: 1.0, 3: 2.4, 4: 3.8, 5: 5.8}  # quality -> expected first S
+
+
+@pytest.mark.parametrize("quality,expected_s", INITIAL_STABILITY.items())
+def test_fsrs_initial_stability_lookup(quality, expected_s):
+    """A brand-new card must get exactly the stability listed in INITIAL_STABILITY."""
+    actual = fsrs(quality)["fsrs_stability"]
+    message = f"q={quality}: expected S={expected_s}, got {actual}"
+    assert actual == expected_s, message
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# FSRS: Initial card — difficulty formula D = clamp(7.0 - (q-3)*0.8, 1, 10)
+# ─────────────────────────────────────────────────────────────────────────────
+
+INITIAL_DIFFICULTY = {
+    0: 9.4,  # 7.0 - (0 - 3) * 0.8
+    1: 8.6,  # 7.0 - (1 - 3) * 0.8
+    2: 7.8,
+    3: 7.0,
+    4: 6.2,
+    5: 5.4,
+}
+
+
+@pytest.mark.parametrize("quality,expected_d", INITIAL_DIFFICULTY.items())
+def test_fsrs_initial_difficulty_formula(quality, expected_d):
+    """A brand-new card's difficulty, rounded to 4 places, must match the table."""
+    actual = fsrs(quality)["fsrs_difficulty"]
+    message = f"q={quality}: expected D={expected_d}, got {actual}"
+    assert round(actual, 4) == pytest.approx(expected_d, abs=1e-9), message
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# FSRS: Initial card — reps/lapses bookkeeping
+# ─────────────────────────────────────────────────────────────────────────────
+
+def test_fsrs_initial_correct_increments_reps():
+    """A passing first review (q=4) counts one rep and no lapse."""
+    card = fsrs(4)
+    assert card["fsrs_reps"] == 1 and card["fsrs_lapses"] == 0
+
+
+def test_fsrs_initial_fail_increments_lapses():
+    """A failing first review (q=1) still counts one rep and also one lapse."""
+    card = fsrs(1)
+    assert card["fsrs_reps"] == 1 and card["fsrs_lapses"] == 1
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# FSRS: Initial card — state machine
+# State 1 = learning, 2 = review, 3 = relearning
+# ─────────────────────────────────────────────────────────────────────────────
+
+@pytest.mark.parametrize("quality,expected_state", [
+    (0, 1),  # fail on new card → learning
+    (1, 1),
+    (2, 1),
+    (3, 2),  # stability=2.4 ≥ 2 → review
+    (4, 2),
+    (5, 2),
+])
+def test_fsrs_initial_state(quality, expected_state):
+    """First review of a new card: q<3 expects state 1, q>=3 expects state 2."""
+    actual = fsrs(quality)["fsrs_state"]
+    message = f"q={quality}: expected state={expected_state}, got {actual}"
+    assert actual == expected_state, message
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# FSRS: Initial card — scheduled days
+# ─────────────────────────────────────────────────────────────────────────────
+
+def test_fsrs_initial_fail_schedules_1_day():
+    for failing_q in range(3):  # qualities 0, 1 and 2
+        days = fsrs(failing_q)["fsrs_scheduled_days"]
+        assert days == 1, f"q={failing_q}: expected 1 day, got {days}"
+
+
+def test_fsrs_initial_good_schedules_ceil_stability():
+    """q=3 on a new card: S=2.4 → ceil = 3 days, with no quality==5 boost."""
+    outcome = fsrs(3)
+    assert outcome["fsrs_scheduled_days"] == 3
+
+
+def test_fsrs_initial_easy_schedules_ceil_stability():
+    """q=4 on a new card: S=3.8 → ceil = 4 days."""
+    outcome = fsrs(4)
+    assert outcome["fsrs_scheduled_days"] == 4
+
+
+def test_fsrs_initial_perfect_gets_stability_bonus():
+    """q=5: S=5.8 → ceil = 6, bonus ceil(5.8 * 1.15) = 7, so max(6, 7) = 7 days."""
+    outcome = fsrs(5)
+    assert outcome["fsrs_scheduled_days"] == 7
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# FSRS: next_review_date = now + scheduled_days
+# ─────────────────────────────────────────────────────────────────────────────
+
+def test_fsrs_next_review_date_matches_scheduled_days():
+    outcome = fsrs(4)
+    interval = timedelta(days=outcome["fsrs_scheduled_days"])
+    assert outcome["next_review_date"] == NOW + interval  # fsrs() pins now=NOW
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# FSRS: elapsed_days tracking
+# ─────────────────────────────────────────────────────────────────────────────
+
+def test_fsrs_elapsed_days_computed_from_last_review():
+    reviewed = fsrs(4, stability=3.8, difficulty=6.2, reps=1, last_review_days_ago=5)
+    assert reviewed["fsrs_elapsed_days"] == 5  # last review was set 5 days before NOW
+
+
+def test_fsrs_elapsed_days_is_zero_for_new_card():
+    """With no previous review passed in, elapsed days is expected to be 0."""
+    assert fsrs(4)["fsrs_elapsed_days"] == 0
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# FSRS: Retrievability and stability growth on correct review
+# ─────────────────────────────────────────────────────────────────────────────
+
+def _expected_retrievability(elapsed: int, stability: float) -> float:
+    """Reference curve R = (1 + 19/81 * t/S) ** -0.5, clamped to [0.01, 1.0]."""
+    scaled = (19 / 81) * elapsed / stability
+    curve = (1 + scaled) ** -0.5
+    return max(0.01, min(1.0, curve))
+
+
+def test_fsrs_perfect_review_grows_stability():
+    """q=5 after 5 days on a card with S=4.0, D=5.0 must raise stability."""
+    outcome = fsrs(5, stability=4.0, difficulty=5.0, reps=1, last_review_days_ago=5)
+    assert 4.0 < outcome["fsrs_stability"]
+
+
+def test_fsrs_easy_review_grows_stability():
+    outcome = fsrs(4, stability=4.0, difficulty=5.0, reps=1, last_review_days_ago=4)
+    assert 4.0 < outcome["fsrs_stability"]  # must exceed the starting S=4.0
+
+
+def test_fsrs_good_review_grows_stability_modestly():
+    """q=3 grows stability, but the interval may rise at most one day (4 → 5)."""
+    outcome = fsrs(3, stability=4.0, difficulty=5.0, scheduled_days=4, reps=1, last_review_days_ago=4)
+    assert outcome["fsrs_stability"] > 4.0
+    assert outcome["fsrs_scheduled_days"] <= 4 + 1
+
+
+def test_fsrs_failed_review_reduces_stability_to_55_percent():
+    """After a failure the expected stability is max(0.5, S * 0.55)."""
+    prior = 8.0
+    lapsed = fsrs(1, stability=prior, difficulty=5.0, reps=2, last_review_days_ago=6)
+    assert lapsed["fsrs_stability"] == pytest.approx(max(0.5, prior * 0.55), abs=1e-4)
+
+
+def test_fsrs_failed_review_stability_floor_is_0_5():
+    lapsed = fsrs(0, stability=0.6, difficulty=9.0, reps=1, last_review_days_ago=1)
+    assert lapsed["fsrs_stability"] >= 0.5  # 0.6 * 0.55 alone would fall below the floor
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# FSRS: Lapse / state machine on subsequent reviews
+# ─────────────────────────────────────────────────────────────────────────────
+
+def test_fsrs_failed_subsequent_review_is_relearning_state():
+    """Failing (q<3) a card with reps>0 expects state 3 and one recorded lapse."""
+    lapsed = fsrs(2, stability=5.0, difficulty=5.0, reps=3, last_review_days_ago=5)
+    relearning = 3
+    assert lapsed["fsrs_state"] == relearning and lapsed["fsrs_lapses"] == 1
+
+
+def test_fsrs_failed_subsequent_review_schedules_1_day():
+    lapsed = fsrs(0, stability=10.0, difficulty=5.0, reps=5, last_review_days_ago=10)
+    assert lapsed["fsrs_scheduled_days"] == 1  # a failed review comes back the next day
+
+
+def test_fsrs_correct_review_high_stability_is_review_state():
+    passed = fsrs(4, stability=5.0, difficulty=5.0, reps=2, last_review_days_ago=5)
+    assert passed["fsrs_state"] == 2  # 2 = review state
+
+
+def test_fsrs_correct_review_low_stability_is_learning_state():
+    """q=3 on a low-stability card (S=1.5); see the note on the assertion."""
+    outcome = fsrs(3, stability=1.5, difficulty=7.0, scheduled_days=2, reps=1, last_review_days_ago=2)
+    # NOTE(review): learning (1) and review (2) are both accepted here; only other states fail.
+    assert outcome["fsrs_state"] in (1, 2)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# FSRS: Difficulty mean-reversion
+# D_new = clamp(D - (q-3)*0.3 + (4-D)*0.05, 1, 10)
+# ─────────────────────────────────────────────────────────────────────────────
+
+def test_fsrs_difficulty_decreases_on_perfect():
+    """D=7, q=5: 7 - (5-3)*0.3 + (4-7)*0.05 = 6.25 by the formula, i.e. below 7."""
+    outcome = fsrs(5, stability=4.0, difficulty=7.0, reps=1, last_review_days_ago=4)
+    assert outcome["fsrs_difficulty"] < 7.0
+
+
+def test_fsrs_difficulty_increases_on_failure():
+    """D=5, q=0: 5 - (0-3)*0.3 + (4-5)*0.05 = 5.85 by the formula, i.e. above 5."""
+    outcome = fsrs(0, stability=4.0, difficulty=5.0, reps=1, last_review_days_ago=4)
+    assert outcome["fsrs_difficulty"] > 5.0
+
+
+def test_fsrs_difficulty_floor_is_1():
+    """A perfect answer on a card already at D=1 must not push difficulty below 1."""
+    outcome = fsrs(5, stability=10.0, difficulty=1.0, reps=3, last_review_days_ago=10)
+    assert outcome["fsrs_difficulty"] >= 1.0
+
+
+def test_fsrs_difficulty_ceiling_is_10():
+    """A blackout (q=0) on a card at D=9.5 must not push difficulty above 10."""
+    outcome = fsrs(0, stability=1.0, difficulty=9.5, reps=2, last_review_days_ago=1)
+    assert outcome["fsrs_difficulty"] <= 10.0
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# FSRS: quality=3 conservative cap (scheduled_days ≤ current_scheduled + 1)
+# ─────────────────────────────────────────────────────────────────────────────
+
+def test_fsrs_quality3_respects_conservative_cap():
+    """With scheduled_days=5, a q=3 review may schedule at most 5 + 1 = 6 days."""
+    outcome = fsrs(3, stability=20.0, difficulty=5.0, scheduled_days=5, reps=3, last_review_days_ago=5)
+    assert outcome["fsrs_scheduled_days"] <= 6
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# FSRS: absolute maximum scheduled days cap (36 500 ≈ 100 years)
+# ─────────────────────────────────────────────────────────────────────────────
+
+def test_fsrs_scheduled_days_never_exceed_36500():
+    extreme = fsrs(5, stability=99999.0, difficulty=1.0, reps=100, last_review_days_ago=99999)
+    assert extreme["fsrs_scheduled_days"] <= 36500  # absolute cap, roughly 100 years
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# FSRS: quality boundary clamping (outside 0–5 is clamped)
+# ─────────────────────────────────────────────────────────────────────────────
+
+def test_fsrs_quality_below_zero_clamped_to_zero():
+    """Quality below the 0–5 scale must behave exactly like quality 0.
+
+    ``fsrs(-1)`` passes a new card: zero stability/difficulty, no reps, no prior review.
+    """
+    below_range = fsrs(-1)
+    lowest_valid = fsrs(0)
+    assert below_range["fsrs_stability"] == lowest_valid["fsrs_stability"]
+
+
+def test_fsrs_quality_above_five_clamped_to_five():
+    """Quality above the 0–5 scale must behave exactly like quality 5.
+
+    ``fsrs(9)`` passes a new card: zero stability/difficulty, no reps, no prior review.
+    """
+    above_range = fsrs(9)
+    highest_valid = fsrs(5)
+    assert above_range["fsrs_stability"] == highest_valid["fsrs_stability"]
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# SM-2: calculate_next_review correctness
+# ─────────────────────────────────────────────────────────────────────────────
+
+def test_sm2_first_correct_gives_interval_1():
+    """A passing review at repetitions=0 yields interval 1 and one repetition."""
+    _, gap, count, _ = sm2(quality=4, ease_factor=2.5, interval=1, repetitions=0)
+    assert (gap, count) == (1, 1)
+
+
+def test_sm2_second_correct_gives_interval_6():
+    """A passing review at repetitions=1 yields interval 6 and two repetitions."""
+    _, gap, count, _ = sm2(quality=4, ease_factor=2.5, interval=1, repetitions=1)
+    assert (gap, count) == (6, 2)
+
+
+def test_sm2_third_correct_multiplies_by_ease_factor():
+    """Third passing review: int(6 * 2.5) = 15 days, and repetitions becomes 3."""
+    _, gap, count, _ = sm2(quality=5, ease_factor=2.5, interval=6, repetitions=2)
+    assert gap == 15
+    assert count == 3
+
+
+def test_sm2_failed_review_resets_interval_and_reps():
+    """A q=2 review drops the interval back to 1 and repetitions back to 0."""
+    _, gap, count, _ = sm2(quality=2, ease_factor=2.5, interval=15, repetitions=3)
+    assert (gap, count) == (1, 0)
+
+
+def test_sm2_perfect_score_increases_ease_factor():
+    """q=5: EF + 0.1 - (5-5)*(0.08+0) = 2.5 + 0.1 = 2.6."""
+    new_ef, _, _, _ = sm2(quality=5, ease_factor=2.5, interval=1, repetitions=0)
+    assert new_ef == pytest.approx(2.6, abs=1e-9)
+
+
+def test_sm2_easy_score_keeps_ease_factor():
+    """q=4: EF + 0.1 - 1*(0.08 + 1*0.02) = 2.5 + 0.1 - 0.1 = 2.5."""
+    new_ef, _, _, _ = sm2(quality=4, ease_factor=2.5, interval=1, repetitions=0)
+    assert new_ef == pytest.approx(2.5, abs=1e-9)
+
+
+def test_sm2_good_score_decreases_ease_factor():
+    """q=3: EF + 0.1 - 2*(0.08 + 2*0.02) = 2.5 + 0.1 - 0.24 = 2.36."""
+    new_ef, _, _, _ = sm2(quality=3, ease_factor=2.5, interval=1, repetitions=0)
+    assert new_ef == pytest.approx(2.36, abs=1e-9)
+
+
+def test_sm2_hard_score_significantly_decreases_ease_factor():
+    """q=2: EF + 0.1 - 3*(0.08 + 3*0.02) = 2.5 + 0.1 - 0.42 = 2.18."""
+    new_ef, _, _, _ = sm2(quality=2, ease_factor=2.5, interval=1, repetitions=0)
+    assert new_ef == pytest.approx(2.18, abs=1e-9)
+
+
+def test_sm2_ease_factor_floor_is_1_3():
+    """A single q=0 review starting from EF=1.35 must not leave EF below 1.3."""
+    new_ef, _, _, _ = sm2(quality=0, ease_factor=1.35, interval=1, repetitions=0)
+    assert new_ef >= 1.3
+
+
+def test_sm2_long_session_schedule_progression():
+    """Three q=5 reviews in a row: intervals 1, 6, then int(6 * EF) with EF from review 2."""
+    ef_1, gap_1, reps_1, _ = sm2(5, 2.5, 1, 0)
+    assert gap_1 == 1
+    ef_2, gap_2, reps_2, _ = sm2(5, ef_1, gap_1, reps_1)
+    assert gap_2 == 6
+    _, gap_3, _, _ = sm2(5, ef_2, gap_2, reps_2)
+    assert gap_3 == int(6 * ef_2)  # 16 when EF goes 2.5 → 2.6 → 2.7
+
+
+def test_sm2_next_review_date_is_in_future():
+    _, _, _, due = sm2(5, 2.5, 1, 0)
+    assert datetime.now(timezone.utc) < due  # NOTE(review): assumes due is timezone-aware
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# VocabularyCRUD: determine_status helper
+# ─────────────────────────────────────────────────────────────────────────────
+
+def test_status_is_mastered_when_high_ef_and_long_interval():
+    from app.models.vocabulary import VocabularyStatus
+    result = crud.determine_status(ease_factor=2.5, interval=21, repetitions=5)
+    assert result == VocabularyStatus.MASTERED
+
+
+def test_status_is_reviewing_when_enough_reps():
+    from app.models.vocabulary import VocabularyStatus
+    result = crud.determine_status(ease_factor=2.0, interval=5, repetitions=3)
+    assert result == VocabularyStatus.REVIEWING
+
+
+def test_status_is_learning_for_new_words():
+    from app.models.vocabulary import VocabularyStatus
+    result = crud.determine_status(ease_factor=2.5, interval=1, repetitions=0)
+    assert result == VocabularyStatus.LEARNING
+
+
+def test_status_needs_both_ef_and_interval_for_mastered():
+    """Neither a high EF alone nor a long interval alone is enough for MASTERED."""
+    from app.models.vocabulary import VocabularyStatus
+
+    for ease, gap in ((2.5, 5), (2.0, 30)):  # (high EF, short gap), (low EF, long gap)
+        assert crud.determine_status(ease, gap, 5) != VocabularyStatus.MASTERED
diff --git a/backend-service/tests/test_notification_campaign_apply.py b/backend-service/tests/test_notification_campaign_apply.py
new file mode 100644
index 00000000..dae7ab1c
--- /dev/null
+++ b/backend-service/tests/test_notification_campaign_apply.py
@@ -0,0 +1,250 @@
+"""Unit tests for NotificationCampaignApplyService (mocked segments and senders)."""
+
+from __future__ import annotations
+
+import uuid
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from app.services.notification_campaign.apply import NotificationCampaignApplyService
+from app.services.notification_campaign.sender import SendResult
+from app.services.notification_campaign.segmenter import SegmentResult
+
+
+def _make_job(
+ job_type: str = "targeted_push",
+ title: str = "Learn today!",
+ body: str = "Open the app.",
+ artifact: dict | None = None,
+) -> SimpleNamespace:
+ return SimpleNamespace(
+ id=uuid.uuid4(),
+ job_type=job_type,
+ status="preview_ready",
+ config={
+ "content": {"title": title, "body": body, "notification_type": "campaign"},
+ "audience": {"type": "all", "filters": {}},
+ },
+ artifact=artifact,
+ )
+
+
+def _make_segment(user_count: int = 5, token_count: int = 5) -> SegmentResult:
+ user_ids = [str(uuid.uuid4()) for _ in range(user_count)]
+ fcm_map = {uid: [f"tok-{i}"] for i, uid in enumerate(user_ids[:token_count])}
+ return SegmentResult(
+ user_ids=user_ids,
+ fcm_token_map=fcm_map,
+ audience_size=user_count,
+ sample_users=[],
+ filter_summary={},
+ )
+
+
+@pytest.mark.asyncio
+async def test_targeted_push_delegates_to_push_sender() -> None:
+ job = _make_job("targeted_push")
+ segment = _make_segment(user_count=3, token_count=3)
+ send_result = SendResult(sent=3, failed=0, skipped=0)
+
+ db = AsyncMock()
+
+ with (
+ patch(
+ "app.services.notification_campaign.apply.segment_users",
+ new=AsyncMock(return_value=segment),
+ ),
+ patch(
+ "app.services.notification_campaign.apply.send_campaign_push",
+ new=AsyncMock(return_value=send_result),
+ ),
+ patch(
+ "app.services.notification_campaign.apply.NotificationCampaignJobService.set_delivery_stats",
+ new=AsyncMock(),
+ ),
+ ):
+ result = await NotificationCampaignApplyService.apply(db, job)
+
+ assert result["sent"] == 3
+ assert result["failed"] == 0
+ assert result["total"] == 3
+
+
+@pytest.mark.asyncio
+async def test_in_app_broadcast_delegates_to_in_app_sender() -> None:
+ job = _make_job("in_app_broadcast")
+ segment = _make_segment(user_count=10, token_count=0)
+ send_result = SendResult(sent=10, failed=0, skipped=0)
+
+ db = AsyncMock()
+
+ with (
+ patch(
+ "app.services.notification_campaign.apply.segment_users",
+ new=AsyncMock(return_value=segment),
+ ),
+ patch(
+ "app.services.notification_campaign.apply.send_campaign_in_app",
+ new=AsyncMock(return_value=send_result),
+ ),
+ patch(
+ "app.services.notification_campaign.apply.NotificationCampaignJobService.set_delivery_stats",
+ new=AsyncMock(),
+ ),
+ ):
+ result = await NotificationCampaignApplyService.apply(db, job)
+
+ assert result["sent"] == 10
+ assert result["total"] == 10
+
+
+@pytest.mark.asyncio
+async def test_scheduled_push_delegates_to_push_sender() -> None:
+ job = _make_job("scheduled_push")
+ segment = _make_segment(user_count=2)
+ send_result = SendResult(sent=2, failed=0, skipped=0)
+
+ db = AsyncMock()
+
+ with (
+ patch(
+ "app.services.notification_campaign.apply.segment_users",
+ new=AsyncMock(return_value=segment),
+ ),
+ patch(
+ "app.services.notification_campaign.apply.send_campaign_push",
+ new=AsyncMock(return_value=send_result),
+ ),
+ patch(
+ "app.services.notification_campaign.apply.NotificationCampaignJobService.set_delivery_stats",
+ new=AsyncMock(),
+ ),
+ ):
+ result = await NotificationCampaignApplyService.apply(db, job)
+
+ assert result["sent"] == 2
+
+
+@pytest.mark.asyncio
+async def test_unknown_job_type_raises_value_error() -> None:
+ job = _make_job("sms_blast")
+ segment = _make_segment()
+
+ db = AsyncMock()
+
+ with (
+ patch(
+ "app.services.notification_campaign.apply.segment_users",
+ new=AsyncMock(return_value=segment),
+ ),
+ ):
+ with pytest.raises(ValueError, match="Unknown job_type"):
+ await NotificationCampaignApplyService.apply(db, job)
+
+
+@pytest.mark.asyncio
+async def test_ai_copy_overrides_config_title_and_body() -> None:
+ job = _make_job(
+ "targeted_push",
+ title="Original title",
+ body="Original body",
+ artifact={
+ "ai_copy": {
+ "title": "AI-generated title",
+ "body": "AI-generated body",
+ }
+ },
+ )
+ segment = _make_segment(user_count=1)
+ send_result = SendResult(sent=1, failed=0, skipped=0)
+
+ captured_kwargs: list[dict] = []
+
+ async def fake_push(**kwargs):
+ captured_kwargs.append(kwargs)
+ return send_result
+
+ db = AsyncMock()
+
+ with (
+ patch(
+ "app.services.notification_campaign.apply.segment_users",
+ new=AsyncMock(return_value=segment),
+ ),
+ patch(
+ "app.services.notification_campaign.apply.send_campaign_push",
+ side_effect=fake_push,
+ ),
+ patch(
+ "app.services.notification_campaign.apply.NotificationCampaignJobService.set_delivery_stats",
+ new=AsyncMock(),
+ ),
+ ):
+ await NotificationCampaignApplyService.apply(db, job)
+
+ assert captured_kwargs[0]["title"] == "AI-generated title"
+ assert captured_kwargs[0]["body"] == "AI-generated body"
+
+
+@pytest.mark.asyncio
+async def test_apply_total_equals_sent_plus_failed_plus_skipped() -> None:
+ job = _make_job("targeted_push")
+ segment = _make_segment(user_count=10, token_count=7)
+ send_result = SendResult(sent=5, failed=2, skipped=3)
+
+ db = AsyncMock()
+
+ with (
+ patch(
+ "app.services.notification_campaign.apply.segment_users",
+ new=AsyncMock(return_value=segment),
+ ),
+ patch(
+ "app.services.notification_campaign.apply.send_campaign_push",
+ new=AsyncMock(return_value=send_result),
+ ),
+ patch(
+ "app.services.notification_campaign.apply.NotificationCampaignJobService.set_delivery_stats",
+ new=AsyncMock(),
+ ),
+ ):
+ result = await NotificationCampaignApplyService.apply(db, job)
+
+ assert result["total"] == result["sent"] + result["failed"] + result["skipped"]
+ assert result["total"] == 10
+
+
+@pytest.mark.asyncio
+async def test_apply_calls_set_delivery_stats_with_correct_values() -> None:
+ job = _make_job("in_app_broadcast")
+ segment = _make_segment(user_count=4, token_count=0)
+ send_result = SendResult(sent=4, failed=0, skipped=0)
+
+ db = AsyncMock()
+ captured_stats: list[dict] = []
+
+ async def fake_set_stats(db, job, *, sent, failed, skipped):
+ captured_stats.append({"sent": sent, "failed": failed, "skipped": skipped})
+
+ with (
+ patch(
+ "app.services.notification_campaign.apply.segment_users",
+ new=AsyncMock(return_value=segment),
+ ),
+ patch(
+ "app.services.notification_campaign.apply.send_campaign_in_app",
+ new=AsyncMock(return_value=send_result),
+ ),
+ patch(
+ "app.services.notification_campaign.apply.NotificationCampaignJobService.set_delivery_stats",
+ side_effect=fake_set_stats,
+ ),
+ ):
+ await NotificationCampaignApplyService.apply(db, job)
+
+ assert len(captured_stats) == 1
+ assert captured_stats[0]["sent"] == 4
+ assert captured_stats[0]["failed"] == 0
+ assert captured_stats[0]["skipped"] == 0
diff --git a/backend-service/tests/test_notification_campaign_jobs.py b/backend-service/tests/test_notification_campaign_jobs.py
new file mode 100644
index 00000000..1c44757b
--- /dev/null
+++ b/backend-service/tests/test_notification_campaign_jobs.py
@@ -0,0 +1,254 @@
+"""State machine tests for NotificationCampaignJobService."""
+
+from __future__ import annotations
+
+import uuid
+from types import SimpleNamespace
+
+import pytest
+
+from app.services.notification_campaign_jobs import (
+ ACTIVE_STATUSES,
+ ALLOWED_TRANSITIONS,
+ TERMINAL_STATUSES,
+ NotificationCampaignJobService,
+)
+
+
+def _make_job(status: str = "queued") -> SimpleNamespace:
+ return SimpleNamespace(
+ id=uuid.uuid4(),
+ status=status,
+ progress={"stage": status, "percent": 0, "counters": {}},
+ updated_at=None,
+ started_at=None,
+ completed_at=None,
+ artifact=None,
+ warnings=[],
+ blocking_errors=[],
+ error_message=None,
+ delivery_stats={},
+ )
+
+
+class FakeDB:
+ def __init__(self) -> None:
+ self.flushed: int = 0
+
+ async def flush(self) -> None:
+ self.flushed += 1
+
+
+# ---------------------------------------------------------------------------
+# Status set invariants
+# ---------------------------------------------------------------------------
+
+
+def test_active_and_terminal_statuses_are_disjoint() -> None:
+ assert ACTIVE_STATUSES.isdisjoint(TERMINAL_STATUSES)
+
+
+def test_all_allowed_transition_sources_covered() -> None:
+ all_statuses = ACTIVE_STATUSES | TERMINAL_STATUSES
+ for source in ALLOWED_TRANSITIONS:
+ assert source in all_statuses, f"{source!r} not in known statuses"
+
+
+# ---------------------------------------------------------------------------
+# transition() — valid moves
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_transition_queued_to_segmenting() -> None:
+ job = _make_job("queued")
+ db = FakeDB()
+ await NotificationCampaignJobService.transition(db, job, "segmenting", percent=10)
+ assert job.status == "segmenting"
+ assert job.progress["percent"] == 10
+ assert job.started_at is not None
+
+
+@pytest.mark.asyncio
+async def test_transition_segmenting_to_generating() -> None:
+ job = _make_job("segmenting")
+ job.started_at = "set"
+ db = FakeDB()
+ await NotificationCampaignJobService.transition(db, job, "generating", percent=30)
+ assert job.status == "generating"
+
+
+@pytest.mark.asyncio
+async def test_transition_generating_to_validating() -> None:
+ job = _make_job("generating")
+ db = FakeDB()
+ await NotificationCampaignJobService.transition(db, job, "validating", percent=60)
+ assert job.status == "validating"
+
+
+@pytest.mark.asyncio
+async def test_transition_validating_to_preview_ready() -> None:
+ job = _make_job("validating")
+ db = FakeDB()
+ await NotificationCampaignJobService.transition(db, job, "preview_ready", percent=100)
+ assert job.status == "preview_ready"
+
+
+@pytest.mark.asyncio
+async def test_transition_preview_ready_to_sending() -> None:
+ job = _make_job("preview_ready")
+ db = FakeDB()
+ await NotificationCampaignJobService.transition(db, job, "sending", percent=0)
+ assert job.status == "sending"
+
+
+@pytest.mark.asyncio
+async def test_transition_sending_to_completed_sets_completed_at() -> None:
+ job = _make_job("sending")
+ db = FakeDB()
+ await NotificationCampaignJobService.transition(db, job, "completed")
+ assert job.status == "completed"
+ assert job.completed_at is not None
+
+
+# ---------------------------------------------------------------------------
+# transition() — invalid moves
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_transition_completed_to_calculating_raises() -> None:
+    # NOTE: the name says "calculating", but the move attempted is completed -> "segmenting".
+    finished_job, fake_db = _make_job("completed"), FakeDB()
+    with pytest.raises(ValueError, match="Cannot transition"):
+        await NotificationCampaignJobService.transition(fake_db, finished_job, "segmenting")
+
+
+@pytest.mark.asyncio
+async def test_transition_queued_to_completed_directly_raises() -> None:
+ job = _make_job("queued")
+ db = FakeDB()
+ with pytest.raises(ValueError, match="Cannot transition"):
+ await NotificationCampaignJobService.transition(db, job, "completed")
+
+
+# ---------------------------------------------------------------------------
+# set_preview()
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_set_preview_stores_artifact_and_moves_to_preview_ready() -> None:
+ job = _make_job("validating")
+ db = FakeDB()
+ await NotificationCampaignJobService.set_preview(
+ db,
+ job,
+ artifact={"audience_size": 42, "sample_users": []},
+ warnings=["low audience"],
+ blocking_errors=[],
+ )
+ assert job.status == "preview_ready"
+ assert job.artifact == {"audience_size": 42, "sample_users": []}
+ assert job.warnings == ["low audience"]
+ assert job.blocking_errors == []
+
+
+@pytest.mark.asyncio
+async def test_set_preview_stores_blocking_errors() -> None:
+ job = _make_job("validating")
+ db = FakeDB()
+ await NotificationCampaignJobService.set_preview(
+ db,
+ job,
+ artifact={},
+ warnings=[],
+ blocking_errors=["no FCM tokens"],
+ )
+ assert job.blocking_errors == ["no FCM tokens"]
+
+
+# ---------------------------------------------------------------------------
+# set_failed()
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_set_failed_from_segmenting() -> None:
+ job = _make_job("segmenting")
+ db = FakeDB()
+ await NotificationCampaignJobService.set_failed(db, job, "DB timeout")
+ assert job.status == "failed"
+ assert job.error_message == "DB timeout"
+ assert job.completed_at is not None
+
+
+@pytest.mark.asyncio
+async def test_set_failed_from_sending() -> None:
+ job = _make_job("sending")
+ db = FakeDB()
+ await NotificationCampaignJobService.set_failed(db, job, "FCM unreachable")
+ assert job.status == "failed"
+
+
+# ---------------------------------------------------------------------------
+# cancel via transition()
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_cancel_from_queued() -> None:
+ job = _make_job("queued")
+ db = FakeDB()
+ await NotificationCampaignJobService.transition(db, job, "cancelled")
+ assert job.status == "cancelled"
+ assert job.completed_at is not None
+
+
+@pytest.mark.asyncio
+async def test_cancel_from_completed_raises() -> None:
+ job = _make_job("completed")
+ db = FakeDB()
+ with pytest.raises(ValueError, match="Cannot transition"):
+ await NotificationCampaignJobService.transition(db, job, "cancelled")
+
+
+# ---------------------------------------------------------------------------
+# retry — failed → queued
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_retry_from_failed_to_queued() -> None:
+ job = _make_job("failed")
+ db = FakeDB()
+ await NotificationCampaignJobService.transition(db, job, "queued")
+ assert job.status == "queued"
+
+
+# ---------------------------------------------------------------------------
+# set_delivery_stats()
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_set_delivery_stats_stores_values() -> None:
+ job = _make_job("sending")
+ db = FakeDB()
+ await NotificationCampaignJobService.set_delivery_stats(
+ db, job, sent=100, failed=3, skipped=7
+ )
+ assert job.delivery_stats["sent"] == 100
+ assert job.delivery_stats["failed"] == 3
+ assert job.delivery_stats["skipped"] == 7
+ assert job.delivery_stats["total"] == 110
+
+
+@pytest.mark.asyncio
+async def test_set_delivery_stats_zero_is_valid() -> None:
+ job = _make_job("sending")
+ db = FakeDB()
+ await NotificationCampaignJobService.set_delivery_stats(
+ db, job, sent=0, failed=0, skipped=0
+ )
+ assert job.delivery_stats["total"] == 0
diff --git a/backend-service/tests/test_notification_campaign_schemas.py b/backend-service/tests/test_notification_campaign_schemas.py
new file mode 100644
index 00000000..89582c5e
--- /dev/null
+++ b/backend-service/tests/test_notification_campaign_schemas.py
@@ -0,0 +1,160 @@
+"""Schema validation tests for the Notification Campaign Agent."""
+
+from __future__ import annotations
+
+from datetime import datetime, timezone
+
+import pytest
+from pydantic import ValidationError
+
+from app.schemas.notification_campaign import (
+ AudienceFilters,
+ NotificationCampaignJobCreate,
+ NotificationContent,
+)
+
+
+# ---------------------------------------------------------------------------
+# AudienceFilters
+# ---------------------------------------------------------------------------
+
+
+def test_audience_filters_defaults_require_fcm() -> None:
+ filters = AudienceFilters()
+ assert filters.has_fcm_token is True
+ assert filters.leagues is None
+ assert filters.cefr_levels is None
+
+
+def test_audience_filters_accepts_valid_leagues() -> None:
+ filters = AudienceFilters(leagues=["bronze", "gold", "master"])
+ assert filters.leagues == ["bronze", "gold", "master"]
+
+
+def test_audience_filters_rejects_unknown_league() -> None:
+ with pytest.raises(ValidationError, match="Unknown leagues"):
+ AudienceFilters(leagues=["diamond"])
+
+
+def test_audience_filters_accepts_valid_cefr_levels() -> None:
+ filters = AudienceFilters(cefr_levels=["A1", "B2"])
+ assert filters.cefr_levels == ["A1", "B2"]
+
+
+def test_audience_filters_rejects_unknown_cefr_level() -> None:
+ with pytest.raises(ValidationError, match="Unknown CEFR levels"):
+ AudienceFilters(cefr_levels=["Z9"])
+
+
+def test_audience_filters_inactive_days_minimum_one() -> None:
+ with pytest.raises(ValidationError):
+ AudienceFilters(inactive_days=0)
+
+
+def test_audience_filters_inactive_days_maximum_365() -> None:
+ with pytest.raises(ValidationError):
+ AudienceFilters(inactive_days=366)
+
+
+def test_audience_filters_min_streak_cannot_be_negative() -> None:
+ with pytest.raises(ValidationError):
+ AudienceFilters(min_streak=-1)
+
+
+# ---------------------------------------------------------------------------
+# NotificationContent
+# ---------------------------------------------------------------------------
+
+
+def test_notification_content_requires_title() -> None:
+ with pytest.raises(ValidationError):
+ NotificationContent(title="", body="Something")
+
+
+def test_notification_content_requires_body() -> None:
+ with pytest.raises(ValidationError):
+ NotificationContent(title="Hello", body="")
+
+
+def test_notification_content_title_max_100_chars() -> None:
+ with pytest.raises(ValidationError):
+ NotificationContent(title="x" * 101, body="body text")
+
+
+def test_notification_content_body_max_300_chars() -> None:
+ with pytest.raises(ValidationError):
+ NotificationContent(title="title", body="x" * 301)
+
+
+def test_notification_content_defaults() -> None:
+ content = NotificationContent(title="Learn today!", body="New lesson available.")
+ assert content.notification_type == "campaign"
+ assert content.deep_link is None
+ assert content.use_ai_copy is False
+
+
+# ---------------------------------------------------------------------------
+# NotificationCampaignJobCreate
+# ---------------------------------------------------------------------------
+
+
+def test_targeted_push_job_create_valid() -> None:
+ payload = NotificationCampaignJobCreate(
+ job_type="targeted_push",
+ config={
+ "audience": {"type": "all"},
+ "content": {"title": "Boost XP!", "body": "Double XP for next 24 hours!"},
+ },
+ )
+ assert payload.job_type == "targeted_push"
+
+
+def test_in_app_broadcast_job_create_valid() -> None:
+ payload = NotificationCampaignJobCreate(
+ job_type="in_app_broadcast",
+ config={
+ "audience": {"type": "segment", "filters": {"cefr_levels": ["A1", "A2"]}},
+ "content": {"title": "Tip", "body": "Use flashcards daily!"},
+ },
+ )
+ assert payload.job_type == "in_app_broadcast"
+
+
+def test_scheduled_push_job_create_requires_send_at() -> None:
+ with pytest.raises(ValidationError):
+ NotificationCampaignJobCreate(
+ job_type="scheduled_push",
+ config={
+ "audience": {"type": "all"},
+ "content": {"title": "Hi", "body": "Reminder!"},
+ },
+ )
+
+
+def test_scheduled_push_job_create_valid() -> None:
+ send_at = datetime(2026, 12, 1, 9, 0, tzinfo=timezone.utc)
+ payload = NotificationCampaignJobCreate(
+ job_type="scheduled_push",
+ config={
+ "audience": {"type": "all"},
+ "content": {"title": "Hi", "body": "Reminder!"},
+ "send_at": send_at.isoformat(),
+ },
+ )
+ assert payload.job_type == "scheduled_push"
+
+
+def test_unknown_job_type_raises() -> None:
+ with pytest.raises(ValidationError, match="Unknown job_type"):
+ NotificationCampaignJobCreate(
+ job_type="bulk_sms",
+ config={},
+ )
+
+
+def test_targeted_push_missing_content_raises() -> None:
+ with pytest.raises(ValidationError):
+ NotificationCampaignJobCreate(
+ job_type="targeted_push",
+ config={"audience": {"type": "all"}},
+ )
diff --git a/backend-service/tests/test_notification_campaign_sender.py b/backend-service/tests/test_notification_campaign_sender.py
new file mode 100644
index 00000000..50f68550
--- /dev/null
+++ b/backend-service/tests/test_notification_campaign_sender.py
@@ -0,0 +1,234 @@
+"""Tests for notification campaign FCM and in-app senders."""
+
+from __future__ import annotations
+
+import uuid
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from app.services.notification_campaign.sender import (
+ SendResult,
+ send_campaign_in_app,
+ send_campaign_push,
+)
+
+
+# ---------------------------------------------------------------------------
+# send_campaign_push — no FCM tokens
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_push_with_empty_token_map_returns_all_skipped() -> None:
+    # NOTE: despite "all_skipped" in the name, this test expects an empty map
+    # to produce zero in every counter, including `skipped`.
+    no_recipients: dict = {}
+    outcome = await send_campaign_push(
+        fcm_token_map=no_recipients, title="Hey!", body="Come back and learn."
+    )
+    assert isinstance(outcome, SendResult)
+    counters = (outcome.sent, outcome.failed, outcome.skipped)
+    assert counters == (0, 0, 0)
+
+
+@pytest.mark.asyncio
+async def test_push_with_users_but_no_tokens_returns_skipped() -> None:
+    tokenless_users = {"user-1": [], "user-2": []}
+    result = await send_campaign_push(fcm_token_map=tokenless_users, title="Hey!", body="Come back.")
+    assert result.sent == 0
+    # Presumably nothing is attempted without tokens, so no failure is expected — confirm against the sender.
+    assert result.failed == 0
+    # TODO(review): pin the exact count once the sender's contract for empty token lists is confirmed.
+    assert 0 <= result.skipped <= len(tokenless_users)
+
+
+@pytest.mark.asyncio
+async def test_push_skips_when_firebase_not_configured() -> None:
+ token_map = {"user-1": ["token-abc"], "user-2": ["token-xyz"]}
+ with patch(
+ "app.services.notification_campaign.sender._init_firebase_app",
+ side_effect=RuntimeError("Firebase credentials missing"),
+ ):
+ result = await send_campaign_push(
+ fcm_token_map=token_map,
+ title="XP Boost",
+ body="Limited time!",
+ )
+ assert result.sent == 0
+ assert result.skipped == 2
+
+
+@pytest.mark.asyncio
+async def test_push_counts_fcm_batch_successes() -> None:
+ token_map = {"user-1": ["token-1"], "user-2": ["token-2"]}
+ fake_response = MagicMock(success_count=2, failure_count=0)
+
+ with (
+ patch(
+ "app.services.notification_campaign.sender._init_firebase_app"
+ ),
+ patch(
+ "app.services.notification_campaign.sender.run_in_threadpool",
+ new=AsyncMock(return_value=fake_response),
+ ),
+ ):
+ result = await send_campaign_push(
+ fcm_token_map=token_map,
+ title="Lesson ready",
+ body="Start now!",
+ )
+
+ assert result.sent == 2
+ assert result.failed == 0
+ assert result.skipped == 0
+
+
+@pytest.mark.asyncio
+async def test_push_counts_fcm_batch_partial_failure() -> None:
+ token_map = {"user-1": ["t1"], "user-2": ["t2"], "user-3": ["t3"]}
+ fake_response = MagicMock(success_count=2, failure_count=1)
+
+ with (
+ patch("app.services.notification_campaign.sender._init_firebase_app"),
+ patch(
+ "app.services.notification_campaign.sender.run_in_threadpool",
+ new=AsyncMock(return_value=fake_response),
+ ),
+ ):
+ result = await send_campaign_push(
+ fcm_token_map=token_map,
+ title="Title",
+ body="Body",
+ )
+
+ assert result.sent == 2
+ assert result.failed == 1
+
+
+@pytest.mark.asyncio
+async def test_push_counts_all_failed_when_fcm_raises() -> None:
+ token_map = {"user-1": ["t1"], "user-2": ["t2"]}
+
+ with (
+ patch("app.services.notification_campaign.sender._init_firebase_app"),
+ patch(
+ "app.services.notification_campaign.sender.run_in_threadpool",
+ new=AsyncMock(side_effect=Exception("FCM error")),
+ ),
+ ):
+ result = await send_campaign_push(
+ fcm_token_map=token_map,
+ title="Title",
+ body="Body",
+ )
+
+ assert result.sent == 0
+ assert result.failed == 2
+
+
+@pytest.mark.asyncio
+async def test_push_data_includes_deep_link() -> None:
+ captured_messages: list = []
+
+ async def fake_run_in_threadpool(fn, msg):
+ captured_messages.append(msg)
+ return MagicMock(success_count=1, failure_count=0)
+
+ with (
+ patch("app.services.notification_campaign.sender._init_firebase_app"),
+ patch(
+ "app.services.notification_campaign.sender.run_in_threadpool",
+ side_effect=fake_run_in_threadpool,
+ ),
+ ):
+ await send_campaign_push(
+ fcm_token_map={"u1": ["tok"]},
+ title="Title",
+ body="Body",
+ deep_link="/vocabulary",
+ )
+
+ assert len(captured_messages) == 1
+ assert captured_messages[0].data["route"] == "/vocabulary"
+
+
+# ---------------------------------------------------------------------------
+# send_campaign_in_app
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_in_app_with_empty_user_ids_returns_zero() -> None:
+ db = AsyncMock()
+ result = await send_campaign_in_app(
+ db,
+ user_ids=[],
+ title="Hi",
+ body="Welcome",
+ )
+ assert result.sent == 0
+ assert result.failed == 0
+ assert result.skipped == 0
+ db.execute.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_in_app_inserts_one_row_per_user() -> None:
+ db = AsyncMock()
+ db.execute = AsyncMock()
+ db.flush = AsyncMock()
+
+ user_ids = [str(uuid.uuid4()), str(uuid.uuid4()), str(uuid.uuid4())]
+ result = await send_campaign_in_app(
+ db,
+ user_ids=user_ids,
+ title="Campaign",
+ body="Check out today's lesson!",
+ deep_link="/lesson/1",
+ )
+
+ assert result.sent == 3
+ assert result.failed == 0
+ assert result.skipped == 0
+ db.execute.assert_called_once()
+ db.flush.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_in_app_returns_failure_on_db_error() -> None:
+ db = AsyncMock()
+ db.execute = AsyncMock(side_effect=Exception("DB constraint violation"))
+ db.flush = AsyncMock()
+
+ user_ids = [str(uuid.uuid4()), str(uuid.uuid4())]
+ result = await send_campaign_in_app(
+ db,
+ user_ids=user_ids,
+ title="Oops",
+ body="Something went wrong",
+ )
+
+ assert result.sent == 0
+ assert result.failed == 2
+
+
+@pytest.mark.asyncio
+async def test_in_app_notification_type_defaults_to_campaign() -> None:
+    # Capture the rows handed to db.execute(stmt, rows) so the values checked
+    # below ("type" and "is_read") can be inspected after the call.
+    inserted_rows: list = []
+
+    async def capture_execute(stmt, rows):
+        inserted_rows.extend(rows)
+
+    db = AsyncMock()
+    db.execute = AsyncMock(side_effect=capture_execute)
+    db.flush = AsyncMock()
+
+    uid = str(uuid.uuid4())
+    await send_campaign_in_app(db, user_ids=[uid], title="T", body="B")
+
+    assert len(inserted_rows) == 1
+    assert inserted_rows[0]["type"] == "campaign"
+    assert inserted_rows[0]["is_read"] is False
diff --git a/backend-service/tests/test_progress_routes.py b/backend-service/tests/test_progress_routes.py
index 03b33cb4..cee1bc7d 100644
--- a/backend-service/tests/test_progress_routes.py
+++ b/backend-service/tests/test_progress_routes.py
@@ -577,3 +577,133 @@ async def test_completes_lesson_successfully(self, auth_client):
assert "lesson_id" in data
assert "xp_earned" in data
assert "is_passed" in data
+
+
+# ============================================================================
+# POST /api/v1/progress/streak/restore — Restore a broken streak
+# ============================================================================
+
+class TestRestoreStreak:
+ """Tests for POST /api/v1/progress/streak/restore."""
+
+ @pytest.mark.asyncio
+ async def test_requires_auth(self, no_auth_client: AsyncClient):
+ response = await no_auth_client.post(f"{BASE}/streak/restore")
+ assert response.status_code == 401
+
+ @pytest.mark.asyncio
+ async def test_restore_no_streak_record(self, auth_client):
+ client, mock_session, mock_result, _ = auth_client
+ mock_result.scalar_one_or_none.return_value = None
+ response = await client.post(f"{BASE}/streak/restore")
+ assert response.status_code == 400
+ assert "No streak record found to restore" in response.json()["error"]["message"]
+
+ @pytest.mark.asyncio
+ async def test_restore_no_previous_streak(self, auth_client):
+ client, mock_session, mock_result, _ = auth_client
+ mock_streak = MagicMock()
+ mock_streak.previous_streak = 0
+ mock_streak.restores_used_this_month = 0
+ mock_streak.last_restore_date = None
+ mock_result.scalar_one_or_none.return_value = mock_streak
+
+ response = await client.post(f"{BASE}/streak/restore")
+ assert response.status_code == 400
+ assert "No previous streak is available to restore" in response.json()["error"]["message"]
+
+ @pytest.mark.asyncio
+ async def test_restore_limit_reached(self, auth_client):
+ client, mock_session, mock_result, _ = auth_client
+ mock_streak = MagicMock()
+ mock_streak.previous_streak = 5
+ mock_streak.restores_used_this_month = 3
+ mock_streak.last_restore_date = date.today()
+ mock_result.scalar_one_or_none.return_value = mock_streak
+
+ response = await client.post(f"{BASE}/streak/restore")
+ assert response.status_code == 400
+ assert "already used your 3 streak restores" in response.json()["error"]["message"]
+
+ @pytest.mark.asyncio
+ async def test_restore_success(self, auth_client):
+ client, mock_session, mock_result, _ = auth_client
+ mock_streak = MagicMock()
+ mock_streak.previous_streak = 10
+ mock_streak.restores_used_this_month = 1
+ mock_streak.longest_streak = 8
+ mock_streak.last_restore_date = date.today()
+ mock_streak.current_streak = 1
+ mock_streak.total_days_active = 5
+ mock_streak.freeze_count = 0
+ mock_streak.last_activity_date = date.today()
+ mock_streak.last_reward_claim_date = None
+ mock_result.scalar_one_or_none.return_value = mock_streak
+
+ response = await client.post(f"{BASE}/streak/restore")
+ assert response.status_code == 200
+ data = response.json()["data"]
+ assert data["current_streak"] == 11
+ assert data["longest_streak"] == 11
+ assert data["previous_streak"] == 0
+ assert data["restores_used_this_month"] == 2
+ assert data["restores_remaining"] == 1
+
+
+# ============================================================================
+# POST /api/v1/progress/streak/claim-daily-reward — Claim daily login reward
+# ============================================================================
+
+class TestClaimDailyReward:
+ """Tests for POST /api/v1/progress/streak/claim-daily-reward."""
+
+ @pytest.mark.asyncio
+ async def test_requires_auth(self, no_auth_client: AsyncClient):
+ response = await no_auth_client.post(f"{BASE}/streak/claim-daily-reward")
+ assert response.status_code == 401
+
+ @pytest.mark.asyncio
+ async def test_claim_no_activity_today(self, auth_client):
+ client, mock_session, mock_result, _ = auth_client
+ mock_streak = MagicMock()
+ mock_streak.last_activity_date = date.today() - timedelta(days=1)
+ mock_result.scalar_one_or_none.return_value = mock_streak
+
+ response = await client.post(f"{BASE}/streak/claim-daily-reward")
+ assert response.status_code == 400
+ assert "complete a learning activity today" in response.json()["error"]["message"]
+
+ @pytest.mark.asyncio
+ async def test_claim_already_claimed_today(self, auth_client):
+ client, mock_session, mock_result, _ = auth_client
+ mock_streak = MagicMock()
+ mock_streak.last_activity_date = date.today()
+ mock_streak.last_reward_claim_date = date.today()
+ mock_result.scalar_one_or_none.return_value = mock_streak
+
+ response = await client.post(f"{BASE}/streak/claim-daily-reward")
+ assert response.status_code == 400
+ assert "already claimed today's reward" in response.json()["error"]["message"]
+
+ @pytest.mark.asyncio
+ async def test_claim_success(self, auth_client):
+ client, mock_session, mock_result, _ = auth_client
+ mock_streak = MagicMock()
+ mock_streak.current_streak = 5 # Day 5 reward should be 25 gems (cycle index 4: 5, 10, 15, 20, 25, 30, 50)
+ mock_streak.last_activity_date = date.today()
+ mock_streak.last_reward_claim_date = None
+ mock_result.scalar_one_or_none.return_value = mock_streak
+
+ mock_wallet = MagicMock()
+ mock_wallet.gems = 100
+ mock_transaction = MagicMock()
+
+ with patch("app.crud.gamification.WalletCRUD.add_gems", new=AsyncMock(return_value=(mock_wallet, mock_transaction))):
+ response = await client.post(f"{BASE}/streak/claim-daily-reward")
+
+ assert response.status_code == 200
+ data = response.json()["data"]
+ assert data["gems_awarded"] == 25
+ assert data["total_gems"] == 100
+ assert data["current_streak"] == 5
+ assert data["is_daily_reward_available"] is False
diff --git a/backend-service/tests/test_ranking_agent_engines.py b/backend-service/tests/test_ranking_agent_engines.py
new file mode 100644
index 00000000..fec82f54
--- /dev/null
+++ b/backend-service/tests/test_ranking_agent_engines.py
@@ -0,0 +1,168 @@
+"""Tests for LeagueResetEngine, XPEventEngine, and AchievementBatchEngine."""
+
+import uuid
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from app.services.ranking_agent.league_reset import LeagueResetEngine, _week_range_for
+from app.schemas.ranking_agent import (
+ AchievementBatchConfig,
+ LeagueResetConfig,
+ RankingAgentJobCreate,
+ XPEventConfig,
+)
+
+
+# ---------------------------------------------------------------------------
+# Schema validation tests
+# ---------------------------------------------------------------------------
+
+def test_league_reset_config_default():
+    """A bare LeagueResetConfig reports its job type and leaves week_start unset."""
+    defaults = LeagueResetConfig()
+    assert defaults.week_start is None
+    assert defaults.job_type == "league_reset"
+
+
+def test_xp_event_config_all_target():
+    """The catch-all target "all" is accepted and stored unchanged."""
+    fields = {"name": "Weekend Boost", "multiplier": 2.0, "duration_hours": 48, "target": "all"}
+    event = XPEventConfig(**fields)
+    assert event.target == "all"
+
+
+def test_xp_event_config_league_target():
+    """A "league:<name>" target for a known league is accepted and stored unchanged."""
+    fields = {"name": "Gold Boost", "multiplier": 1.5, "duration_hours": 24, "target": "league:gold"}
+    event = XPEventConfig(**fields)
+    assert event.target == "league:gold"
+
+
+def test_xp_event_config_invalid_league():
+    """A league name the schema does not recognise is rejected."""
+    bad_fields = {"name": "x", "multiplier": 2.0, "duration_hours": 24, "target": "league:diamond"}
+    with pytest.raises(Exception, match="Unknown league"):
+        XPEventConfig(**bad_fields)
+
+
+def test_xp_event_config_invalid_cefr():
+    """A CEFR level the schema does not recognise is rejected."""
+    bad_fields = {"name": "x", "multiplier": 2.0, "duration_hours": 24, "target": "cefr:Z1"}
+    with pytest.raises(Exception, match="Unknown CEFR level"):
+        XPEventConfig(**bad_fields)
+
+
+def test_achievement_batch_requires_slugs():
+    """An achievement batch with an empty slug list is rejected."""
+    no_slugs: list = []
+    with pytest.raises(Exception):
+        AchievementBatchConfig(achievement_slugs=no_slugs)
+
+
+def test_ranking_agent_job_create_invalid_type():
+    """A job_type outside the supported set is rejected at creation."""
+    request_fields = {"job_type": "invalid_type", "config": {}}
+    with pytest.raises(Exception, match="Unknown job_type"):
+        RankingAgentJobCreate(**request_fields)
+
+
+def test_ranking_agent_job_create_league_reset():
+    """A league_reset job can be created with an empty config."""
+    request = RankingAgentJobCreate(job_type="league_reset", config={})
+    created_type = request.job_type
+    assert created_type == "league_reset"
+
+
+def test_ranking_agent_job_create_achievement_batch():
+    """An achievement_batch job is accepted when at least one slug is supplied."""
+    batch_config = {"achievement_slugs": ["streak_30"]}
+    request = RankingAgentJobCreate(job_type="achievement_batch", config=batch_config)
+    assert request.job_type == "achievement_batch"
+
+
+# ---------------------------------------------------------------------------
+# LeagueResetEngine._week_range_for
+# ---------------------------------------------------------------------------
+
+def test_week_range_for_explicit_date():
+    """An explicit week_start is used as-is and the window spans seven days."""
+    week_start, week_end = _week_range_for("2026-06-01")
+    assert week_start.strftime("%Y-%m-%d") == "2026-06-01"
+    # The end bound sits exactly one week after the start.
+    assert (week_end - week_start).days == 7
+
+
+def test_week_range_for_none_returns_last_monday():
+    """Without an explicit date the window starts on a Monday and spans seven days."""
+    week_start, week_end = _week_range_for(None)
+    monday = 0  # datetime.weekday() value for Monday
+    assert week_start.weekday() == monday
+    assert (week_end - week_start).days == 7
+
+
+# ---------------------------------------------------------------------------
+# LeagueResetEngine.calculate (mocked DB)
+# ---------------------------------------------------------------------------
+
+@pytest.mark.asyncio
+async def test_league_reset_engine_empty_db():
+    """With no leaderboard rows the engine reports zero participants and no moves."""
+
+    class EmptyDB:
+        """Stand-in session whose every query yields no rows."""
+
+        async def execute(self, *_):
+            return MagicMock(all=lambda: [])
+
+    engine = LeagueResetEngine(promotion_threshold=0.1, demotion_threshold=0.1)
+    outcome = await engine.calculate(EmptyDB(), {"week_start": "2026-06-01"})
+
+    assert outcome["total_participants"] == 0
+    assert outcome["promotions"] == []
+    assert outcome["demotions"] == []
+
+
+@pytest.mark.asyncio
+async def test_league_reset_engine_promotes_top_user():
+    """Of two bronze users, only the higher-XP one is promoted to silver."""
+    top_entry = SimpleNamespace(
+        user_id=uuid.uuid4(),
+        league="bronze",
+        xp_earned=500,
+        lessons_completed=10,
+        id=uuid.uuid4(),
+    )
+    bottom_entry = SimpleNamespace(
+        user_id=uuid.uuid4(),
+        league="bronze",
+        xp_earned=50,
+        lessons_completed=1,
+        id=uuid.uuid4(),
+    )
+    # Row shape returned by the fake query: (entry, username, email).
+    rows = (
+        (top_entry, "alice", "alice@x.com"),
+        (bottom_entry, "bob", "bob@x.com"),
+    )
+
+    class TwoUserDB:
+        async def execute(self, *_):
+            return MagicMock(all=lambda: list(rows))
+
+    engine = LeagueResetEngine(promotion_threshold=0.5, demotion_threshold=0.0)
+    outcome = await engine.calculate(TwoUserDB(), {"week_start": "2026-06-01"})
+
+    promotions = outcome["promotions"]
+    assert len(promotions) == 1
+    assert promotions[0]["username"] == "alice"
+    assert promotions[0]["to"] == "silver"
+    assert outcome["league_summary"]["bronze"]["promoted"] == 1
+
+
+@pytest.mark.asyncio
+async def test_league_reset_engine_no_demotion_for_bronze():
+    """A bronze user is never demoted, even with a high demotion threshold."""
+    bronze_entry = SimpleNamespace(
+        user_id=uuid.uuid4(),
+        league="bronze",
+        xp_earned=10,
+        lessons_completed=0,
+        id=uuid.uuid4(),
+    )
+
+    class SingleUserDB:
+        async def execute(self, *_):
+            return MagicMock(all=lambda: [(bronze_entry, "user", "u@x.com")])
+
+    engine = LeagueResetEngine(promotion_threshold=0.0, demotion_threshold=0.9)
+    outcome = await engine.calculate(SingleUserDB(), {"week_start": "2026-06-01"})
+
+    assert outcome["demotions"] == []
+
+
+@pytest.mark.asyncio
+async def test_league_reset_engine_no_promotion_for_master():
+    """A master user is never promoted, even with a high promotion threshold."""
+    master_entry = SimpleNamespace(
+        user_id=uuid.uuid4(),
+        league="master",
+        xp_earned=9999,
+        lessons_completed=50,
+        id=uuid.uuid4(),
+    )
+
+    class SingleUserDB:
+        async def execute(self, *_):
+            return MagicMock(all=lambda: [(master_entry, "god", "g@x.com")])
+
+    engine = LeagueResetEngine(promotion_threshold=0.9, demotion_threshold=0.0)
+    outcome = await engine.calculate(SingleUserDB(), {"week_start": "2026-06-01"})
+
+    assert outcome["promotions"] == []
diff --git a/backend-service/tests/test_ranking_agent_jobs.py b/backend-service/tests/test_ranking_agent_jobs.py
new file mode 100644
index 00000000..ea9d2468
--- /dev/null
+++ b/backend-service/tests/test_ranking_agent_jobs.py
@@ -0,0 +1,123 @@
+"""Tests for RankingAgentJobService state machine."""
+
+import uuid
+from types import SimpleNamespace
+
+import pytest
+
+from app.services.ranking_agent_jobs import (
+ ACTIVE_STATUSES,
+ TERMINAL_STATUSES,
+ RankingAgentJobService,
+)
+
+
+def _make_job(status: str = "queued") -> SimpleNamespace:
+    """Build a lightweight stand-in for a ranking-agent job in the given status.
+
+    The progress stage mirrors ``status``; timestamps, artifact and error
+    message start as None and the warning/error lists start empty.
+    """
+    fields = {
+        "id": uuid.uuid4(),
+        "status": status,
+        "progress": {"stage": status, "percent": 0, "counters": {}},
+        "artifact": None,
+        "warnings": [],
+        "blocking_errors": [],
+        "error_message": None,
+    }
+    for timestamp in ("updated_at", "started_at", "completed_at"):
+        fields[timestamp] = None
+    return SimpleNamespace(**fields)
+
+
+class FakeDB:
+    """Minimal session double that only counts how many times flush() is awaited."""
+
+    def __init__(self):
+        # Number of flush() calls observed so far.
+        self.flushed = 0
+
+    async def flush(self):
+        self.flushed = self.flushed + 1
+
+
+@pytest.mark.asyncio
+async def test_transition_queued_to_calculating():
+    """queued -> calculating updates status and percent and stamps started_at."""
+    queued_job = _make_job("queued")
+    result = await RankingAgentJobService.transition(
+        FakeDB(), queued_job, "calculating", percent=20
+    )
+    assert result.status == "calculating"
+    assert result.progress["percent"] == 20
+    assert result.started_at is not None
+
+
+@pytest.mark.asyncio
+async def test_transition_invalid_raises():
+    """Moving a completed job back to calculating is refused with ValueError."""
+    finished_job = _make_job("completed")
+    session = FakeDB()
+    with pytest.raises(ValueError, match="Invalid ranking-agent job transition"):
+        await RankingAgentJobService.transition(session, finished_job, "calculating")
+
+
+@pytest.mark.asyncio
+async def test_set_preview_moves_to_preview_ready():
+    """set_preview stores artifact, warnings and errors and moves to preview_ready."""
+    preview = {
+        "artifact": {"total_participants": 10},
+        "warnings": ["minor warning"],
+        "blocking_errors": [],
+    }
+    result = await RankingAgentJobService.set_preview(
+        FakeDB(), _make_job("validating"), **preview
+    )
+    assert result.status == "preview_ready"
+    assert result.artifact == {"total_participants": 10}
+    assert result.warnings == ["minor warning"]
+    assert result.blocking_errors == []
+
+
+@pytest.mark.asyncio
+async def test_fail_sets_error_message():
+    """fail() on an active job marks it failed and records the message."""
+    reason = "Something went wrong"
+    result = await RankingAgentJobService.fail(FakeDB(), _make_job("calculating"), reason)
+    assert result.status == "failed"
+    assert result.error_message == "Something went wrong"
+
+
+@pytest.mark.asyncio
+async def test_fail_on_terminal_job_is_noop():
+    """fail() leaves an already-completed job in its completed status."""
+    finished_job = _make_job("completed")
+    outcome = await RankingAgentJobService.fail(FakeDB(), finished_job, "msg")
+    assert outcome.status == "completed"
+
+
+@pytest.mark.asyncio
+async def test_cancel_terminal_raises():
+    """Cancelling a completed job is refused with ValueError."""
+    finished_job = _make_job("completed")
+    session = FakeDB()
+    with pytest.raises(ValueError, match="Cannot cancel"):
+        await RankingAgentJobService.cancel(session, finished_job)
+
+
+@pytest.mark.asyncio
+async def test_retry_from_failed():
+    """retry() re-queues a failed job and clears artifact, error and blocking errors."""
+    failed_job = _make_job("failed")
+    # Leftovers from the failed run.
+    leftovers = {
+        "error_message": "old error",
+        "artifact": {"stale": True},
+        "blocking_errors": ["err"],
+        "warnings": ["w"],
+        "completed_at": "something",
+    }
+    for attr, value in leftovers.items():
+        setattr(failed_job, attr, value)
+
+    result = await RankingAgentJobService.retry(FakeDB(), failed_job)
+
+    assert result.status == "queued"
+    assert result.artifact is None
+    assert result.error_message is None
+    assert result.blocking_errors == []
+
+
+@pytest.mark.asyncio
+async def test_retry_from_active_raises():
+    """Retrying a job that is still calculating is refused with ValueError."""
+    running_job = _make_job("calculating")
+    session = FakeDB()
+    with pytest.raises(ValueError, match="Cannot retry"):
+        await RankingAgentJobService.retry(session, running_job)
+
+
+def test_active_and_terminal_status_sets_are_disjoint():
+    """No status may be classified as both active and terminal."""
+    no_overlap = ACTIVE_STATUSES.isdisjoint(TERMINAL_STATUSES)
+    assert no_overlap
diff --git a/backend-service/tests/test_ranking_agent_routes.py b/backend-service/tests/test_ranking_agent_routes.py
new file mode 100644
index 00000000..4089959d
--- /dev/null
+++ b/backend-service/tests/test_ranking_agent_routes.py
@@ -0,0 +1,82 @@
+"""Tests for ranking-agent admin routes."""
+
+import uuid
+from types import SimpleNamespace
+
+from fastapi.routing import APIRoute
+
+from app.core.dependencies import get_current_admin
+from app.routes import ranking_agent as ranking_agent_routes
+
+router = ranking_agent_routes.router
+
+
+def _dependency_calls(route: APIRoute) -> set:
+    """Return every non-None ``call`` found in the route's dependency tree.
+
+    Walks ``route.dependant.dependencies`` and all nested ``dependencies``.
+    """
+    found = set()
+
+    def visit(dependants) -> None:
+        for dependant in dependants:
+            if dependant.call is not None:
+                found.add(dependant.call)
+            visit(dependant.dependencies)
+
+    visit(route.dependant.dependencies)
+    return found
+
+
+def test_every_ranking_agent_route_requires_admin():
+    """Every APIRoute on the ranking-agent router depends on get_current_admin."""
+    api_routes = list(filter(lambda r: isinstance(r, APIRoute), router.routes))
+    assert api_routes, "No routes found — router might not be wired"
+    for api_route in api_routes:
+        guards = _dependency_calls(api_route)
+        assert get_current_admin in guards, (
+            f"{api_route.path} is missing get_current_admin dependency"
+        )
+
+
+def test_cancel_locks_job_and_commits(monkeypatch):
+    """Smoke-check that a cancel endpoint is registered on the router.
+
+    Synchronous on purpose: nothing here is awaited, and an unmarked
+    ``async def`` test is not executed under pytest-asyncio's strict mode.
+
+    The service fakes are installed for a future call to the handler, but the
+    handler is not invoked yet, so locking and committing are NOT verified.
+    TODO: call the cancel handler and assert the lock flag, the final job
+    status and that the session was committed.
+    """
+    job = SimpleNamespace(
+        id=uuid.uuid4(),
+        status="calculating",
+        celery_task_id=None,
+        requested_by_id=None,
+    )
+
+    async def fake_get(db, jid, *, lock=False):
+        # The route is expected to load the job with a row lock.
+        assert lock is True
+        return job
+
+    async def fake_cancel(db, j):
+        j.status = "cancelled"
+        return j
+
+    monkeypatch.setattr(ranking_agent_routes.RankingAgentJobService, "get", fake_get)
+    monkeypatch.setattr(ranking_agent_routes.RankingAgentJobService, "cancel", fake_cancel)
+
+    # Verify the route exists and has the right path
+    cancel_routes = [r for r in router.routes if "/cancel" in getattr(r, "path", "")]
+    assert cancel_routes, "cancel route not found"
+
+
+def test_require_enabled_raises_when_disabled(monkeypatch):
+    """_require_enabled() must raise while RANKING_AGENT_ENABLED is switched off.
+
+    Synchronous on purpose: the guard is called without ``await``, and an
+    unmarked ``async def`` test is not executed under pytest-asyncio's strict mode.
+    """
+    # Local imports: this module does not import pytest or settings at file level.
+    import pytest
+
+    from app.core.config import settings
+
+    monkeypatch.setattr(settings, "RANKING_AGENT_ENABLED", False)
+    with pytest.raises(Exception):
+        ranking_agent_routes._require_enabled()
diff --git a/backend-service/tests/test_ranking_agent_xp_event.py b/backend-service/tests/test_ranking_agent_xp_event.py
new file mode 100644
index 00000000..1454ee7a
--- /dev/null
+++ b/backend-service/tests/test_ranking_agent_xp_event.py
@@ -0,0 +1,145 @@
+"""Tests for XPEventEngine — mocked DB, pure calculation logic."""
+
+from __future__ import annotations
+
+import uuid
+from datetime import UTC, datetime
+from types import SimpleNamespace
+from unittest.mock import MagicMock
+
+import pytest
+
+from app.services.ranking_agent.xp_event import XPEventEngine
+
+
+def _make_user_row(
+    uid: uuid.UUID | None = None,
+    username: str = "alice",
+    email: str = "alice@x.com",
+) -> tuple:
+    """Build one ``(id, username, email)`` row; a random id is used when none is given."""
+    row_id = uid or uuid.uuid4()
+    return row_id, username, email
+
+
+class FakeDB:
+    """Session double whose execute() always yields the rows given at construction."""
+
+    def __init__(self, rows: list[tuple]) -> None:
+        self._rows = rows
+
+    async def execute(self, *_args, **_kwargs):
+        # Statement and parameters are ignored; only ``.all()`` is served.
+        result = MagicMock(all=lambda: self._rows)
+        return result
+
+
+# ---------------------------------------------------------------------------
+# Happy path
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_xp_event_all_target_counts_all_active_users() -> None:
+    """With target "all", every returned user is counted and the config is echoed back."""
+    db = FakeDB([_make_user_row(username=name) for name in ("alice", "bob", "carol")])
+    config = {"target": "all", "duration_hours": 24, "multiplier": 2.0, "name": "Test Boost"}
+
+    preview = await XPEventEngine().calculate(db, config)
+
+    assert preview["target_user_count"] == 3
+    assert preview["multiplier"] == 2.0
+    assert preview["duration_hours"] == 24
+    assert preview["event_name"] == "Test Boost"
+    assert preview["item_type"] == "double_xp"
+
+
+@pytest.mark.asyncio
+async def test_xp_event_sample_capped_at_ten() -> None:
+    """The preview sample holds at most ten users while the count stays exact."""
+    fifteen_users = [_make_user_row(username=f"user{idx}") for idx in range(15)]
+    config = {"target": "all", "duration_hours": 24, "multiplier": 2.0, "name": "Big Boost"}
+
+    preview = await XPEventEngine().calculate(FakeDB(fifteen_users), config)
+
+    assert preview["target_user_count"] == 15
+    assert len(preview["sample_users"]) == 10
+
+
+@pytest.mark.asyncio
+async def test_xp_event_estimated_xp_delta_formula() -> None:
+    """One user at a 3x multiplier yields the estimate "+100 XP"."""
+    config = {"target": "all", "duration_hours": 48, "multiplier": 3.0, "name": "Triple"}
+    single_user_db = FakeDB([_make_user_row()])
+
+    preview = await XPEventEngine().calculate(single_user_db, config)
+
+    # 1 user * 50 * (3.0 - 1) = 100
+    assert preview["estimated_total_xp_delta"] == "+100 XP"
+
+
+@pytest.mark.asyncio
+async def test_xp_event_zero_users_produces_zero_delta() -> None:
+    """With no users the count, the sample and the XP estimate are all empty/zero."""
+    config = {"target": "all", "duration_hours": 24, "multiplier": 2.0, "name": "Empty"}
+
+    preview = await XPEventEngine().calculate(FakeDB([]), config)
+
+    assert preview["sample_users"] == []
+    assert preview["target_user_count"] == 0
+    assert preview["estimated_total_xp_delta"] == "+0 XP"
+
+
+@pytest.mark.asyncio
+async def test_xp_event_expires_at_is_in_future() -> None:
+    """expires_at is an ISO timestamp that is timezone-aware and later than now."""
+    config = {"target": "all", "duration_hours": 24, "multiplier": 2.0, "name": "N"}
+
+    preview = await XPEventEngine().calculate(FakeDB([]), config)
+
+    expiry = datetime.fromisoformat(preview["expires_at"])
+    assert expiry.tzinfo is not None
+    assert expiry > datetime.now(UTC)
+
+
+@pytest.mark.asyncio
+async def test_xp_event_sample_uses_username_over_email_prefix() -> None:
+    """When a username is present it is what the sample shows."""
+    db = FakeDB([(uuid.uuid4(), "tester", "tester@x.com")])
+    config = {"target": "all", "duration_hours": 24, "multiplier": 2.0, "name": "N"}
+
+    preview = await XPEventEngine().calculate(db, config)
+
+    first_sample = preview["sample_users"][0]
+    assert first_sample["username"] == "tester"
+
+
+@pytest.mark.asyncio
+async def test_xp_event_sample_falls_back_to_email_prefix_when_username_null() -> None:
+    """A null username is replaced by the local part of the e-mail address."""
+    db = FakeDB([(uuid.uuid4(), None, "fallback@x.com")])
+    config = {"target": "all", "duration_hours": 24, "multiplier": 2.0, "name": "N"}
+
+    preview = await XPEventEngine().calculate(db, config)
+
+    first_sample = preview["sample_users"][0]
+    assert first_sample["username"] == "fallback"
+
+
+# ---------------------------------------------------------------------------
+# Default config values
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_xp_event_defaults_when_config_empty() -> None:
+    """An empty config falls back to the engine's default event settings."""
+    preview = await XPEventEngine().calculate(FakeDB([]), {})
+
+    expected_defaults = {
+        "multiplier": 2.0,
+        "duration_hours": 24,
+        "event_name": "XP Event",
+        "target": "all",
+    }
+    for key, value in expected_defaults.items():
+        assert preview[key] == value
diff --git a/backend-service/tests/test_reminder_service.py b/backend-service/tests/test_reminder_service.py
index 21c4bab5..fc2aece7 100644
--- a/backend-service/tests/test_reminder_service.py
+++ b/backend-service/tests/test_reminder_service.py
@@ -11,11 +11,12 @@ def test_reminder_models_have_expected_tables():
assert ReminderDelivery.__tablename__ == "reminder_deliveries"
-def test_reminder_settings_have_safe_defaults():
- assert settings.REMINDERS_ENABLED is False
- assert settings.REMINDER_DRY_RUN is True
+def test_reminder_settings_exist_and_are_valid_types():
+ assert isinstance(settings.REMINDERS_ENABLED, bool)
+ assert isinstance(settings.REMINDER_DRY_RUN, bool)
assert settings.REMINDER_DEFAULT_TIMEZONE == "Asia/Ho_Chi_Minh"
assert settings.REMINDER_SCAN_BATCH_SIZE >= 1
+ assert settings.REMINDER_SCAN_INTERVAL_SECONDS > 0
def test_celery_has_reminder_schedule():
diff --git a/backend-service/tests/test_security_hardening.py b/backend-service/tests/test_security_hardening.py
new file mode 100644
index 00000000..8f40015f
--- /dev/null
+++ b/backend-service/tests/test_security_hardening.py
@@ -0,0 +1,88 @@
+"""
+Hardening tests for project security changes.
+"""
+
+import pytest
+import html
+from typing import AsyncGenerator
+from httpx import AsyncClient, ASGITransport
+from app.main import app
+from app.core.security import create_verification_token, decode_verification_token
+from app.services.email_service import EmailService
+
+
+@pytest.fixture
+async def async_client_no_db() -> AsyncGenerator[AsyncClient, None]:
+    """Yield an httpx AsyncClient bound directly to the ASGI app (no database overrides)."""
+    in_process = ASGITransport(app=app)
+    http_client = AsyncClient(transport=in_process, base_url="http://test")
+    async with http_client as client:
+        yield client
+
+
+@pytest.mark.asyncio
+async def test_security_headers_middleware(async_client_no_db: AsyncClient):
+    """
+    A GET on the root path must carry every defense-in-depth response header
+    that SecurityHeadersMiddleware is expected to set.
+    """
+    response = await async_client_no_db.get("/")
+    assert response.status_code == 200
+    headers = response.headers
+
+    # Headers whose value must match exactly.
+    exact_values = {
+        "X-Content-Type-Options": "nosniff",
+        "X-Frame-Options": "DENY",
+        "X-XSS-Protection": "1; mode=block",
+        "Referrer-Policy": "strict-origin-when-cross-origin",
+    }
+    for name, expected in exact_values.items():
+        assert headers.get(name) == expected
+
+    # Policy headers only need to contain the key directive.
+    assert "default-src 'self'" in headers.get("Content-Security-Policy", "")
+    assert "camera=()" in headers.get("Permissions-Policy", "")
+
+
+@pytest.mark.asyncio
+async def test_email_verification_token_flow():
+    """
+    A token minted with purpose 'email_verify' decodes back to its subject
+    for that purpose and is rejected for any other purpose.
+    """
+    user_id = "00000000-0000-0000-0000-000000000001"
+    claims = {"sub": user_id, "email": "test@example.com", "purpose": "email_verify"}
+
+    token = create_verification_token(claims, expires_minutes=1440)
+
+    # Matching purpose: the subject comes back.
+    assert decode_verification_token(token, "email_verify") == user_id
+    # Mismatched purpose: decoding yields nothing.
+    assert decode_verification_token(token, "password_reset") is None
+
+
+def test_email_html_escaping():
+    """
+    Verify that user inputs like display_name are HTML-escaped in email messages
+    to prevent HTML injection.
+    """
+    # The payload must contain '<', '>' and '&' so all three escapes are
+    # exercised. Quotes are left out so the expected text does not depend on
+    # html.escape()'s ``quote`` setting.
+    malicious_display_name = "<script>alert(1)</script> & Hello"
+    otp = "123456"
+
+    msg = EmailService._build_otp_message(
+        to_email="test@example.com",
+        otp=otp,
+        display_name=malicious_display_name,
+    )
+
+    # Extract the HTML alternative of the multipart message
+    html_part = None
+    for part in msg.walk():
+        if part.get_content_type() == "text/html":
+            html_part = part.get_content()
+            break
+
+    assert html_part is not None
+    # The display name must be escaped: '<' -> '&lt;', '>' -> '&gt;', '&' -> '&amp;'
+    escaped_display_name = html.escape(malicious_display_name)
+    assert escaped_display_name != malicious_display_name
+    assert escaped_display_name in html_part
+    assert malicious_display_name not in html_part
diff --git a/backend-service/tests/test_vocabulary_catalog.py b/backend-service/tests/test_vocabulary_catalog.py
new file mode 100644
index 00000000..fc7ba5d8
--- /dev/null
+++ b/backend-service/tests/test_vocabulary_catalog.py
@@ -0,0 +1,244 @@
+"""Tests for vocabulary_catalog: upsert and normalization."""
+
+from __future__ import annotations
+
+import uuid
+
+import pytest
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
+from sqlalchemy.orm import sessionmaker
+
+from app.core.database import Base
+from app.models.course import Course, Lesson, Unit
+from app.models.vocabulary import VocabularyItem
+from app.services.vocabulary_catalog import normalize_word, upsert_vocabulary_batch
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+async def vocab_db():
+    """Yield an AsyncSession on an in-memory SQLite DB holding only the course and vocabulary tables."""
+    engine = create_async_engine("sqlite+aiosqlite:///:memory:")
+    needed_tables = [
+        model.__table__ for model in (Course, Unit, Lesson, VocabularyItem)
+    ]
+
+    def _create_schema(sync_conn):
+        return Base.metadata.create_all(sync_conn, tables=needed_tables)
+
+    async with engine.begin() as conn:
+        await conn.run_sync(_create_schema)
+
+    session_factory = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
+    async with session_factory() as session:
+        yield session
+    await engine.dispose()
+
+
+def _item(
+    word: str,
+    pos: str = "noun",
+    definition: str = "A valid definition for testing purposes.",
+    difficulty_level: str = "A1",
+    topic: str = "general",
+) -> dict:
+    """Build one vocabulary payload dict; optional media/translation fields are None."""
+    payload = dict(
+        word=word,
+        part_of_speech=pos,
+        definition=definition,
+        difficulty_level=difficulty_level,
+        topic=topic,
+        source_name="generated",
+    )
+    # Fields the generator leaves unset.
+    payload.update(translation=None, pronunciation=None, audio_url=None)
+    return payload
+
+
+# ---------------------------------------------------------------------------
+# normalize_word
+# ---------------------------------------------------------------------------
+
+
+def test_normalize_lowercases_and_nfkc() -> None:
+    """normalize_word lowercases and applies NFKC compatibility folding."""
+    assert normalize_word("HELLO") == "hello"
+    # U+FB01 (fi ligature) decomposes to "fi" under NFKC; written as an escape
+    # so the ligature cannot be silently flattened in source.
+    assert normalize_word("\ufb01le") == "file"
+
+
+def test_normalize_collapses_whitespace() -> None:
+    """Surrounding whitespace is handled so the phrase comes back as "running fast"."""
+    padded = " running fast "
+    assert normalize_word(padded) == "running fast"
+
+
+def test_normalize_curly_apostrophe_replaced() -> None:
+    """A curly apostrophe (U+2019) becomes the straight ASCII apostrophe."""
+    result = normalize_word("it’s")
+    # Exact match: a bare "contains an apostrophe" check would also pass if the
+    # rest of the word were mangled.
+    assert result == "it's"
+
+
+def test_normalize_em_dash_becomes_hyphen() -> None:
+    """An em dash between word parts is replaced by an ASCII hyphen."""
+    normalized = normalize_word("well—being")
+    assert normalized == "well-being"
+
+
+def test_normalize_en_dash_becomes_hyphen() -> None:
+    """Every en dash in a compound is replaced by an ASCII hyphen."""
+    normalized = normalize_word("mother–in–law")
+    assert normalized == "mother-in-law"
+
+
+# ---------------------------------------------------------------------------
+# same word / different POS = different row
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_same_word_different_pos_creates_two_rows(vocab_db: AsyncSession) -> None:
+    """The same word under two parts of speech is stored as two distinct rows."""
+    batch = [_item("run", pos=part) for part in ("verb", "noun")]
+
+    identity = await upsert_vocabulary_batch(vocab_db, batch)
+
+    assert len(identity) == 2
+    assert identity[("run", "verb")] != identity[("run", "noun")]
+    stored = (await vocab_db.scalars(select(VocabularyItem))).all()
+    assert len(stored) == 2
+
+
+# ---------------------------------------------------------------------------
+# Duplicate words across lessons → single row
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_duplicate_words_across_lessons_produce_single_row(
+    vocab_db: AsyncSession,
+) -> None:
+    """Repeated entries that normalize to the same word and POS collapse into one row."""
+    # "BOOK" is the same as "book" after normalization.
+    batch = [_item(spelling, pos="noun") for spelling in ("book", "BOOK", "book")]
+
+    identity = await upsert_vocabulary_batch(vocab_db, batch)
+
+    stored = (await vocab_db.scalars(select(VocabularyItem))).all()
+    assert len(stored) == 1
+    assert ("book", "noun") in identity
+
+
+# ---------------------------------------------------------------------------
+# Placeholder definition replacement
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_blank_definition_in_db_is_replaced(vocab_db: AsyncSession) -> None:
+    """An existing row with a blank definition takes the incoming definition."""
+    # Blank definition: the curated field has not been set yet.
+    row = VocabularyItem(
+        word="run",
+        definition="",
+        part_of_speech="verb",
+        difficulty_level="A1",
+    )
+    vocab_db.add(row)
+    await vocab_db.flush()
+
+    incoming = _item("run", pos="verb", definition="To move quickly on foot.")
+    await upsert_vocabulary_batch(vocab_db, [incoming])
+
+    await vocab_db.refresh(row)
+    assert row.definition == "To move quickly on foot."
+
+
+# ---------------------------------------------------------------------------
+# Curated field preservation
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_non_blank_definition_not_overwritten(vocab_db: AsyncSession) -> None:
+    """An existing non-blank definition survives an upsert carrying a different one."""
+    curated_text = "Curated definition that must not be overwritten."
+    row = VocabularyItem(
+        word="book",
+        definition=curated_text,
+        part_of_speech="noun",
+        difficulty_level="A1",
+    )
+    vocab_db.add(row)
+    await vocab_db.flush()
+
+    incoming = _item("book", pos="noun", definition="A different generated definition.")
+    await upsert_vocabulary_batch(vocab_db, [incoming])
+
+    await vocab_db.refresh(row)
+    assert row.definition == "Curated definition that must not be overwritten."
+
+
+@pytest.mark.asyncio
+async def test_existing_translation_not_overwritten(vocab_db: AsyncSession) -> None:
+    """An existing translation survives an upsert carrying a different one."""
+    row = VocabularyItem(
+        word="hello",
+        definition="A greeting expression.",
+        translation={"vi": "xin chào"},
+        part_of_speech="interjection",
+        difficulty_level="A1",
+    )
+    vocab_db.add(row)
+    await vocab_db.flush()
+
+    # Same word and POS, but with a different translation.
+    incoming = {**_item("hello", pos="interjection"), "translation": {"vi": "chào hỏi"}}
+    await upsert_vocabulary_batch(vocab_db, [incoming])
+
+    await vocab_db.refresh(row)
+    assert row.translation == {"vi": "xin chào"}
+
+
+# ---------------------------------------------------------------------------
+# Repeat apply idempotency
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_repeat_apply_returns_same_ids(vocab_db: AsyncSession) -> None:
+    """Applying the same batch twice returns an identical identity map."""
+    batch = [_item(word, pos="noun") for word in ("water", "fire")]
+    initial = await upsert_vocabulary_batch(vocab_db, batch)
+    repeated = await upsert_vocabulary_batch(vocab_db, batch)
+    assert initial == repeated
+
+
+@pytest.mark.asyncio
+async def test_repeat_apply_does_not_create_duplicate_rows(
+    vocab_db: AsyncSession,
+) -> None:
+    """Applying the same batch twice leaves exactly one stored row for the word."""
+    batch = [_item("earth", pos="noun")]
+    for _ in range(2):
+        await upsert_vocabulary_batch(vocab_db, batch)
+
+    earth_query = select(VocabularyItem).where(VocabularyItem.word == "earth")
+    stored = (await vocab_db.scalars(earth_query)).all()
+    assert len(stored) == 1
+
+
+# ---------------------------------------------------------------------------
+# Empty batch
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_empty_batch_returns_empty_map(vocab_db: AsyncSession) -> None:
+    """An empty batch yields an empty identity map."""
+    nothing: list = []
+    identity = await upsert_vocabulary_batch(vocab_db, nothing)
+    assert identity == {}
+
+
+# ---------------------------------------------------------------------------
+# All normalized forms map to the same existing row
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_unicode_variant_maps_to_same_row(vocab_db: AsyncSession) -> None:
+    """Precomposed (NFC) and decomposed (NFD) spellings resolve to one row.
+
+    Both spellings are written as escapes: as literal characters they look
+    identical and can be silently normalized by editors or tooling, which
+    would make the test compare a string with itself.
+    """
+    precomposed = "caf\u00e9"  # e-acute as one code point (NFC)
+    decomposed = "cafe\u0301"  # 'e' followed by a combining acute accent (NFD)
+    assert precomposed != decomposed
+
+    first = await upsert_vocabulary_batch(vocab_db, [_item(precomposed, pos="noun")])
+    second = await upsert_vocabulary_batch(vocab_db, [_item(decomposed, pos="noun")])
+
+    # After NFKC normalization both should collapse to the same row
+    rows = (await vocab_db.scalars(select(VocabularyItem))).all()
+    assert len(rows) == 1
+    # ...and therefore to the same identity-map entry.
+    assert first == second
diff --git a/backend-service/tests/test_vocabulary_catalog_normalize.py b/backend-service/tests/test_vocabulary_catalog_normalize.py
new file mode 100644
index 00000000..9c6d4644
--- /dev/null
+++ b/backend-service/tests/test_vocabulary_catalog_normalize.py
@@ -0,0 +1,59 @@
+"""Regression tests for vocabulary_catalog.normalize_word.
+
+These are pure unit tests — no DB required.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from app.services.vocabulary_catalog import normalize_word
+
+
+@pytest.mark.parametrize(
+    "raw, expected",
+    [
+        ("HELLO", "hello"),
+        ("Hello World", "hello world"),
+        (" apple ", "apple"),
+        ("go to sleep", "go to sleep"),
+        # Non-ASCII inputs are written as escapes so they cannot be flattened
+        # or confused with their ASCII look-alikes in source.
+        ("\ufb01le", "file"),  # fi ligature (U+FB01) via NFKC
+        ("well\u2013being", "well-being"),  # en dash U+2013
+        ("caf\xe9\u2014au\u2014lait", "caf\xe9-au-lait"),  # em dash U+2014
+        ("Caf\xe9", "caf\xe9"),
+        ("A", "a"),
+    ],
+)
+def test_normalize_word_ascii_and_common_unicode(raw: str, expected: str) -> None:
+    """Each raw spelling normalizes to the expected canonical form."""
+    assert normalize_word(raw) == expected
+
+
+def test_normalize_word_right_single_quotation_mark_becomes_apostrophe() -> None:
+    """U+2019 (right single quotation mark) maps to U+0027 (straight apostrophe)."""
+    normalized = normalize_word("it’s")
+    assert normalized == "it's"
+
+
+def test_normalize_word_left_single_quotation_mark_becomes_apostrophe() -> None:
+    """U+2018 (left single quotation mark) maps to U+0027 (straight apostrophe)."""
+    normalized = normalize_word("‘hello’")
+    assert normalized == "'hello'"
+
+
+def test_normalize_word_preserves_ascii_hyphen() -> None:
+    """An ASCII hyphen already in the word is left untouched."""
+    hyphenated = "well-known"
+    assert normalize_word(hyphenated) == "well-known"
+
+
+def test_normalize_word_empty_string_is_stable() -> None:
+    """The empty string normalizes to the empty string."""
+    normalized = normalize_word("")
+    assert normalized == ""
+
+
+def test_normalize_word_is_idempotent() -> None:
+    """Normalizing an already-normalized word changes nothing."""
+    first_pass = normalize_word(" Caf\xe9—World ")
+    second_pass = normalize_word(first_pass)
+    assert first_pass == second_pass
+
+
+def test_normalize_word_em_dash_to_ascii_hyphen() -> None:
+    """An em dash becomes an ASCII hyphen and the word is lowercased."""
+    normalized = normalize_word("Caf\xe9—Menu")
+    assert normalized == "caf\xe9-menu"
diff --git a/backend-service/tests/test_xp_routes.py b/backend-service/tests/test_xp_routes.py
index 94bef747..61212410 100644
--- a/backend-service/tests/test_xp_routes.py
+++ b/backend-service/tests/test_xp_routes.py
@@ -180,6 +180,7 @@ async def test_awards_xp_successfully(self, auth_client: AsyncClient):
f"{BASE}/award",
json={
"source": "lesson",
+ "source_id": "test-lesson-001",
"base_xp": 20,
"source_detail": "word_scramble",
"duration_seconds": 30,
@@ -278,7 +279,7 @@ async def test_streak_multiplier_applied(self, auth_client: AsyncClient):
response = await auth_client.post(
f"{BASE}/award",
- json={"source": "lesson", "base_xp": 20, "duration_seconds": 30},
+ json={"source": "lesson", "source_id": "test-lesson-002", "base_xp": 20, "duration_seconds": 30},
)
assert response.status_code == 200
@@ -297,7 +298,7 @@ async def test_daily_cap_returns_zero_xp(self, auth_client: AsyncClient):
response = await auth_client.post(
f"{BASE}/award",
- json={"source": "lesson", "base_xp": 20, "duration_seconds": 30},
+ json={"source": "lesson", "source_id": "test-lesson-003", "base_xp": 20, "duration_seconds": 30},
)
assert response.status_code == 200
@@ -326,7 +327,7 @@ async def test_level_up_is_reported(self, auth_client: AsyncClient):
response = await auth_client.post(
f"{BASE}/award",
- json={"source": "lesson", "base_xp": 50, "duration_seconds": 60},
+ json={"source": "lesson", "source_id": "test-lesson-004", "base_xp": 50, "duration_seconds": 60},
)
assert response.status_code == 200
diff --git a/backend-service/tests/test_xp_service.py b/backend-service/tests/test_xp_service.py
index 34342c6d..a5dca613 100644
--- a/backend-service/tests/test_xp_service.py
+++ b/backend-service/tests/test_xp_service.py
@@ -3,8 +3,9 @@
from unittest.mock import AsyncMock, MagicMock
import pytest
+from fastapi import HTTPException
-from app.services.xp_service import award_xp_transaction
+from app.services.xp_service import REPEAT_SENSITIVE_SOURCES, award_xp_transaction
def _result(*, scalar_one_or_none=None):
@@ -13,6 +14,27 @@ def _result(*, scalar_one_or_none=None):
return result
+def _make_user(user_id: uuid.UUID | None = None) -> SimpleNamespace:
+ return SimpleNamespace(
+ id=user_id or uuid.uuid4(),
+ total_xp=0,
+ numeric_level=1,
+ level="A1",
+ rank="bronze",
+ rank_score=0,
+ rank_level_score=0,
+ rank_proficiency_score=0,
+ )
+
+
+def _make_db(*side_effects) -> MagicMock:
+ db = MagicMock()
+ db.execute = AsyncMock(side_effect=list(side_effects))
+ db.add = MagicMock()
+ db.commit = AsyncMock()
+ return db
+
+
@pytest.mark.asyncio
async def test_award_locks_user_and_updates_weekly_leaderboard():
user_id = uuid.uuid4()
@@ -29,9 +51,16 @@ async def test_award_locks_user_and_updates_weekly_leaderboard():
daily = SimpleNamespace(xp_earned=5)
leaderboard = SimpleNamespace(xp_earned=20, league="bronze")
db = MagicMock()
+ # With source_id provided, there are 5 db.execute calls:
+ # 1. SELECT ... FOR UPDATE (lock user row)
+ # 2. SELECT XPTransaction (duplicate check — returns None = no duplicate)
+ # 3. UPDATE users
+ # 4. SELECT DailyActivity
+ # 5. SELECT LeaderboardEntry ... FOR UPDATE
db.execute = AsyncMock(
side_effect=[
_result(scalar_one_or_none=user),
+ _result(scalar_one_or_none=None),
MagicMock(),
_result(scalar_one_or_none=daily),
_result(scalar_one_or_none=leaderboard),
@@ -45,6 +74,7 @@ async def test_award_locks_user_and_updates_weekly_leaderboard():
user=user,
source="lesson",
base_xp=10,
+ source_id="lesson-uuid-001",
daily_xp_loader=AsyncMock(return_value=5),
streak_loader=AsyncMock(return_value=0),
)
@@ -54,7 +84,7 @@ async def test_award_locks_user_and_updates_weekly_leaderboard():
assert daily.xp_earned == 15
assert leaderboard.xp_earned == 30
assert leaderboard.league == user.rank
- assert db.execute.await_count == 4
+ assert db.execute.await_count == 5
db.commit.assert_awaited_once()
@@ -72,9 +102,11 @@ async def test_award_creates_current_week_leaderboard_entry():
rank_proficiency_score=0,
)
db = MagicMock()
+ # 5 execute calls: lock user, duplicate check (None), UPDATE users, daily (None), leaderboard (None)
db.execute = AsyncMock(
side_effect=[
_result(scalar_one_or_none=user),
+ _result(scalar_one_or_none=None),
MagicMock(),
_result(scalar_one_or_none=None),
_result(scalar_one_or_none=None),
@@ -88,6 +120,7 @@ async def test_award_creates_current_week_leaderboard_entry():
user=user,
source="lesson",
base_xp=10,
+ source_id="lesson-uuid-002",
daily_xp_loader=AsyncMock(return_value=0),
streak_loader=AsyncMock(return_value=0),
)
@@ -100,3 +133,117 @@ async def test_award_creates_current_week_leaderboard_entry():
assert len(leaderboard_entries) == 1
assert leaderboard_entries[0].xp_earned == 10
assert leaderboard_entries[0].league == user.rank
+
+
+# ── Task 5: source_id requirement for repeat-sensitive sources ────────────────
+
+@pytest.mark.parametrize("source", sorted(REPEAT_SENSITIVE_SOURCES))
+@pytest.mark.asyncio
+async def test_repeat_sensitive_source_requires_source_id(source: str) -> None:
+ """award_xp_transaction raises 422 when source_id is absent for repeat-sensitive sources."""
+ user = _make_user()
+ db = _make_db()
+
+ with pytest.raises(HTTPException) as exc_info:
+ await award_xp_transaction(
+ db=db,
+ user=user,
+ source=source,
+ base_xp=10,
+ source_id=None,
+ daily_xp_loader=AsyncMock(return_value=0),
+ streak_loader=AsyncMock(return_value=0),
+ )
+
+ assert exc_info.value.status_code == 422
+ assert "source_id" in exc_info.value.detail
+ db.execute.assert_not_awaited()
+
+
+@pytest.mark.asyncio
+async def test_non_repeat_sensitive_source_allows_missing_source_id() -> None:
+ """Sources like 'news' do not require source_id."""
+ user = _make_user()
+ db = _make_db(
+ _result(scalar_one_or_none=user),
+ MagicMock(),
+ _result(scalar_one_or_none=None),
+ _result(scalar_one_or_none=None),
+ )
+
+ result = await award_xp_transaction(
+ db=db,
+ user=user,
+ source="news",
+ base_xp=10,
+ source_id=None,
+ daily_xp_loader=AsyncMock(return_value=0),
+ streak_loader=AsyncMock(return_value=0),
+ )
+
+ assert result.xp_awarded == 10
+
+
+@pytest.mark.asyncio
+async def test_repeat_sensitive_source_with_source_id_proceeds() -> None:
+ """A repeat-sensitive source with source_id provided is accepted."""
+ user = _make_user()
+ # execute order: lock user, dup-check (None=no dup), UPDATE users, daily (None), leaderboard (None)
+ db = _make_db(
+ _result(scalar_one_or_none=user),
+ _result(scalar_one_or_none=None),
+ MagicMock(),
+ _result(scalar_one_or_none=None),
+ _result(scalar_one_or_none=None),
+ )
+
+ result = await award_xp_transaction(
+ db=db,
+ user=user,
+ source="lesson",
+ base_xp=10,
+ source_id="lesson-uuid-003",
+ daily_xp_loader=AsyncMock(return_value=0),
+ streak_loader=AsyncMock(return_value=0),
+ )
+
+ assert result.xp_awarded == 10
+
+
+@pytest.mark.asyncio
+async def test_daily_challenge_requires_source_id() -> None:
+ """daily_challenge is repeat-sensitive and must provide source_id."""
+ user = _make_user()
+ db = _make_db()
+
+ with pytest.raises(HTTPException) as exc_info:
+ await award_xp_transaction(
+ db=db,
+ user=user,
+ source="daily_challenge",
+ base_xp=50,
+ source_id=None,
+ daily_xp_loader=AsyncMock(return_value=0),
+ streak_loader=AsyncMock(return_value=0),
+ )
+
+ assert exc_info.value.status_code == 422
+
+
+@pytest.mark.asyncio
+async def test_unsupported_source_raises_400() -> None:
+ """An unknown source raises 400 before any source_id check."""
+ user = _make_user()
+ db = _make_db()
+
+ with pytest.raises(HTTPException) as exc_info:
+ await award_xp_transaction(
+ db=db,
+ user=user,
+ source="cheat",
+ base_xp=10,
+ daily_xp_loader=AsyncMock(return_value=0),
+ streak_loader=AsyncMock(return_value=0),
+ )
+
+ assert exc_info.value.status_code == 400
diff --git a/backend-service/tests/test_youtube_routes.py b/backend-service/tests/test_youtube_routes.py
index 61d60af7..2b8f5d76 100644
--- a/backend-service/tests/test_youtube_routes.py
+++ b/backend-service/tests/test_youtube_routes.py
@@ -6,7 +6,8 @@
- GET /youtube/search — search with quota management + caching
- GET /youtube/captions/{id} — caption fetch + parse
- GET /youtube/channels/{id}/videos — channel videos
-- Internal helpers: _parse_json3_captions, _estimate_cefr (import via news)
+- GET /youtube/translate — LLM contextual word translation
+- Internal helpers: _parse_json3_captions, _fetch_word_data
"""
import pytest
@@ -566,3 +567,221 @@ async def test_empty_items_returns_empty_videos(self):
assert result["videos"] == []
assert result["total_results"] == 0
+
+
+# ============================================================================
+# Word Translation — /youtube/translate
+# ============================================================================
+
+class TestFetchWordData:
+ """Unit tests for _fetch_word_data (no HTTP server needed)."""
+
+ @pytest.mark.asyncio
+ async def test_returns_llm_translation_with_dict_definition(self):
+ """Free Dictionary provides phonetic/definition; LLM provides translation."""
+ from app.routes.youtube import _fetch_word_data
+
+ dict_entry = [
+ {
+ "phonetic": "/rʌn/",
+ "phonetics": [],
+ "meanings": [
+ {
+ "partOfSpeech": "verb",
+ "definitions": [
+ {"definition": "Move at a speed faster than a walk.", "example": "She runs every morning."},
+ ],
+ }
+ ],
+ }
+ ]
+
+ mock_dict_resp = MagicMock()
+ mock_dict_resp.status_code = 200
+ mock_dict_resp.json.return_value = dict_entry
+
+ ai_result = {"translation": "điều hành", "phonetic": "/rʌn/", "part_of_speech": "verb"}
+
+ with patch("app.routes.youtube.AIServiceClient") as MockClient:
+ MockClient.return_value.translate_word = AsyncMock(return_value=ai_result)
+
+ with patch("app.routes.youtube.httpx.AsyncClient") as MockHttp:
+ mock_http_instance = AsyncMock()
+ mock_http_instance.get = AsyncMock(return_value=mock_dict_resp)
+ MockHttp.return_value.__aenter__ = AsyncMock(return_value=mock_http_instance)
+ MockHttp.return_value.__aexit__ = AsyncMock(return_value=False)
+
+ result = await _fetch_word_data("run", lang="vi", context="run a company")
+
+ assert result["word"] == "run"
+ assert result["translation"] == "điều hành"
+ assert result["phonetic"] == "/rʌn/"
+ assert result["part_of_speech"] == "verb"
+ assert "Move at a speed" in result["definition"]
+
+ @pytest.mark.asyncio
+ async def test_uses_llm_phonetic_when_dict_has_none(self):
+ """LLM phonetic fills in when Free Dictionary returns empty phonetic."""
+ from app.routes.youtube import _fetch_word_data
+
+ dict_entry = [
+ {
+ "phonetic": "",
+ "phonetics": [],
+ "meanings": [{"partOfSpeech": "", "definitions": [{"definition": "A financial institution."}]}],
+ }
+ ]
+
+ mock_dict_resp = MagicMock()
+ mock_dict_resp.status_code = 200
+ mock_dict_resp.json.return_value = dict_entry
+
+ ai_result = {"translation": "ngân hàng", "phonetic": "/bæŋk/", "part_of_speech": "noun"}
+
+ with patch("app.routes.youtube.AIServiceClient") as MockClient:
+ MockClient.return_value.translate_word = AsyncMock(return_value=ai_result)
+
+ with patch("app.routes.youtube.httpx.AsyncClient") as MockHttp:
+ mock_http_instance = AsyncMock()
+ mock_http_instance.get = AsyncMock(return_value=mock_dict_resp)
+ MockHttp.return_value.__aenter__ = AsyncMock(return_value=mock_http_instance)
+ MockHttp.return_value.__aexit__ = AsyncMock(return_value=False)
+
+ result = await _fetch_word_data("bank", lang="vi", context="I went to the bank")
+
+ assert result["translation"] == "ngân hàng"
+ assert result["phonetic"] == "/bæŋk/"
+
+ @pytest.mark.asyncio
+ async def test_graceful_when_dict_api_fails(self):
+ """Dict API 404 → still returns LLM translation, no crash."""
+ from app.routes.youtube import _fetch_word_data
+
+ mock_dict_resp = MagicMock()
+ mock_dict_resp.status_code = 404
+
+ ai_result = {"translation": "chạy", "phonetic": "", "part_of_speech": "verb"}
+
+ with patch("app.routes.youtube.AIServiceClient") as MockClient:
+ MockClient.return_value.translate_word = AsyncMock(return_value=ai_result)
+
+ with patch("app.routes.youtube.httpx.AsyncClient") as MockHttp:
+ mock_http_instance = AsyncMock()
+ mock_http_instance.get = AsyncMock(return_value=mock_dict_resp)
+ MockHttp.return_value.__aenter__ = AsyncMock(return_value=mock_http_instance)
+ MockHttp.return_value.__aexit__ = AsyncMock(return_value=False)
+
+ result = await _fetch_word_data("run", lang="vi", context="")
+
+ assert result["translation"] == "chạy"
+ assert result["definition"] == ""
+ assert result["phonetic"] == ""
+
+ @pytest.mark.asyncio
+ async def test_graceful_when_ai_service_fails(self):
+ """ai-service down → translation empty, dict data still returned."""
+ from app.routes.youtube import _fetch_word_data
+
+ dict_entry = [
+ {
+ "phonetic": "/rʌn/",
+ "phonetics": [],
+ "meanings": [
+ {"partOfSpeech": "verb", "definitions": [{"definition": "Move fast."}]}
+ ],
+ }
+ ]
+ mock_dict_resp = MagicMock()
+ mock_dict_resp.status_code = 200
+ mock_dict_resp.json.return_value = dict_entry
+
+ with patch("app.routes.youtube.AIServiceClient") as MockClient:
+ MockClient.return_value.translate_word = AsyncMock(
+ return_value={"translation": "", "phonetic": "", "part_of_speech": ""}
+ )
+
+ with patch("app.routes.youtube.httpx.AsyncClient") as MockHttp:
+ mock_http_instance = AsyncMock()
+ mock_http_instance.get = AsyncMock(return_value=mock_dict_resp)
+ MockHttp.return_value.__aenter__ = AsyncMock(return_value=mock_http_instance)
+ MockHttp.return_value.__aexit__ = AsyncMock(return_value=False)
+
+ result = await _fetch_word_data("run", lang="vi", context="")
+
+ assert result["translation"] == ""
+ assert result["phonetic"] == "/rʌn/"
+ assert result["definition"] == "Move fast."
+
+
+class TestTranslateEndpoint:
+ """Route-level tests for GET /api/v1/youtube/translate."""
+
+ @pytest.mark.asyncio
+ async def test_translate_returns_word_data(self, no_db_client):
+ """Endpoint returns cached word data from APICacheService."""
+ mock_result = MagicMock()
+ mock_result.data = {
+ "word": "run",
+ "translation": "chạy",
+ "phonetic": "/rʌn/",
+ "part_of_speech": "verb",
+ "definition": "Move at speed.",
+ "examples": ["She runs every day."],
+ }
+
+ with patch("app.routes.youtube.APICacheService") as MockCache:
+ mock_cache_instance = AsyncMock()
+ mock_cache_instance.get_or_fetch.return_value = mock_result
+ MockCache.return_value = mock_cache_instance
+
+ resp = await no_db_client.get("/api/v1/youtube/translate?word=run&lang=vi")
+
+ assert resp.status_code == 200
+ body = resp.json()
+ assert body["word"] == "run"
+ assert body["translation"] == "chạy"
+
+ @pytest.mark.asyncio
+ async def test_translate_passes_context_to_fetch(self, no_db_client):
+ """The cache key depends only on word+lang; the context query param must not change it."""
+ captured_context: list[str] = []
+
+ mock_result = MagicMock()
+ mock_result.data = {
+ "word": "run", "translation": "điều hành", "phonetic": "",
+ "part_of_speech": "verb", "definition": "", "examples": [],
+ }
+
+ async def capture_fetch(cache_key, api_name, fetch_fn, **kwargs):
+ # fetch_fn is intentionally not executed; only the cache key passed by the route is recorded.
+ # The assertion below checks that the key is stable (word+lang, not context).
+ captured_context.append(cache_key)
+ return mock_result
+
+ with patch("app.routes.youtube.APICacheService") as MockCache:
+ mock_cache_instance = AsyncMock()
+ mock_cache_instance.get_or_fetch.side_effect = capture_fetch
+ MockCache.return_value = mock_cache_instance
+
+ resp = await no_db_client.get(
+ "/api/v1/youtube/translate?word=run&lang=vi&context=run+a+company"
+ )
+
+ assert resp.status_code == 200
+ # Cache key must NOT include context (stable hit rate)
+ assert captured_context[0] == "youtube:translate:run:vi"
+
+ @pytest.mark.asyncio
+ async def test_translate_returns_empty_on_exception(self, no_db_client):
+ """Any unhandled exception → graceful empty word result, not 500."""
+ with patch("app.routes.youtube.APICacheService") as MockCache:
+ mock_cache_instance = AsyncMock()
+ mock_cache_instance.get_or_fetch.side_effect = Exception("Redis down")
+ MockCache.return_value = mock_cache_instance
+
+ resp = await no_db_client.get("/api/v1/youtube/translate?word=test&lang=vi")
+
+ assert resp.status_code == 200
+ body = resp.json()
+ assert body["word"] == "test"
+ assert body["translation"] == ""
diff --git a/contracts/content-agent/course-artifact-v2.schema.json b/contracts/content-agent/course-artifact-v2.schema.json
new file mode 100644
index 00000000..1c5ed7e2
--- /dev/null
+++ b/contracts/content-agent/course-artifact-v2.schema.json
@@ -0,0 +1,491 @@
+{
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
+ "$id": "https://lexilingo.me/contracts/content-agent/course-artifact-v2.schema.json",
+ "title": "LexiLingo Content Agent Artifact v2",
+ "type": "object",
+ "additionalProperties": false,
+ "required": [
+ "schema_version",
+ "prompt_version",
+ "generation_key",
+ "source_manifest",
+ "courses",
+ "quality"
+ ],
+ "properties": {
+ "schema_version": {
+ "const": 2
+ },
+ "prompt_version": {
+ "const": "cefr-course-v2"
+ },
+ "generation_key": {
+ "$ref": "#/$defs/sha256"
+ },
+ "source_manifest": {
+ "type": "array",
+ "minItems": 1,
+ "items": {
+ "$ref": "#/$defs/sourceManifest"
+ }
+ },
+ "courses": {
+ "type": "array",
+ "minItems": 1,
+ "items": {
+ "$ref": "#/$defs/course"
+ }
+ },
+ "quality": {
+ "$ref": "#/$defs/quality"
+ }
+ },
+ "$defs": {
+ "sha256": {
+ "type": "string",
+ "pattern": "^[a-f0-9]{64}$"
+ },
+ "cefrLevel": {
+ "type": "string",
+ "enum": ["A1", "A2", "B1", "B2", "C1", "C2"]
+ },
+ "partOfSpeech": {
+ "type": "string",
+ "enum": [
+ "noun",
+ "verb",
+ "adjective",
+ "adverb",
+ "pronoun",
+ "preposition",
+ "conjunction",
+ "interjection",
+ "phrase"
+ ]
+ },
+ "sourceManifest": {
+ "type": "object",
+ "additionalProperties": false,
+ "required": [
+ "snapshot_id",
+ "source_name",
+ "source_version",
+ "official_url",
+ "license_id",
+ "license_url",
+ "attribution_text",
+ "retrieved_at",
+ "raw_checksum",
+ "normalized_sha256",
+ "normalized_bytes",
+ "record_checksum_root",
+ "adapter_version",
+ "record_count"
+ ],
+ "properties": {
+ "snapshot_id": {
+ "type": "string",
+ "minLength": 1,
+ "maxLength": 500
+ },
+ "source_name": {
+ "type": "string",
+ "enum": [
+ "oewn",
+ "cmudict",
+ "cefr_j",
+ "wikidata",
+ "tatoeba",
+ "librispeech",
+ "common_voice",
+ "admin_upload"
+ ]
+ },
+ "source_version": {
+ "type": "string",
+ "minLength": 1,
+ "maxLength": 64
+ },
+ "official_url": {
+ "type": "string",
+ "format": "uri",
+ "maxLength": 1000
+ },
+ "license_id": {
+ "type": "string",
+ "minLength": 1,
+ "maxLength": 128
+ },
+ "license_url": {
+ "type": "string",
+ "format": "uri",
+ "maxLength": 1000
+ },
+ "attribution_text": {
+ "type": "string",
+ "minLength": 1,
+ "maxLength": 2000
+ },
+ "retrieved_at": {
+ "type": "string",
+ "format": "date-time"
+ },
+ "raw_checksum": {
+ "$ref": "#/$defs/sha256"
+ },
+ "normalized_sha256": {
+ "$ref": "#/$defs/sha256"
+ },
+ "normalized_bytes": {
+ "type": "integer",
+ "minimum": 1
+ },
+ "record_checksum_root": {
+ "$ref": "#/$defs/sha256"
+ },
+ "adapter_version": {
+ "type": "integer",
+ "minimum": 1
+ },
+ "record_count": {
+ "type": "integer",
+ "minimum": 1
+ }
+ }
+ },
+ "exercise": {
+ "type": "object",
+ "additionalProperties": false,
+ "required": [
+ "id",
+ "type",
+ "ui_type",
+ "question",
+ "correct_answer",
+ "difficulty",
+ "points"
+ ],
+ "properties": {
+ "id": {
+ "type": "string",
+ "minLength": 1,
+ "maxLength": 100
+ },
+ "type": {
+ "type": "string",
+ "enum": [
+ "multiple_choice",
+ "true_false",
+ "fill_blank",
+ "translate",
+ "matching",
+ "reorder"
+ ]
+ },
+ "ui_type": {
+ "type": "string",
+ "minLength": 1,
+ "maxLength": 100
+ },
+ "question": {
+ "type": "string",
+ "minLength": 1,
+ "maxLength": 10000
+ },
+ "options": {
+ "type": ["array", "null"]
+ },
+ "correct_answer": {
+ "type": "string",
+ "minLength": 1,
+ "maxLength": 10000
+ },
+ "explanation": {
+ "type": ["string", "null"],
+ "maxLength": 10000
+ },
+ "hint": {
+ "type": ["string", "null"],
+ "maxLength": 5000
+ },
+ "audio_url": {
+ "type": ["string", "null"],
+ "maxLength": 1000
+ },
+ "image_url": {
+ "type": ["string", "null"],
+ "maxLength": 1000
+ },
+ "difficulty": {
+ "type": "integer",
+ "minimum": 1,
+ "maximum": 5
+ },
+ "points": {
+ "type": "integer",
+ "minimum": 0
+ }
+ }
+ },
+ "vocabulary": {
+ "type": "object",
+ "additionalProperties": false,
+ "required": [
+ "word",
+ "definition",
+ "part_of_speech",
+ "difficulty_level",
+ "topic",
+ "source_name",
+ "license_mode"
+ ],
+ "properties": {
+ "word": {
+ "type": "string",
+ "minLength": 1,
+ "maxLength": 255
+ },
+ "definition": {
+ "type": "string",
+ "minLength": 1,
+ "maxLength": 10000
+ },
+ "translation_vi": {
+ "type": ["string", "null"],
+ "maxLength": 5000
+ },
+ "example": {
+ "type": ["string", "null"],
+ "maxLength": 10000
+ },
+ "pronunciation": {
+ "type": ["string", "null"],
+ "maxLength": 500
+ },
+ "audio_url": {
+ "type": ["string", "null"],
+ "maxLength": 1000
+ },
+ "part_of_speech": {
+ "$ref": "#/$defs/partOfSpeech"
+ },
+ "difficulty_level": {
+ "$ref": "#/$defs/cefrLevel"
+ },
+ "topic": {
+ "type": "string",
+ "minLength": 1,
+ "maxLength": 100
+ },
+ "source_name": {
+ "type": "string",
+ "minLength": 1,
+ "maxLength": 100
+ },
+ "source_url": {
+ "type": ["string", "null"],
+ "maxLength": 1000
+ },
+ "license_mode": {
+ "type": "string",
+ "minLength": 1,
+ "maxLength": 100
+ },
+ "source_checksum": {
+ "oneOf": [
+ {
+ "$ref": "#/$defs/sha256"
+ },
+ {
+ "type": "null"
+ }
+ ]
+ },
+ "source_version": {
+ "type": ["string", "null"],
+ "maxLength": 64
+ },
+ "source_record_id": {
+ "type": ["string", "null"],
+ "maxLength": 500
+ },
+ "license_id": {
+ "type": ["string", "null"],
+ "maxLength": 128
+ },
+ "license_url": {
+ "type": ["string", "null"],
+ "maxLength": 1000
+ },
+ "attribution_text": {
+ "type": ["string", "null"],
+ "maxLength": 2000
+ },
+ "raw_checksum": {
+ "oneOf": [
+ {
+ "$ref": "#/$defs/sha256"
+ },
+ {
+ "type": "null"
+ }
+ ]
+ },
+ "record_checksum": {
+ "oneOf": [
+ {
+ "$ref": "#/$defs/sha256"
+ },
+ {
+ "type": "null"
+ }
+ ]
+ },
+ "lineage": {
+ "type": ["object", "null"]
+ },
+ "content_usage": {
+ "type": ["string", "null"],
+ "maxLength": 32
+ }
+ }
+ },
+ "lesson": {
+ "type": "object",
+ "additionalProperties": false,
+ "required": [
+ "title",
+ "order_index",
+ "vocabulary",
+ "exercises",
+ "estimated_minutes",
+ "xp_reward"
+ ],
+ "properties": {
+ "title": {
+ "type": "string",
+ "minLength": 1,
+ "maxLength": 255
+ },
+ "description": {
+ "type": ["string", "null"],
+ "maxLength": 10000
+ },
+ "order_index": {
+ "type": "integer",
+ "minimum": 0
+ },
+ "vocabulary": {
+ "type": "array",
+ "minItems": 1,
+ "items": {
+ "$ref": "#/$defs/vocabulary"
+ }
+ },
+ "exercises": {
+ "type": "array",
+ "minItems": 1,
+ "items": {
+ "$ref": "#/$defs/exercise"
+ }
+ },
+ "estimated_minutes": {
+ "type": "integer",
+ "minimum": 1,
+ "maximum": 120
+ },
+ "xp_reward": {
+ "type": "integer",
+ "minimum": 0
+ }
+ }
+ },
+ "unit": {
+ "type": "object",
+ "additionalProperties": false,
+ "required": ["title", "order_index", "lessons"],
+ "properties": {
+ "title": {
+ "type": "string",
+ "minLength": 1,
+ "maxLength": 255
+ },
+ "description": {
+ "type": ["string", "null"],
+ "maxLength": 10000
+ },
+ "order_index": {
+ "type": "integer",
+ "minimum": 0
+ },
+ "lessons": {
+ "type": "array",
+ "minItems": 1,
+ "items": {
+ "$ref": "#/$defs/lesson"
+ }
+ }
+ }
+ },
+ "course": {
+ "type": "object",
+ "additionalProperties": false,
+ "required": ["title", "language", "level", "tags", "units"],
+ "properties": {
+ "title": {
+ "type": "string",
+ "minLength": 1,
+ "maxLength": 255
+ },
+ "description": {
+ "type": ["string", "null"],
+ "maxLength": 10000
+ },
+ "language": {
+ "const": "en"
+ },
+ "level": {
+ "$ref": "#/$defs/cefrLevel"
+ },
+ "tags": {
+ "type": "array",
+ "uniqueItems": true,
+ "items": {
+ "type": "string",
+ "minLength": 1,
+ "maxLength": 100
+ }
+ },
+ "units": {
+ "type": "array",
+ "minItems": 1,
+ "items": {
+ "$ref": "#/$defs/unit"
+ }
+ }
+ }
+ },
+ "quality": {
+ "type": "object",
+ "additionalProperties": false,
+ "required": ["blocking_errors", "warnings", "metrics"],
+ "properties": {
+ "blocking_errors": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "warnings": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "metrics": {
+ "type": "object"
+ }
+ }
+ }
+ }
+}
diff --git a/contracts/content-agent/exercise-types-v1.json b/contracts/content-agent/exercise-types-v1.json
new file mode 100644
index 00000000..3f6a8d33
--- /dev/null
+++ b/contracts/content-agent/exercise-types-v1.json
@@ -0,0 +1,32 @@
+{
+ "schema_version": 1,
+ "base_types": [
+ "multiple_choice",
+ "true_false",
+ "fill_blank",
+ "translate",
+ "matching",
+ "reorder"
+ ],
+ "ui_type_to_type": {
+ "multiple_choice": "multiple_choice",
+ "true_or_false": "true_false",
+ "fill_in_the_blank": "fill_blank",
+ "arrange_the_sentence": "reorder",
+ "translation_choice": "translate",
+ "dialogue_completion": "fill_blank",
+ "collocation_choice": "multiple_choice",
+ "dictation": "fill_blank",
+ "grammar_correction": "fill_blank",
+ "image_based_choice": "multiple_choice",
+ "listen_and_choose": "multiple_choice",
+ "match_word_to_meaning": "matching",
+ "vocabulary_flashcard": "multiple_choice",
+ "pronunciation_practice": "translate",
+ "reading_comprehension": "multiple_choice",
+ "short_writing_answer": "fill_blank",
+ "speaking_repeat": "translate",
+ "categorization": "matching",
+ "cognitive_fluidity": "matching"
+ }
+}
diff --git a/contracts/content-agent/fixtures/licensed-etl-artifact-v2.json b/contracts/content-agent/fixtures/licensed-etl-artifact-v2.json
new file mode 100644
index 00000000..ac5bb434
--- /dev/null
+++ b/contracts/content-agent/fixtures/licensed-etl-artifact-v2.json
@@ -0,0 +1,307 @@
+{
+ "schema_version": 2,
+ "prompt_version": "cefr-course-v2",
+ "generation_key": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+ "source_manifest": [
+ {
+ "snapshot_id": "oewn:2025:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
+ "source_name": "oewn",
+ "source_version": "2025",
+ "official_url": "https://en-word.net/static/english-wordnet-2025.xml.gz",
+ "license_id": "CC-BY-4.0",
+ "license_url": "https://creativecommons.org/licenses/by/4.0/",
+ "attribution_text": "Open English WordNet 2025",
+ "retrieved_at": "2026-06-15T00:00:00Z",
+ "raw_checksum": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
+ "normalized_sha256": "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc",
+ "normalized_bytes": 2048,
+ "record_checksum_root": "dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd",
+ "adapter_version": 1,
+ "record_count": 8
+ }
+ ],
+ "courses": [
+ {
+ "title": "English A1 Licensed Foundations",
+ "description": "A draft course generated from a pinned licensed snapshot.",
+ "language": "en",
+ "level": "A1",
+ "tags": ["licensed", "a1"],
+ "units": [
+ {
+ "title": "Daily Life",
+ "description": "Licensed daily-life vocabulary.",
+ "order_index": 0,
+ "lessons": [
+ {
+ "title": "Daily Words",
+ "description": "Practice licensed A1 vocabulary.",
+ "order_index": 0,
+ "vocabulary": [
+ {
+ "word": "word00",
+ "definition": "A licensed lexical definition for word 0.",
+ "translation_vi": null,
+ "example": null,
+ "pronunciation": null,
+ "audio_url": null,
+ "part_of_speech": "noun",
+ "difficulty_level": "A1",
+ "topic": "daily_life",
+ "source_name": "oewn",
+ "source_url": "https://en-word.net/lemma/word00",
+ "license_mode": "approved_dataset",
+ "source_checksum": "0000000000000000000000000000000000000000000000000000000000000000",
+ "source_version": "2025",
+ "source_record_id": "oewn-entry-0",
+ "license_id": "CC-BY-4.0",
+ "license_url": "https://creativecommons.org/licenses/by/4.0/",
+ "attribution_text": "Open English WordNet 2025",
+ "raw_checksum": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
+ "record_checksum": "0000000000000000000000000000000000000000000000000000000000000000",
+ "lineage": {
+ "adapter": "oewn",
+ "adapter_version": 1,
+ "raw_path": "english-wordnet-2025.xml",
+ "source_location": "oewn-entry-0"
+ },
+ "content_usage": "lexical"
+ },
+ {
+ "word": "word01",
+ "definition": "A licensed lexical definition for word 1.",
+ "translation_vi": null,
+ "example": null,
+ "pronunciation": null,
+ "audio_url": null,
+ "part_of_speech": "noun",
+ "difficulty_level": "A1",
+ "topic": "daily_life",
+ "source_name": "oewn",
+ "source_url": "https://en-word.net/lemma/word01",
+ "license_mode": "approved_dataset",
+ "source_checksum": "0000000000000000000000000000000000000000000000000000000000000001",
+ "source_version": "2025",
+ "source_record_id": "oewn-entry-1",
+ "license_id": "CC-BY-4.0",
+ "license_url": "https://creativecommons.org/licenses/by/4.0/",
+ "attribution_text": "Open English WordNet 2025",
+ "raw_checksum": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
+ "record_checksum": "0000000000000000000000000000000000000000000000000000000000000001",
+ "lineage": {
+ "adapter": "oewn",
+ "adapter_version": 1,
+ "raw_path": "english-wordnet-2025.xml",
+ "source_location": "oewn-entry-1"
+ },
+ "content_usage": "lexical"
+ },
+ {
+ "word": "word02",
+ "definition": "A licensed lexical definition for word 2.",
+ "translation_vi": null,
+ "example": null,
+ "pronunciation": null,
+ "audio_url": null,
+ "part_of_speech": "noun",
+ "difficulty_level": "A1",
+ "topic": "daily_life",
+ "source_name": "oewn",
+ "source_url": "https://en-word.net/lemma/word02",
+ "license_mode": "approved_dataset",
+ "source_checksum": "0000000000000000000000000000000000000000000000000000000000000002",
+ "source_version": "2025",
+ "source_record_id": "oewn-entry-2",
+ "license_id": "CC-BY-4.0",
+ "license_url": "https://creativecommons.org/licenses/by/4.0/",
+ "attribution_text": "Open English WordNet 2025",
+ "raw_checksum": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
+ "record_checksum": "0000000000000000000000000000000000000000000000000000000000000002",
+ "lineage": {
+ "adapter": "oewn",
+ "adapter_version": 1,
+ "raw_path": "english-wordnet-2025.xml",
+ "source_location": "oewn-entry-2"
+ },
+ "content_usage": "lexical"
+ },
+ {
+ "word": "word03",
+ "definition": "A licensed lexical definition for word 3.",
+ "translation_vi": null,
+ "example": null,
+ "pronunciation": null,
+ "audio_url": null,
+ "part_of_speech": "noun",
+ "difficulty_level": "A1",
+ "topic": "daily_life",
+ "source_name": "oewn",
+ "source_url": "https://en-word.net/lemma/word03",
+ "license_mode": "approved_dataset",
+ "source_checksum": "0000000000000000000000000000000000000000000000000000000000000003",
+ "source_version": "2025",
+ "source_record_id": "oewn-entry-3",
+ "license_id": "CC-BY-4.0",
+ "license_url": "https://creativecommons.org/licenses/by/4.0/",
+ "attribution_text": "Open English WordNet 2025",
+ "raw_checksum": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
+ "record_checksum": "0000000000000000000000000000000000000000000000000000000000000003",
+ "lineage": {
+ "adapter": "oewn",
+ "adapter_version": 1,
+ "raw_path": "english-wordnet-2025.xml",
+ "source_location": "oewn-entry-3"
+ },
+ "content_usage": "lexical"
+ },
+ {
+ "word": "word04",
+ "definition": "A licensed lexical definition for word 4.",
+ "translation_vi": null,
+ "example": null,
+ "pronunciation": null,
+ "audio_url": null,
+ "part_of_speech": "noun",
+ "difficulty_level": "A1",
+ "topic": "daily_life",
+ "source_name": "oewn",
+ "source_url": "https://en-word.net/lemma/word04",
+ "license_mode": "approved_dataset",
+ "source_checksum": "0000000000000000000000000000000000000000000000000000000000000004",
+ "source_version": "2025",
+ "source_record_id": "oewn-entry-4",
+ "license_id": "CC-BY-4.0",
+ "license_url": "https://creativecommons.org/licenses/by/4.0/",
+ "attribution_text": "Open English WordNet 2025",
+ "raw_checksum": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
+ "record_checksum": "0000000000000000000000000000000000000000000000000000000000000004",
+ "lineage": {
+ "adapter": "oewn",
+ "adapter_version": 1,
+ "raw_path": "english-wordnet-2025.xml",
+ "source_location": "oewn-entry-4"
+ },
+ "content_usage": "lexical"
+ },
+ {
+ "word": "word05",
+ "definition": "A licensed lexical definition for word 5.",
+ "translation_vi": null,
+ "example": null,
+ "pronunciation": null,
+ "audio_url": null,
+ "part_of_speech": "noun",
+ "difficulty_level": "A1",
+ "topic": "daily_life",
+ "source_name": "oewn",
+ "source_url": "https://en-word.net/lemma/word05",
+ "license_mode": "approved_dataset",
+ "source_checksum": "0000000000000000000000000000000000000000000000000000000000000005",
+ "source_version": "2025",
+ "source_record_id": "oewn-entry-5",
+ "license_id": "CC-BY-4.0",
+ "license_url": "https://creativecommons.org/licenses/by/4.0/",
+ "attribution_text": "Open English WordNet 2025",
+ "raw_checksum": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
+ "record_checksum": "0000000000000000000000000000000000000000000000000000000000000005",
+ "lineage": {
+ "adapter": "oewn",
+ "adapter_version": 1,
+ "raw_path": "english-wordnet-2025.xml",
+ "source_location": "oewn-entry-5"
+ },
+ "content_usage": "lexical"
+ },
+ {
+ "word": "word06",
+ "definition": "A licensed lexical definition for word 6.",
+ "translation_vi": null,
+ "example": null,
+ "pronunciation": null,
+ "audio_url": null,
+ "part_of_speech": "noun",
+ "difficulty_level": "A1",
+ "topic": "daily_life",
+ "source_name": "oewn",
+ "source_url": "https://en-word.net/lemma/word06",
+ "license_mode": "approved_dataset",
+ "source_checksum": "0000000000000000000000000000000000000000000000000000000000000006",
+ "source_version": "2025",
+ "source_record_id": "oewn-entry-6",
+ "license_id": "CC-BY-4.0",
+ "license_url": "https://creativecommons.org/licenses/by/4.0/",
+ "attribution_text": "Open English WordNet 2025",
+ "raw_checksum": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
+ "record_checksum": "0000000000000000000000000000000000000000000000000000000000000006",
+ "lineage": {
+ "adapter": "oewn",
+ "adapter_version": 1,
+ "raw_path": "english-wordnet-2025.xml",
+ "source_location": "oewn-entry-6"
+ },
+ "content_usage": "lexical"
+ },
+ {
+ "word": "word07",
+ "definition": "A licensed lexical definition for word 7.",
+ "translation_vi": null,
+ "example": null,
+ "pronunciation": null,
+ "audio_url": null,
+ "part_of_speech": "noun",
+ "difficulty_level": "A1",
+ "topic": "daily_life",
+ "source_name": "oewn",
+ "source_url": "https://en-word.net/lemma/word07",
+ "license_mode": "approved_dataset",
+ "source_checksum": "0000000000000000000000000000000000000000000000000000000000000007",
+ "source_version": "2025",
+ "source_record_id": "oewn-entry-7",
+ "license_id": "CC-BY-4.0",
+ "license_url": "https://creativecommons.org/licenses/by/4.0/",
+ "attribution_text": "Open English WordNet 2025",
+ "raw_checksum": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
+ "record_checksum": "0000000000000000000000000000000000000000000000000000000000000007",
+ "lineage": {
+ "adapter": "oewn",
+ "adapter_version": 1,
+ "raw_path": "english-wordnet-2025.xml",
+ "source_location": "oewn-entry-7"
+ },
+ "content_usage": "lexical"
+ }
+ ],
+ "exercises": [
+ {
+ "id": "licensed-speaking-1",
+ "type": "translate",
+ "ui_type": "speaking_repeat",
+ "question": "Repeat the target word.",
+ "options": null,
+ "correct_answer": "word00",
+ "explanation": null,
+ "hint": null,
+ "audio_url": null,
+ "image_url": null,
+ "difficulty": 1,
+ "points": 10
+ }
+ ],
+ "estimated_minutes": 10,
+ "xp_reward": 20
+ }
+ ]
+ }
+ ]
+ }
+ ],
+ "quality": {
+ "blocking_errors": [],
+ "warnings": [],
+ "metrics": {
+ "input_records": 8,
+ "courses": 1,
+ "lessons": 1
+ }
+ }
+}
diff --git a/contracts/content-agent/source-record-v2.schema.json b/contracts/content-agent/source-record-v2.schema.json
new file mode 100644
index 00000000..eb7bc449
--- /dev/null
+++ b/contracts/content-agent/source-record-v2.schema.json
@@ -0,0 +1,271 @@
+{
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
+ "$id": "https://lexilingo.me/contracts/content-agent/source-record-v2.schema.json",
+ "title": "LexiLingo Normalized Source Record v2",
+ "type": "object",
+ "additionalProperties": false,
+ "required": [
+ "schema_version",
+ "record_id",
+ "source_name",
+ "source_version",
+ "source_record_id",
+ "source_url",
+ "license_id",
+ "license_url",
+ "attribution_text",
+ "content_usage",
+ "language",
+ "retrieved_at",
+ "raw_checksum",
+ "record_checksum",
+ "lineage"
+ ],
+ "properties": {
+ "schema_version": {
+ "const": 2
+ },
+ "record_id": {
+ "type": "string",
+ "minLength": 1,
+ "maxLength": 500
+ },
+ "source_name": {
+ "type": "string",
+ "enum": [
+ "oewn",
+ "cmudict",
+ "cefr_j",
+ "wikidata",
+ "tatoeba",
+ "librispeech",
+ "common_voice",
+ "admin_upload"
+ ]
+ },
+ "source_version": {
+ "type": "string",
+ "minLength": 1,
+ "maxLength": 64
+ },
+ "source_record_id": {
+ "type": "string",
+ "minLength": 1,
+ "maxLength": 500
+ },
+ "source_url": {
+ "type": "string",
+ "format": "uri",
+ "maxLength": 1000
+ },
+ "license_id": {
+ "$ref": "#/$defs/licenseId"
+ },
+ "license_url": {
+ "type": "string",
+ "format": "uri",
+ "maxLength": 1000
+ },
+ "attribution_text": {
+ "type": "string",
+ "minLength": 1,
+ "maxLength": 2000
+ },
+ "content_usage": {
+ "type": "string",
+ "enum": [
+ "lexical",
+ "pronunciation",
+ "label",
+ "topic",
+ "example",
+ "audio"
+ ]
+ },
+ "language": {
+ "type": "string",
+ "pattern": "^[a-z]{2,3}(-[A-Z]{2})?$"
+ },
+ "word": {
+ "type": ["string", "null"],
+ "maxLength": 255
+ },
+ "lemma": {
+ "type": ["string", "null"],
+ "maxLength": 255
+ },
+ "part_of_speech": {
+ "oneOf": [
+ {
+ "$ref": "#/$defs/partOfSpeech"
+ },
+ {
+ "type": "null"
+ }
+ ]
+ },
+ "definition": {
+ "type": ["string", "null"],
+ "maxLength": 10000
+ },
+ "translation_vi": {
+ "type": ["string", "null"],
+ "maxLength": 5000
+ },
+ "example": {
+ "type": ["string", "null"],
+ "maxLength": 10000
+ },
+ "pronunciation": {
+ "type": ["string", "null"],
+ "maxLength": 500
+ },
+ "audio": {
+ "oneOf": [
+ {
+ "$ref": "#/$defs/audio"
+ },
+ {
+ "type": "null"
+ }
+ ]
+ },
+ "declared_cefr": {
+ "oneOf": [
+ {
+ "$ref": "#/$defs/cefrLevel"
+ },
+ {
+ "type": "null"
+ }
+ ]
+ },
+ "assigned_cefr": {
+ "oneOf": [
+ {
+ "$ref": "#/$defs/cefrLevel"
+ },
+ {
+ "type": "null"
+ }
+ ]
+ },
+ "classification_confidence": {
+ "type": ["number", "null"],
+ "minimum": 0,
+ "maximum": 1
+ },
+ "topic_ids": {
+ "type": "array",
+ "uniqueItems": true,
+ "maxItems": 100,
+ "items": {
+ "type": "string",
+ "minLength": 1,
+ "maxLength": 255
+ }
+ },
+ "retrieved_at": {
+ "type": "string",
+ "format": "date-time"
+ },
+ "raw_checksum": {
+ "$ref": "#/$defs/sha256"
+ },
+ "record_checksum": {
+ "$ref": "#/$defs/sha256"
+ },
+ "lineage": {
+ "$ref": "#/$defs/lineage"
+ },
+ "metadata": {
+ "type": "object",
+ "additionalProperties": true
+ }
+ },
+ "$defs": {
+ "cefrLevel": {
+ "type": "string",
+ "enum": ["A1", "A2", "B1", "B2", "C1", "C2"]
+ },
+ "partOfSpeech": {
+ "type": "string",
+ "enum": [
+ "noun",
+ "verb",
+ "adjective",
+ "adverb",
+ "pronoun",
+ "preposition",
+ "conjunction",
+ "interjection",
+ "phrase"
+ ]
+ },
+ "licenseId": {
+ "type": "string",
+ "enum": [
+ "CC0-1.0",
+ "CC-BY-2.0-FR",
+ "CC-BY-4.0",
+ "LicenseRef-CMUdict",
+ "LicenseRef-CEFR-J-Commercial",
+ "LicenseRef-Admin-Owned",
+ "LicenseRef-Generated"
+ ]
+ },
+ "sha256": {
+ "type": "string",
+ "pattern": "^[a-f0-9]{64}$"
+ },
+ "audio": {
+ "type": "object",
+ "additionalProperties": false,
+ "required": ["url", "mime_type"],
+ "properties": {
+ "url": {
+ "type": "string",
+ "format": "uri",
+ "maxLength": 1000
+ },
+ "mime_type": {
+ "type": "string",
+ "maxLength": 100
+ },
+ "duration_seconds": {
+ "type": ["number", "null"],
+ "minimum": 0
+ },
+ "speaker_attribution": {
+ "type": ["string", "null"],
+ "maxLength": 1000
+ }
+ }
+ },
+ "lineage": {
+ "type": "object",
+ "additionalProperties": false,
+ "required": ["adapter", "adapter_version", "raw_path"],
+ "properties": {
+ "adapter": {
+ "type": "string",
+ "minLength": 1,
+ "maxLength": 100
+ },
+ "adapter_version": {
+ "type": "integer",
+ "minimum": 1
+ },
+ "raw_path": {
+ "type": "string",
+ "minLength": 1,
+ "maxLength": 1000
+ },
+ "source_location": {
+ "type": ["string", "null"],
+ "maxLength": 1000
+ }
+ }
+ }
+ }
+}
diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml
new file mode 100644
index 00000000..f69b6600
--- /dev/null
+++ b/docker-compose.dev.yml
@@ -0,0 +1,361 @@
+services:
+ # ============================================
+ # PostgreSQL Database (Backend Service)
+ # ============================================
+ postgres:
+ image: postgres:16-alpine
+ container_name: lexilingo-postgres
+ restart: unless-stopped
+ deploy:
+ resources:
+ limits:
+ memory: 512M
+ reservations:
+ memory: 256M
+ mem_limit: 512m
+ memswap_limit: 512m
+ environment:
+ POSTGRES_USER: ${POSTGRES_USER:-lexilingo}
+ POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?Set POSTGRES_PASSWORD in .env}
+ POSTGRES_DB: ${POSTGRES_DB:-lexilingo}
+ ports:
+ - "5432:5432"
+ volumes:
+ - postgres_data:/var/lib/postgresql/data
+ healthcheck:
+ test: ["CMD-SHELL", "pg_isready -U lexilingo"]
+ interval: 10s
+ timeout: 5s
+ retries: 5
+ networks:
+ - lexilingo-network
+
+ # ============================================
+ # pgAdmin (PostgreSQL Web Management UI)
+ # ============================================
+ pgadmin:
+ image: dpage/pgadmin4
+ container_name: lexilingo-pgadmin
+ restart: unless-stopped
+ deploy:
+ resources:
+ limits:
+ memory: 768M
+ reservations:
+ memory: 256M
+ mem_limit: 768m
+ memswap_limit: 768m
+ environment:
+ PGADMIN_DEFAULT_EMAIL: ${PGADMIN_DEFAULT_EMAIL:-admin@lexilingo.me}
+ PGADMIN_DEFAULT_PASSWORD: ${PGADMIN_DEFAULT_PASSWORD:?Set PGADMIN_DEFAULT_PASSWORD in .env}
+ PGADMIN_CONFIG_ENHANCED_COOKIE_PROTECTION: "True"
+ PGADMIN_CONFIG_LOGIN_BANNER: "'LexiLingo Database Management'"
+ ports:
+ - "5050:80"
+ depends_on:
+ postgres:
+ condition: service_healthy
+ networks:
+ - lexilingo-network
+
+ # ============================================
+ # MongoDB Database (AI Service)
+ # ============================================
+ mongodb:
+ image: mongo:7.0
+ container_name: lexilingo-mongodb
+ restart: unless-stopped
+ deploy:
+ resources:
+ limits:
+ memory: 1G
+ reservations:
+ memory: 512M
+ mem_limit: 1g
+ memswap_limit: 1g
+ ports:
+ - "27017:27017"
+ environment:
+ MONGO_INITDB_DATABASE: lexilingo
+ volumes:
+ - mongodb_data:/data/db
+ - mongodb_config:/data/configdb
+ - ./ai-service/scripts/mongo-init.js:/docker-entrypoint-initdb.d/mongo-init.js:ro
+ healthcheck:
+ test: ["CMD", "mongosh", "--quiet", "--eval", "db.adminCommand('ping').ok"]
+ interval: 10s
+ timeout: 5s
+ retries: 5
+ start_period: 40s
+ networks:
+ - lexilingo-network
+
+ # ============================================
+ # Mongo Express (MongoDB Web Management UI)
+ # ============================================
+ mongo-express:
+ image: mongo-express:latest
+ container_name: lexilingo-mongo-express
+ restart: unless-stopped
+ deploy:
+ resources:
+ limits:
+ memory: 256M
+ reservations:
+ memory: 128M
+ mem_limit: 256m
+ memswap_limit: 256m
+ environment:
+ ME_CONFIG_MONGODB_SERVER: mongodb
+ ME_CONFIG_MONGODB_PORT: 27017
+ ME_CONFIG_MONGODB_ENABLE_ADMIN: "true"
+ ME_CONFIG_MONGODB_AUTH_DATABASE: admin
+ ME_CONFIG_BASICAUTH_USERNAME: ${MONGO_EXPRESS_USER:-admin}
+ ME_CONFIG_BASICAUTH_PASSWORD: ${MONGO_EXPRESS_PASSWORD:?Set MONGO_EXPRESS_PASSWORD in .env}
+ ports:
+ - "8081:8081"
+ depends_on:
+ mongodb:
+ condition: service_healthy
+ networks:
+ - lexilingo-network
+
+ # ============================================
+ # Redis Cache (Backend)
+ # ============================================
+ redis:
+ image: redis:7-alpine
+ container_name: lexilingo-redis
+ restart: unless-stopped
+ deploy:
+ resources:
+ limits:
+ memory: 256M
+ reservations:
+ memory: 128M
+ mem_limit: 256m
+ memswap_limit: 256m
+ command: redis-server --appendonly yes --maxmemory 200mb --maxmemory-policy allkeys-lru
+ ports:
+ - "6379:6379"
+ volumes:
+ - redis_data:/data
+ healthcheck:
+ test: ["CMD", "redis-cli", "ping"]
+ interval: 10s
+ timeout: 5s
+ retries: 5
+ networks:
+ - lexilingo-network
+
+ # ============================================
+ # Redis Cache (AI Dedicated)
+ # ============================================
+ redis-ai:
+ image: redis:7-alpine
+ container_name: lexilingo-redis-ai
+ restart: unless-stopped
+ deploy:
+ resources:
+ limits:
+ memory: 768M
+ reservations:
+ memory: 256M
+ mem_limit: 768m
+ memswap_limit: 768m
+ command: redis-server --appendonly yes --maxmemory 600mb --maxmemory-policy allkeys-lfu
+ volumes:
+ - redis_ai_data:/data
+ healthcheck:
+ test: ["CMD", "redis-cli", "ping"]
+ interval: 10s
+ timeout: 5s
+ retries: 5
+ networks:
+ - lexilingo-network
+
+ # ============================================
+ # RedisInsight (Redis Web Management UI)
+ # ============================================
+ redisinsight:
+ image: redis/redisinsight:latest
+ container_name: lexilingo-redisinsight
+ restart: unless-stopped
+ deploy:
+ resources:
+ limits:
+ memory: 512M
+ reservations:
+ memory: 256M
+ mem_limit: 512m
+ memswap_limit: 512m
+ ports:
+ - "8282:5540"
+ depends_on:
+ redis:
+ condition: service_healthy
+ redis-ai:
+ condition: service_healthy
+ networks:
+ - lexilingo-network
+
+ # ============================================
+ # Backend Service (FastAPI + PostgreSQL)
+ # ============================================
+ backend-service:
+ build:
+ context: ./backend-service
+ dockerfile: Dockerfile
+ container_name: lexilingo-backend-service
+ restart: unless-stopped
+ env_file:
+ - ./backend-service/.env
+ deploy:
+ resources:
+ limits:
+ memory: 1G
+ reservations:
+ memory: 512M
+ mem_limit: 1g
+ memswap_limit: 1g
+ ports:
+ - "8000:8000"
+ environment:
+ DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-lexilingo}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-lexilingo}
+ SECRET_KEY: ${SECRET_KEY:?Set SECRET_KEY in .env}
+ DEBUG: ${DEBUG:-False}
+ ALLOWED_ORIGINS: ${ALLOWED_ORIGINS:?Set ALLOWED_ORIGINS in .env}
+ AI_SERVICE_URL: http://ai-service:8001/api/v1
+ # Redis (use container hostname, not localhost)
+ REDIS_URL: redis://redis:6379/0
+ REDIS_HOST: redis
+ REDIS_PORT: 6379
+ # Firebase
+ FIREBASE_PROJECT_ID: ${FIREBASE_PROJECT_ID:-lexilingo-88492}
+ FIREBASE_CREDENTIALS_FILE: /app/firebase-service-account.json
+ GOOGLE_CLIENT_ID: ${GOOGLE_CLIENT_ID:-}
+ GOOGLE_ADMIN_CLIENT_ID: ${GOOGLE_ADMIN_CLIENT_ID:-}
+ depends_on:
+ postgres:
+ condition: service_healthy
+ redis:
+ condition: service_healthy
+ volumes:
+ - ./backend-service/app:/app/app
+ - ./backend-service/alembic:/app/alembic
+ - ./backend-service/firebase-service-account.json:/app/firebase-service-account.json:ro
+ - ./backend-service/data:/app/data
+ command: uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload --proxy-headers --forwarded-allow-ips="*"
+ networks:
+ - lexilingo-network
+
+ backend-reminder-worker:
+ build:
+ context: ./backend-service
+ dockerfile: Dockerfile
+ container_name: lexilingo-reminder-worker
+ restart: unless-stopped
+ command: celery -A app.core.celery_app:celery_app worker --loglevel=INFO --concurrency=1
+ environment:
+ DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-lexilingo}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-lexilingo}
+ SECRET_KEY: ${SECRET_KEY:?Set SECRET_KEY in .env}
+ DEBUG: ${DEBUG:-False}
+ REDIS_URL: redis://redis:6379/0
+ REMINDERS_ENABLED: ${REMINDERS_ENABLED:-false}
+ REMINDER_DRY_RUN: ${REMINDER_DRY_RUN:-true}
+ FIREBASE_PROJECT_ID: ${FIREBASE_PROJECT_ID:-lexilingo-88492}
+ FIREBASE_CREDENTIALS_FILE: /app/firebase-service-account.json
+ depends_on:
+ postgres:
+ condition: service_healthy
+ redis:
+ condition: service_healthy
+ volumes:
+ - ./backend-service/app:/app/app
+ - ./backend-service/alembic:/app/alembic
+ - ./backend-service/firebase-service-account.json:/app/firebase-service-account.json:ro
+ networks:
+ - lexilingo-network
+
+ backend-reminder-beat:
+ build:
+ context: ./backend-service
+ dockerfile: Dockerfile
+ container_name: lexilingo-reminder-beat
+ restart: unless-stopped
+ command: celery -A app.core.celery_app:celery_app beat --loglevel=INFO
+ environment:
+ DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-lexilingo}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-lexilingo}
+ SECRET_KEY: ${SECRET_KEY:?Set SECRET_KEY in .env}
+ DEBUG: ${DEBUG:-False}
+ REDIS_URL: redis://redis:6379/0
+ REMINDERS_ENABLED: ${REMINDERS_ENABLED:-false}
+ REMINDER_DRY_RUN: ${REMINDER_DRY_RUN:-true}
+ depends_on:
+ redis:
+ condition: service_healthy
+ volumes:
+ - ./backend-service/app:/app/app
+ networks:
+ - lexilingo-network
+
+ # ============================================
+ # AI Service (FastAPI + MongoDB)
+ # ============================================
+ ai-service:
+ build:
+ context: ./ai-service
+ dockerfile: Dockerfile
+ container_name: lexilingo-ai-service
+ restart: unless-stopped
+ deploy:
+ resources:
+ limits:
+ memory: 6G
+ reservations:
+ memory: 2G
+ mem_limit: 6g
+ memswap_limit: 6g
+ ports:
+ - "8001:8001"
+ environment:
+ ENVIRONMENT: development
+ SECRET_KEY: ${SECRET_KEY:?Set SECRET_KEY in .env}
+ MONGODB_URI: mongodb://mongodb:27017
+ MONGODB_DB_NAME: lexilingo
+ REDIS_HOST: redis-ai
+ REDIS_PORT: 6379
+ REDIS_PASSWORD: ""
+ REDIS_DB: 0
+ REDIS_URL: redis://redis-ai:6379/0
+ GEMINI_API_KEY: ${GEMINI_API_KEY}
+ ALLOWED_ORIGINS: ${ALLOWED_ORIGINS:?Set ALLOWED_ORIGINS in .env}
+ DEBUG: ${DEBUG:-false}
+ depends_on:
+ mongodb:
+ condition: service_healthy
+ redis-ai:
+ condition: service_healthy
+ volumes:
+ - ./ai-service/api:/app/api
+ - ./ai-service/data:/app/data
+ - ./ai-service/models:/app/models
+ command: uvicorn api.main:app --host 0.0.0.0 --port 8001 --reload
+ networks:
+ - lexilingo-network
+
+volumes:
+ postgres_data:
+ driver: local
+ mongodb_data:
+ driver: local
+ mongodb_config:
+ driver: local
+ redis_data:
+ driver: local
+ redis_ai_data:
+ driver: local
+
+networks:
+ lexilingo-network:
+ driver: bridge
diff --git a/docker-compose.production.yml b/docker-compose.production.yml
deleted file mode 100644
index 3f35efaa..00000000
--- a/docker-compose.production.yml
+++ /dev/null
@@ -1,277 +0,0 @@
-services:
- gateway:
- image: nginx:1.27-alpine
- container_name: lexilingo-gateway
- restart: unless-stopped
- depends_on:
- backend-service:
- condition: service_healthy
- ai-service:
- condition: service_healthy
- environment:
- GATEWAY_SERVER_NAME: ${GATEWAY_SERVER_NAME:-api.lexilingo.me}
- GATEWAY_SSL_CERT_PATH: ${GATEWAY_SSL_CERT_PATH:-/etc/letsencrypt/live/api.lexilingo.me/fullchain.pem}
- GATEWAY_SSL_KEY_PATH: ${GATEWAY_SSL_KEY_PATH:-/etc/letsencrypt/live/api.lexilingo.me/privkey.pem}
- command: >
- /bin/sh -c "envsubst '$$GATEWAY_SERVER_NAME $$GATEWAY_SSL_CERT_PATH $$GATEWAY_SSL_KEY_PATH'
- < /etc/nginx/templates/default.conf.template
- > /etc/nginx/conf.d/default.conf
- && nginx -g 'daemon off;'"
- ports:
- - "80:80"
- - "443:443"
- volumes:
- - ./gateway/nginx/templates:/etc/nginx/templates:ro
- - ./gateway/nginx/snippets:/etc/nginx/snippets:ro
- - ./gateway/nginx/acme-challenge:/var/www/certbot
- - ./gateway/nginx/logs:/var/log/nginx
- - /etc/letsencrypt:/etc/letsencrypt:ro
- healthcheck:
- test: ["CMD-SHELL", "wget -q -O /dev/null http://127.0.0.1/health || exit 1"]
- interval: 30s
- timeout: 5s
- retries: 5
- start_period: 15s
- logging:
- driver: json-file
- options:
- max-size: "10m"
- max-file: "5"
- networks:
- - lexilingo-prod
-
- postgres:
- image: postgres:16-alpine
- container_name: lexilingo-postgres
- restart: unless-stopped
- command: postgres -c shared_buffers=128MB -c work_mem=4MB -c max_connections=100
- deploy:
- resources:
- limits:
- memory: 512M
- environment:
- POSTGRES_USER: ${POSTGRES_USER:-lexilingo}
- POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set in .env.production.secrets}
- POSTGRES_DB: ${POSTGRES_DB:-lexilingo}
- volumes:
- - postgres_data:/var/lib/postgresql/data
- healthcheck:
- test: ["CMD-SHELL", "pg_isready -U \"$$POSTGRES_USER\" -d \"$$POSTGRES_DB\""]
- interval: 10s
- timeout: 5s
- retries: 5
- networks:
- - lexilingo-prod
-
- mongodb:
- image: mongo:7.0
- container_name: lexilingo-mongodb
- restart: unless-stopped
- command: mongod --wiredTigerCacheSizeGB 0.25
- deploy:
- resources:
- limits:
- memory: 512M
- environment:
- MONGO_INITDB_DATABASE: ${MONGODB_DATABASE:-lexilingo}
- volumes:
- - mongodb_data:/data/db
- - mongodb_config:/data/configdb
- healthcheck:
- test: ["CMD", "mongosh", "--quiet", "--eval", "db.adminCommand('ping').ok"]
- interval: 15s
- timeout: 10s
- retries: 5
- start_period: 30s
- networks:
- - lexilingo-prod
-
- redis:
- image: redis:7-alpine
- container_name: lexilingo-redis
- restart: unless-stopped
- environment:
- REDIS_PASSWORD: ${REDIS_PASSWORD:?REDIS_PASSWORD must be set in .env.production.secrets}
- command: /bin/sh -c 'redis-server --appendonly yes --maxmemory 512mb --maxmemory-policy allkeys-lru --requirepass "$$REDIS_PASSWORD"'
- volumes:
- - redis_data:/data
- healthcheck:
- test: ["CMD-SHELL", "redis-cli -a \"$$REDIS_PASSWORD\" ping"]
- interval: 10s
- timeout: 5s
- retries: 5
- networks:
- - lexilingo-prod
-
- backend-service:
- build:
- context: ./backend-service
- dockerfile: Dockerfile.prod
- # TIP: To run multiple replicas for load balancing, remove `container_name`
- # and run: docker-compose up --scale backend-service=2
- # Nginx's `least_conn` upstream will automatically distribute traffic
- # across all replicas via Docker's internal DNS round-robin.
- container_name: lexilingo-backend-service
- restart: unless-stopped
- deploy:
- resources:
- limits:
- cpus: "1.0"
- memory: 1G
- reservations:
- cpus: "0.25"
- memory: 256M
- env_file:
- - .env.production
- - .env.production.secrets
- environment:
- DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-lexilingo}:${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set}@postgres:5432/${POSTGRES_DB:-lexilingo}
- REDIS_URL: redis://:${REDIS_PASSWORD:?REDIS_PASSWORD must be set}@redis:6379/0
- REDIS_PASSWORD: ${REDIS_PASSWORD:?REDIS_PASSWORD must be set}
- AI_SERVICE_URL: http://ai-service:8001/api/v1
- depends_on:
- postgres:
- condition: service_healthy
- redis:
- condition: service_healthy
- expose:
- - "8000"
- volumes:
- - ./backend-service/firebase-service-account.json:/app/firebase-service-account.json:ro
- healthcheck:
- test: ["CMD-SHELL", "python -c \"import urllib.request; urllib.request.urlopen('http://127.0.0.1:8000/health', timeout=3)\""]
- interval: 30s
- timeout: 5s
- retries: 5
- start_period: 60s
- logging:
- driver: json-file
- options:
- max-size: "10m"
- max-file: "5"
- networks:
- - lexilingo-prod
-
- backend-reminder-worker:
- build:
- context: ./backend-service
- dockerfile: Dockerfile.prod
- container_name: lexilingo-reminder-worker
- restart: unless-stopped
- command: celery -A app.core.celery_app:celery_app worker --loglevel=INFO --concurrency=1
- env_file:
- - .env.production
- - .env.production.secrets
- environment:
- DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-lexilingo}:${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set}@postgres:5432/${POSTGRES_DB:-lexilingo}
- REDIS_URL: redis://:${REDIS_PASSWORD:?REDIS_PASSWORD must be set}@redis:6379/0
- REDIS_PASSWORD: ${REDIS_PASSWORD:?REDIS_PASSWORD must be set}
- REMINDERS_ENABLED: ${REMINDERS_ENABLED:-false}
- REMINDER_DRY_RUN: ${REMINDER_DRY_RUN:-true}
- depends_on:
- postgres:
- condition: service_healthy
- redis:
- condition: service_healthy
- volumes:
- - ./backend-service/firebase-service-account.json:/app/firebase-service-account.json:ro
- logging:
- driver: json-file
- options:
- max-size: "10m"
- max-file: "5"
- networks:
- - lexilingo-prod
-
- backend-reminder-beat:
- build:
- context: ./backend-service
- dockerfile: Dockerfile.prod
- container_name: lexilingo-reminder-beat
- restart: unless-stopped
- command: celery -A app.core.celery_app:celery_app beat --loglevel=INFO
- env_file:
- - .env.production
- - .env.production.secrets
- environment:
- DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-lexilingo}:${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set}@postgres:5432/${POSTGRES_DB:-lexilingo}
- REDIS_URL: redis://:${REDIS_PASSWORD:?REDIS_PASSWORD must be set}@redis:6379/0
- REDIS_PASSWORD: ${REDIS_PASSWORD:?REDIS_PASSWORD must be set}
- REMINDERS_ENABLED: ${REMINDERS_ENABLED:-false}
- REMINDER_DRY_RUN: ${REMINDER_DRY_RUN:-true}
- depends_on:
- redis:
- condition: service_healthy
- logging:
- driver: json-file
- options:
- max-size: "10m"
- max-file: "5"
- networks:
- - lexilingo-prod
-
- ai-service:
- build:
- context: ./ai-service
- dockerfile: Dockerfile.prod
- container_name: lexilingo-ai-service
- restart: unless-stopped
- deploy:
- resources:
- limits:
- cpus: "2.0"
- memory: 2G
- reservations:
- cpus: "0.5"
- memory: 512M
- env_file:
- - .env.production
- - .env.production.secrets
- environment:
- ENVIRONMENT: production
- MONGODB_URI: mongodb://mongodb:27017
- MONGODB_DATABASE: ${MONGODB_DATABASE:-lexilingo}
- REDIS_URL: redis://:${REDIS_PASSWORD:?REDIS_PASSWORD must be set}@redis:6379/1
- REDIS_HOST: redis
- REDIS_PORT: 6379
- REDIS_PASSWORD: ${REDIS_PASSWORD:?REDIS_PASSWORD must be set}
- REDIS_DB: 1
- AI_MODEL_API_URL: http://ai-service:8001
- OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-http://host.docker.internal:11434}
- depends_on:
- mongodb:
- condition: service_healthy
- redis:
- condition: service_healthy
- expose:
- - "8001"
- extra_hosts:
- - "host.docker.internal:host-gateway"
- volumes:
- - ai_models:/app/models
- - ./ai-service/data:/app/data
- - ./backend-service/data/media:/app/data/media:ro
- healthcheck:
- test: ["CMD-SHELL", "python -c \"import urllib.request; urllib.request.urlopen('http://127.0.0.1:8001/health', timeout=3)\""]
- interval: 30s
- timeout: 5s
- retries: 5
- start_period: 90s
- logging:
- driver: json-file
- options:
- max-size: "10m"
- max-file: "5"
- networks:
- - lexilingo-prod
-
-volumes:
- postgres_data:
- mongodb_data:
- mongodb_config:
- redis_data:
- ai_models:
-
-networks:
- lexilingo-prod:
- driver: bridge
diff --git a/docker-compose.yml b/docker-compose.yml
index 49b8d01d..db2ab200 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,347 +1,293 @@
services:
- # ============================================
- # PostgreSQL Database (Backend Service)
- # ============================================
+ gateway:
+ image: nginx:1.27-alpine
+ container_name: lexilingo-gateway
+ restart: unless-stopped
+ depends_on:
+ backend-service:
+ condition: service_started
+ ai-service:
+ condition: service_started
+ environment:
+ GATEWAY_SERVER_NAME: ${GATEWAY_SERVER_NAME:-api.lexilingo.me}
+ GATEWAY_SSL_CERT_PATH: ${GATEWAY_SSL_CERT_PATH:-/etc/letsencrypt/live/api.lexilingo.me/fullchain.pem}
+ GATEWAY_SSL_KEY_PATH: ${GATEWAY_SSL_KEY_PATH:-/etc/letsencrypt/live/api.lexilingo.me/privkey.pem}
+ command: >
+ /bin/sh -c "envsubst '$$GATEWAY_SERVER_NAME $$GATEWAY_SSL_CERT_PATH $$GATEWAY_SSL_KEY_PATH'
+ < /etc/nginx/templates/default.conf.template
+ > /etc/nginx/conf.d/default.conf
+ && nginx -g 'daemon off;'"
+ ports:
+ - "80:80"
+ - "443:443"
+ volumes:
+ - ./gateway/nginx/templates:/etc/nginx/templates:ro
+ - ./gateway/nginx/snippets:/etc/nginx/snippets:ro
+ - ./gateway/nginx/acme-challenge:/var/www/certbot
+ - ./gateway/nginx/logs:/var/log/nginx
+ - /etc/letsencrypt:/etc/letsencrypt:ro
+ healthcheck:
+ test: ["CMD-SHELL", "wget -q -O /dev/null http://127.0.0.1/health || exit 1"]
+ interval: 30s
+ timeout: 5s
+ retries: 5
+ start_period: 15s
+ logging:
+ driver: json-file
+ options:
+ max-size: "10m"
+ max-file: "5"
+ networks:
+ - lexilingo-prod
+
postgres:
image: postgres:16-alpine
container_name: lexilingo-postgres
restart: unless-stopped
+ command: postgres -c shared_buffers=128MB -c work_mem=4MB -c max_connections=100
deploy:
resources:
limits:
memory: 512M
- reservations:
- memory: 256M
- mem_limit: 512m
- memswap_limit: 512m
environment:
POSTGRES_USER: ${POSTGRES_USER:-lexilingo}
- POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?Set POSTGRES_PASSWORD in .env}
+ POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set in .env.production.secrets}
POSTGRES_DB: ${POSTGRES_DB:-lexilingo}
- ports:
- - "5432:5432"
volumes:
- postgres_data:/var/lib/postgresql/data
+ ports:
+ - "127.0.0.1:5432:5432"
healthcheck:
- test: ["CMD-SHELL", "pg_isready -U lexilingo"]
+ test: ["CMD-SHELL", "pg_isready -U \"$$POSTGRES_USER\" -d \"$$POSTGRES_DB\""]
interval: 10s
timeout: 5s
retries: 5
networks:
- - lexilingo-network
-
- # ============================================
- # pgAdmin (PostgreSQL Web Management UI)
- # ============================================
- pgadmin:
- image: dpage/pgadmin4
- container_name: lexilingo-pgadmin
- restart: unless-stopped
- deploy:
- resources:
- limits:
- memory: 768M
- reservations:
- memory: 256M
- mem_limit: 768m
- memswap_limit: 768m
- environment:
- PGADMIN_DEFAULT_EMAIL: ${PGADMIN_DEFAULT_EMAIL:-admin@lexilingo.me}
- PGADMIN_DEFAULT_PASSWORD: ${PGADMIN_DEFAULT_PASSWORD:?Set PGADMIN_DEFAULT_PASSWORD in .env}
- PGADMIN_CONFIG_ENHANCED_COOKIE_PROTECTION: "True"
- PGADMIN_CONFIG_LOGIN_BANNER: "'LexiLingo Database Management'"
- ports:
- - "5050:80"
- depends_on:
- postgres:
- condition: service_healthy
- networks:
- - lexilingo-network
+ - lexilingo-prod
- # ============================================
- # MongoDB Database (AI Service)
- # ============================================
mongodb:
image: mongo:7.0
container_name: lexilingo-mongodb
restart: unless-stopped
+ command: mongod --wiredTigerCacheSizeGB 0.25
deploy:
resources:
limits:
- memory: 1G
- reservations:
memory: 512M
- mem_limit: 1g
- memswap_limit: 1g
- ports:
- - "27017:27017"
environment:
- MONGO_INITDB_DATABASE: lexilingo
+ MONGO_INITDB_DATABASE: ${MONGODB_DATABASE:-lexilingo}
volumes:
- mongodb_data:/data/db
- mongodb_config:/data/configdb
- - ./ai-service/scripts/mongo-init.js:/docker-entrypoint-initdb.d/mongo-init.js:ro
healthcheck:
test: ["CMD", "mongosh", "--quiet", "--eval", "db.adminCommand('ping').ok"]
- interval: 10s
- timeout: 5s
+ interval: 15s
+ timeout: 10s
retries: 5
- start_period: 40s
- networks:
- - lexilingo-network
-
- # ============================================
- # Mongo Express (MongoDB Web Management UI)
- # ============================================
- mongo-express:
- image: mongo-express:latest
- container_name: lexilingo-mongo-express
- restart: unless-stopped
- deploy:
- resources:
- limits:
- memory: 256M
- reservations:
- memory: 128M
- mem_limit: 256m
- memswap_limit: 256m
- environment:
- ME_CONFIG_MONGODB_SERVER: mongodb
- ME_CONFIG_MONGODB_PORT: 27017
- ME_CONFIG_MONGODB_ENABLE_ADMIN: "true"
- ME_CONFIG_MONGODB_AUTH_DATABASE: admin
- ME_CONFIG_BASICAUTH_USERNAME: ${MONGO_EXPRESS_USER:-admin}
- ME_CONFIG_BASICAUTH_PASSWORD: ${MONGO_EXPRESS_PASSWORD:?Set MONGO_EXPRESS_PASSWORD in .env}
- ports:
- - "8081:8081"
- depends_on:
- mongodb:
- condition: service_healthy
+ start_period: 30s
networks:
- - lexilingo-network
+ - lexilingo-prod
- # ============================================
- # Redis Cache (Backend)
- # ============================================
redis:
image: redis:7-alpine
container_name: lexilingo-redis
restart: unless-stopped
- deploy:
- resources:
- limits:
- memory: 256M
- reservations:
- memory: 128M
- mem_limit: 256m
- memswap_limit: 256m
- command: redis-server --appendonly yes --maxmemory 200mb --maxmemory-policy allkeys-lru
- ports:
- - "6379:6379"
+ environment:
+ REDIS_PASSWORD: ${REDIS_PASSWORD:?REDIS_PASSWORD must be set in .env.production.secrets}
+ command: /bin/sh -c 'redis-server --appendonly yes --maxmemory 512mb --maxmemory-policy allkeys-lru --requirepass "$$REDIS_PASSWORD"'
volumes:
- redis_data:/data
+ ports:
+ - "127.0.0.1:6379:6379"
healthcheck:
- test: ["CMD", "redis-cli", "ping"]
- interval: 10s
- timeout: 5s
- retries: 5
- networks:
- - lexilingo-network
-
- # ============================================
- # Redis Cache (AI Dedicated)
- # ============================================
- redis-ai:
- image: redis:7-alpine
- container_name: lexilingo-redis-ai
- restart: unless-stopped
- deploy:
- resources:
- limits:
- memory: 768M
- reservations:
- memory: 256M
- mem_limit: 768m
- memswap_limit: 768m
- command: redis-server --appendonly yes --maxmemory 600mb --maxmemory-policy allkeys-lfu
- volumes:
- - redis_ai_data:/data
- healthcheck:
- test: ["CMD", "redis-cli", "ping"]
+ test: ["CMD-SHELL", "redis-cli -a \"$$REDIS_PASSWORD\" ping"]
interval: 10s
timeout: 5s
retries: 5
networks:
- - lexilingo-network
-
- # ============================================
- # RedisInsight (Redis Web Management UI)
- # ============================================
- redisinsight:
- image: redis/redisinsight:latest
- container_name: lexilingo-redisinsight
- restart: unless-stopped
- deploy:
- resources:
- limits:
- memory: 512M
- reservations:
- memory: 256M
- mem_limit: 512m
- memswap_limit: 512m
- ports:
- - "8282:5540"
- depends_on:
- redis:
- condition: service_healthy
- redis-ai:
- condition: service_healthy
- networks:
- - lexilingo-network
+ - lexilingo-prod
- # ============================================
- # Backend Service (FastAPI + PostgreSQL)
- # ============================================
backend-service:
build:
context: ./backend-service
- dockerfile: Dockerfile
+ dockerfile: Dockerfile.prod
+ # TIP: To run multiple replicas for load balancing, remove `container_name`
+ # and run: docker-compose up --scale backend-service=2
+ # Nginx's `least_conn` upstream will automatically distribute traffic
+ # across all replicas via Docker's internal DNS round-robin.
container_name: lexilingo-backend-service
restart: unless-stopped
- env_file:
- - ./backend-service/.env
deploy:
resources:
limits:
+ cpus: "1.0"
memory: 1G
reservations:
- memory: 512M
- mem_limit: 1g
- memswap_limit: 1g
- ports:
- - "8000:8000"
+ cpus: "0.25"
+ memory: 256M
+ env_file:
+ - .env.production
+ - .env.production.secrets
environment:
- DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-lexilingo}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-lexilingo}
- SECRET_KEY: ${SECRET_KEY:?Set SECRET_KEY in .env}
- DEBUG: ${DEBUG:-False}
- ALLOWED_ORIGINS: ${ALLOWED_ORIGINS:?Set ALLOWED_ORIGINS in .env}
+ DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-lexilingo}:${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set}@postgres:5432/${POSTGRES_DB:-lexilingo}
+ REDIS_URL: redis://:${REDIS_PASSWORD:?REDIS_PASSWORD must be set}@redis:6379/0
+ REDIS_PASSWORD: ${REDIS_PASSWORD:?REDIS_PASSWORD must be set}
AI_SERVICE_URL: http://ai-service:8001/api/v1
- # Redis (use container hostname, not localhost)
- REDIS_URL: redis://redis:6379/0
- REDIS_HOST: redis
- REDIS_PORT: 6379
- # Firebase
- FIREBASE_PROJECT_ID: ${FIREBASE_PROJECT_ID:-lexilingo-88492}
- FIREBASE_CREDENTIALS_FILE: /app/firebase-service-account.json
- GOOGLE_CLIENT_ID: ${GOOGLE_CLIENT_ID:-}
- GOOGLE_ADMIN_CLIENT_ID: ${GOOGLE_ADMIN_CLIENT_ID:-}
+ CONTENT_AGENT_ENABLED: ${CONTENT_AGENT_ENABLED:-false}
+ CONTENT_AGENT_SERVICE_TOKEN: ${CONTENT_AGENT_SERVICE_TOKEN:-}
depends_on:
postgres:
condition: service_healthy
redis:
condition: service_healthy
+ expose:
+ - "8000"
volumes:
- - ./backend-service/app:/app/app
- - ./backend-service/alembic:/app/alembic
- ./backend-service/firebase-service-account.json:/app/firebase-service-account.json:ro
- command: uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload --proxy-headers --forwarded-allow-ips="*"
+ healthcheck:
+ test: ["CMD-SHELL", "python -c \"import urllib.request; urllib.request.urlopen('http://127.0.0.1:8000/health', timeout=3)\""]
+ interval: 30s
+ timeout: 5s
+ retries: 5
+ start_period: 60s
+ logging:
+ driver: json-file
+ options:
+ max-size: "10m"
+ max-file: "5"
networks:
- - lexilingo-network
+ - lexilingo-prod
backend-reminder-worker:
build:
context: ./backend-service
- dockerfile: Dockerfile
+ dockerfile: Dockerfile.prod
container_name: lexilingo-reminder-worker
restart: unless-stopped
command: celery -A app.core.celery_app:celery_app worker --loglevel=INFO --concurrency=1
+ env_file:
+ - .env.production
+ - .env.production.secrets
environment:
- DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-lexilingo}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-lexilingo}
+ DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-lexilingo}:${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set}@postgres:5432/${POSTGRES_DB:-lexilingo}
+ REDIS_URL: redis://:${REDIS_PASSWORD:?REDIS_PASSWORD must be set}@redis:6379/0
+ REDIS_PASSWORD: ${REDIS_PASSWORD:?REDIS_PASSWORD must be set}
SECRET_KEY: ${SECRET_KEY:?Set SECRET_KEY in .env}
DEBUG: ${DEBUG:-False}
- REDIS_URL: redis://redis:6379/0
+ AI_SERVICE_URL: http://ai-service:8001/api/v1
+ CONTENT_AGENT_ENABLED: ${CONTENT_AGENT_ENABLED:-false}
+ CONTENT_AGENT_SERVICE_TOKEN: ${CONTENT_AGENT_SERVICE_TOKEN:-}
REMINDERS_ENABLED: ${REMINDERS_ENABLED:-false}
REMINDER_DRY_RUN: ${REMINDER_DRY_RUN:-true}
- FIREBASE_PROJECT_ID: ${FIREBASE_PROJECT_ID:-lexilingo-88492}
- FIREBASE_CREDENTIALS_FILE: /app/firebase-service-account.json
depends_on:
postgres:
condition: service_healthy
redis:
condition: service_healthy
volumes:
- - ./backend-service/app:/app/app
- - ./backend-service/alembic:/app/alembic
- ./backend-service/firebase-service-account.json:/app/firebase-service-account.json:ro
+ logging:
+ driver: json-file
+ options:
+ max-size: "10m"
+ max-file: "5"
networks:
- - lexilingo-network
+ - lexilingo-prod
+ healthcheck:
+ disable: true
backend-reminder-beat:
build:
context: ./backend-service
- dockerfile: Dockerfile
+ dockerfile: Dockerfile.prod
container_name: lexilingo-reminder-beat
restart: unless-stopped
command: celery -A app.core.celery_app:celery_app beat --loglevel=INFO
+ env_file:
+ - .env.production
+ - .env.production.secrets
environment:
- DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-lexilingo}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-lexilingo}
- SECRET_KEY: ${SECRET_KEY:?Set SECRET_KEY in .env}
- DEBUG: ${DEBUG:-False}
- REDIS_URL: redis://redis:6379/0
+ DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-lexilingo}:${POSTGRES_PASSWORD:?POSTGRES_PASSWORD must be set}@postgres:5432/${POSTGRES_DB:-lexilingo}
+ REDIS_URL: redis://:${REDIS_PASSWORD:?REDIS_PASSWORD must be set}@redis:6379/0
+ REDIS_PASSWORD: ${REDIS_PASSWORD:?REDIS_PASSWORD must be set}
REMINDERS_ENABLED: ${REMINDERS_ENABLED:-false}
REMINDER_DRY_RUN: ${REMINDER_DRY_RUN:-true}
depends_on:
redis:
condition: service_healthy
- volumes:
- - ./backend-service/app:/app/app
+ logging:
+ driver: json-file
+ options:
+ max-size: "10m"
+ max-file: "5"
networks:
- - lexilingo-network
+ - lexilingo-prod
+ healthcheck:
+ disable: true
- # ============================================
- # AI Service (FastAPI + MongoDB)
- # ============================================
ai-service:
build:
context: ./ai-service
- dockerfile: Dockerfile
+ dockerfile: Dockerfile.prod
container_name: lexilingo-ai-service
restart: unless-stopped
deploy:
resources:
limits:
- memory: 6G
- reservations:
+ cpus: "2.0"
memory: 2G
- mem_limit: 6g
- memswap_limit: 6g
- ports:
- - "8001:8001"
+ reservations:
+ cpus: "0.5"
+ memory: 512M
+ env_file:
+ - .env.production
+ - .env.production.secrets
environment:
- ENVIRONMENT: development
- SECRET_KEY: ${SECRET_KEY:?Set SECRET_KEY in .env}
+ ENVIRONMENT: production
MONGODB_URI: mongodb://mongodb:27017
- MONGODB_DB_NAME: lexilingo
- REDIS_HOST: redis-ai
+ MONGODB_DATABASE: ${MONGODB_DATABASE:-lexilingo}
+ REDIS_URL: redis://:${REDIS_PASSWORD:?REDIS_PASSWORD must be set}@redis:6379/1
+ REDIS_HOST: redis
REDIS_PORT: 6379
- REDIS_PASSWORD: ""
- REDIS_DB: 0
- REDIS_URL: redis://redis-ai:6379/0
+ REDIS_PASSWORD: ${REDIS_PASSWORD:?REDIS_PASSWORD must be set}
+ REDIS_DB: 1
+ AI_MODEL_API_URL: http://ai-service:8001
+ OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-http://host.docker.internal:11434}
GEMINI_API_KEY: ${GEMINI_API_KEY}
ALLOWED_ORIGINS: ${ALLOWED_ORIGINS:?Set ALLOWED_ORIGINS in .env}
DEBUG: ${DEBUG:-false}
+ CONTENT_AGENT_SERVICE_TOKEN: ${CONTENT_AGENT_SERVICE_TOKEN:-}
+ CONTENT_AGENT_TTL_SECONDS: ${CONTENT_AGENT_TTL_SECONDS:-3600}
+ CONTENT_AGENT_MAX_RECORDS: ${CONTENT_AGENT_MAX_RECORDS:-20000}
+ CONTENT_AGENT_MAX_BATCH_RECORDS: ${CONTENT_AGENT_MAX_BATCH_RECORDS:-2000}
+ CONTENT_AGENT_ALLOW_LOCAL_STORE: ${CONTENT_AGENT_ALLOW_LOCAL_STORE:-true}
depends_on:
mongodb:
condition: service_healthy
- redis-ai:
+ redis:
condition: service_healthy
+ expose:
+ - "8001"
+ extra_hosts:
+ - "host.docker.internal:host-gateway"
volumes:
- - ./ai-service/api:/app/api
+ - ai_models:/app/models
- ./ai-service/data:/app/data
- - ./ai-service/models:/app/models
- command: uvicorn api.main:app --host 0.0.0.0 --port 8001 --reload
+ - ./backend-service/data/media:/app/data/media:ro
+ - content_etl_data:/data/content-etl
+ healthcheck:
+ test: ["CMD-SHELL", "python -c \"import urllib.request; urllib.request.urlopen('http://127.0.0.1:8001/health', timeout=3)\""]
+ interval: 30s
+ timeout: 5s
+ retries: 5
+ start_period: 90s
+ logging:
+ driver: json-file
+ options:
+ max-size: "10m"
+ max-file: "5"
networks:
- - lexilingo-network
+ - lexilingo-prod
# ============================================
# Prometheus — metrics scraper
@@ -355,7 +301,7 @@ services:
limits:
memory: 512M
ports:
- - "9090:9090"
+ - "127.0.0.1:9090:9090"
volumes:
- ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
- ./monitoring/rules:/etc/prometheus/rules:ro
@@ -366,7 +312,7 @@ services:
- "--storage.tsdb.retention.time=15d"
- "--web.enable-lifecycle"
networks:
- - lexilingo-network
+ - lexilingo-prod
# ============================================
# Grafana — metrics dashboard
@@ -380,7 +326,7 @@ services:
limits:
memory: 512M
ports:
- - "3001:3000"
+ - "127.0.0.1:3001:3000"
environment:
GF_SECURITY_ADMIN_USER: ${GRAFANA_ADMIN_USER:-admin}
GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:?Set GRAFANA_ADMIN_PASSWORD in .env}
@@ -392,16 +338,14 @@ services:
depends_on:
- prometheus
networks:
- - lexilingo-network
+ - lexilingo-prod
volumes:
postgres_data:
- driver: local
mongodb_data:
- driver: local
mongodb_config:
- driver: local
redis_data:
+ ai_models:
driver: local
redis_ai_data:
driver: local
@@ -409,7 +353,9 @@ volumes:
driver: local
grafana_data:
driver: local
+ content_etl_data:
+ driver: local
networks:
- lexilingo-network:
+ lexilingo-prod:
driver: bridge
diff --git a/docs/ Plan_Vocabulary Speak.md b/docs/ Plan_Vocabulary Speak.md
deleted file mode 100644
index 46d614b5..00000000
--- a/docs/ Plan_Vocabulary Speak.md
+++ /dev/null
@@ -1,111 +0,0 @@
-# Implementation Plan - Vocabulary Speaking Practice & FSRS Spaced Repetition
-
-This plan details the implementation of a new Vocabulary Speaking Practice feature for LexiLingo. It covers adding FSRS algorithm support in the database and backend, exposing the HuBERT pronunciation evaluation model in the AI service, and building a premium Flutter speaking practice interface.
-
-## User Review Required
-
-> [!IMPORTANT]
-> **FSRS Column Addition**: We will add 8 new columns to the `user_vocabulary` table to support the FSRS algorithm. The existing `next_review_date` and `last_reviewed_at` fields will be shared by both algorithms, maintaining 100% backward compatibility with existing queries.
-> **FSRS Rating Mapping**: In the speaking practice flow, the quality score (0-5) will be mapped from the pronunciation stars:
-> - 3 Stars (Amazing / score >= 80) -> Quality 5 (Easy / Perfect)
-> - 2 Stars (Good / 60 <= score < 80) -> Quality 3 (Good)
-> - 1 Star (Try again / score < 60) -> Quality 1 (Incorrect)
-
-## Open Questions
-
-> [!NOTE]
-> None. The requirements and design are fully specified by the user request and mockup image.
-
----
-
-## Proposed Changes
-
-### Database & Backend Components
-
-#### [NEW] [add_fsrs_and_speaking_fields.py](file:///Users/nguyenhuuthang/Documents/RepoGitHub/LexiLingo/backend-service/alembic/versions/add_fsrs_and_speaking_fields.py)
-- Create a new Alembic migration to add the following columns to `user_vocabulary`:
- - `fsrs_stability` (Float, default 0.0, nullable=True)
- - `fsrs_difficulty` (Float, default 0.0, nullable=True)
- - `fsrs_elapsed_days` (Integer, default=0, nullable=True)
- - `fsrs_scheduled_days` (Integer, default=0, nullable=True)
- - `fsrs_reps` (Integer, default=0, nullable=True)
- - `fsrs_lapses` (Integer, default=0, nullable=True)
- - `fsrs_state` (Integer, default=0, nullable=True)
- - `fsrs_last_review` (DateTime, nullable=True)
-
-#### [MODIFY] [vocabulary.py (model)](file:///Users/nguyenhuuthang/Documents/RepoGitHub/LexiLingo/backend-service/app/models/vocabulary.py)
-- Update the `UserVocabulary` class to declare the new FSRS fields.
-
-#### [MODIFY] [vocabulary.py (schema)](file:///Users/nguyenhuuthang/Documents/RepoGitHub/LexiLingo/backend-service/app/schemas/vocabulary.py)
-- Add FSRS fields to `UserVocabularyResponse`.
-- Add a new schema `PronunciationEvaluationResponse` returning score, stars, feedback label, transcription, and errors list.
-
-#### [MODIFY] [vocabulary.py (crud)](file:///Users/nguyenhuuthang/Documents/RepoGitHub/LexiLingo/backend-service/app/crud/vocabulary.py)
-- Update the `submit_review` function to calculate and update FSRS fields alongside SM-2 fields.
-- Implement the FSRS scheduling formulas:
- - If reps = 0: Initialize stability and difficulty based on the rating.
- - Else: Calculate retrievability, update difficulty and stability, reset or increase repetitions/lapses, and set the next interval.
- - Set `next_review_date = now + timedelta(days=interval)`.
-
-#### [MODIFY] [vocabulary.py (route)](file:///Users/nguyenhuuthang/Documents/RepoGitHub/LexiLingo/backend-service/app/routes/vocabulary.py)
-- Add a new route `POST /api/v1/vocabulary/pronunciation/evaluate`:
- - Accepts `audio` (UploadFile) and `vocabulary_id` (UUID).
- - Fetches the vocabulary word.
- - Forwards the audio file and target word to the `ai-service`.
- - Maps the evaluation score to stars and feedback ("Amazing" / "Good" / "Try again") and returns the result.
-
----
-
-### AI Service Components
-
-#### [NEW] [pronunciation.py (route)](file:///Users/nguyenhuuthang/Documents/RepoGitHub/LexiLingo/ai-service/api/routes/pronunciation.py)
-- Add a route `POST /api/v1/stt/assess-pronunciation`:
- - Accepts `audio` file and `target_text` parameter.
- - Decodes the uploaded audio to a float32 16kHz numpy array using `faster_whisper.audio.decode_audio` or scipy.
- - Runs `HuBERTService` to evaluate pronunciation.
- - Returns `overall_score`, `phoneme_scores`, and detailed phoneme errors/suggestions.
-
-#### [MODIFY] [main.py](file:///Users/nguyenhuuthang/Documents/RepoGitHub/LexiLingo/ai-service/api/main.py)
-- Register the new pronunciation router.
-
----
-
-### Flutter App (Frontend)
-
-#### [NEW] [vocabulary_speaking_practice_screen.dart](file:///Users/nguyenhuuthang/Documents/RepoGitHub/LexiLingo/flutter-app/lib/features/vocabulary/presentation/screens/vocabulary_speaking_practice_screen.dart)
-- Build a premium UI matching the user's mockup:
- - App bar: title `< Lesson 1 Speaking practice` with macOS/Windows style close, maximize, and minimize action controls on the right.
- - Normal speed & Turtle speed (slow) speaker buttons on the top left.
- - Progress bar: green bar displaying progress (e.g. `No. 11 [=======] 11 in total`).
- - Target word in large green font, e.g. "religion", IPA "/rɪˈlɪdʒən/", and a "Meaning" button.
- - Interactive recording: Record user's pronunciation using `record` package with wave animations.
- - Bottom panel containing:
- - Quality text: e.g. "Amazing" / "Good" / "Try again".
- - Play button: "My pronunciation" (plays user's recorded audio using `just_audio`).
- - Stars: 1-3 yellow stars.
- - Buttons: "Try again" (light outline button) and "Submit" (solid green button).
- - Connect to backend:
- - Call evaluation API upon finishing recording.
- - Call review submission API when clicking "Submit" to save the spaced repetition data.
-
-#### [MODIFY] [flashcard_review_screen.dart](file:///Users/nguyenhuuthang/Documents/RepoGitHub/LexiLingo/flutter-app/lib/features/vocabulary/presentation/screens/flashcard_review_screen.dart)
-- Provide a navigation entry or option to switch to Speaking Practice Mode.
-
----
-
-## Verification Plan
-
-### Automated Tests
-- Run backend unit tests to verify FSRS scheduling formulas:
- `pytest backend-service/tests`
-- Run AI service test cases to verify HuBERT pronunciation evaluation:
- `pytest ai-service/tests`
-
-### Manual Verification
-- Launch backend and AI services.
-- Test the new Speaking Practice screen on the iOS/Android simulator:
- - Check speaking practice page layout and alignment with mockup.
- - Click audio/speaker buttons to hear reference pronunciation.
- - Press record button, speak, verify that it uploads and displays correct pronunciation score (stars, text feedback, phonemes).
- - Press "My pronunciation" to replay own voice.
- - Click "Submit", verify database successfully updates FSRS properties (`fsrs_stability`, `fsrs_reps`etc.).
diff --git a/docs/Checklist_Vocabulary_Speak.md b/docs/Checklist_Vocabulary_Speak.md
deleted file mode 100644
index 8944b280..00000000
--- a/docs/Checklist_Vocabulary_Speak.md
+++ /dev/null
@@ -1,48 +0,0 @@
-# Vocabulary Speaking Practice Checklist
-
-Source plan: `docs/ Plan_Vocabulary Speak.md`
-
-## Scope
-
-- [x] Add FSRS columns to `user_vocabulary` with backward-compatible defaults.
-- [x] Expose FSRS fields from backend models and response schemas.
-- [x] Update vocabulary review submission to calculate SM-2 and FSRS in parallel.
-- [x] Add backend pronunciation evaluation endpoint under `/api/v1/vocabulary/pronunciation/evaluate`.
-- [x] Add AI service HuBERT pronunciation endpoint under `/api/v1/stt/assess-pronunciation`.
-- [x] Forward pronunciation audio from backend to AI service with the user's authorization header.
-- [x] Map pronunciation score to speaking feedback:
- - [x] `score >= 80`: 3 stars, `Amazing`, quality `5`.
- - [x] `60 <= score < 80`: 2 stars, `Good`, quality `3`.
- - [x] `score < 60`: 1 star, `Try again`, quality `1`.
-- [x] Add Flutter API model and repository method for pronunciation evaluation.
-- [x] Build Flutter vocabulary speaking practice screen with record, evaluate, replay, retry, and submit controls.
-- [x] Add navigation from flashcard review to speaking practice mode.
-- [x] Run backend tests for vocabulary/FSRS.
-- [x] Run AI service tests for pronunciation route.
-- [x] Run Flutter static analysis.
-- [x] Run Flutter model test for pronunciation evaluation mapping.
-
-## Implementation Order
-
-- [x] Backend database migration and SQLAlchemy model fields.
-- [x] Backend FSRS calculation and schema exposure.
-- [x] Backend pronunciation proxy route.
-- [x] AI pronunciation route and router registration.
-- [x] Flutter data/domain API wiring.
-- [x] Flutter speaking practice UI and navigation.
-- [x] Verification pass and checklist status update.
-
-## Next Plan: FSRS Reminder Scheduler
-
-Plan: `docs/superpowers/plans/2026-06-01-fsrs-reminder-scheduler.md`
-
-- [ ] Add backend reminder preferences and delivery audit tables.
-- [ ] Add user-configurable reminder settings API.
-- [ ] Add persisted notification API.
-- [ ] Add FSRS due-count scheduler adapter based on `next_review_date`.
-- [ ] Add FCM push service for vocabulary review reminders.
-- [ ] Add vocabulary review reminder email service and templates.
-- [ ] Add Celery worker and beat process configuration.
-- [ ] Sync Flutter Settings reminder controls with backend.
-- [ ] Route reminder notification taps to vocabulary review.
-- [ ] Verify backend, Flutter, worker dry-run, and production rollout safety.
diff --git a/docs/Report/RPT-001_REPORT_INDEX.md b/docs/Report/RPT-001_REPORT_INDEX.md
index 22e9b15b..95ae62e8 100644
--- a/docs/Report/RPT-001_REPORT_INDEX.md
+++ b/docs/Report/RPT-001_REPORT_INDEX.md
@@ -51,6 +51,7 @@ Bộ tài liệu Report được đánh mã theo thứ tự để theo dõi rõ
| 26 | `RPT-026_SPACED_REPETITION_VOCABULARY.md` | **Spaced Repetition & Vocab** — Thuật toán SM-2, Mastery Score tracking |
| 27 | `RPT-027_PROFICIENCY_CEFR_ASSESSMENT.md` | **Proficiency CEFR Assessment** — Đánh giá đa kỹ năng, weight system |
| 28 | `RPT-028_REALWORLD_CONTENT_INTEGRATION.md` | **Real-world Content Integration** — Tích hợp NewsAPI & YouTube với AI |
+| 29 | `RPT-029_FLUTTER_UI_FILE_INVENTORY.md` | **Kiểm kê file giao diện Flutter** — 158 file UI (trừ admin), độ ưu tiên đổi icon, trạng thái redesign |
| 0 | `RPT-000_UPDATE_SUMMARY.md` | **Tổng hợp cập nhật** — danh sách thay đổi và số liệu thống kê |
---
diff --git a/docs/Report/RPT-024_GAMES_ENGINE.md b/docs/Report/RPT-024_GAMES_ENGINE.md
index 316a3623..ae7420f5 100644
--- a/docs/Report/RPT-024_GAMES_ENGINE.md
+++ b/docs/Report/RPT-024_GAMES_ENGINE.md
@@ -1,6 +1,6 @@
# RPT-024 — Hệ Thống Câu Hỏi và Mini-Game (Games Engine)
-> **Cập nhật:** 2026-04-24 | **Nguồn:** `backend-service/app/routes/games.py` (1351 dòng, 67KB)
+> **Cập nhật:** 2026-06-15 | **Nguồn:** `backend-service/app/routes/games.py`, `app/services/`, `flutter-app/lib/features/games/`
---
@@ -208,4 +208,29 @@ Game hoàn thành → score được tính
---
+---
+
+## Cập Nhật Stability 2026-06-15
+
+### Server-Authoritative XP
+
+Tất cả XP từ game đi qua `xp_service.py::award_xp_transaction()`. Client không thể submit `base_xp` tuỳ ý — server tính lại từ game type và CEFR level. `source_id` (game session ID) là khoá dedup để chống duplicate award.
+
+### Pronunciation Service (Flutter)
+
+`GamePronunciationService` ưu tiên `audio_url` từ payload, fallback TTS qua `VoiceRemoteDataSource`, emit `AudioError` state nếu cả hai đều fail.
+
+### Flutter Game Tests (104 tests)
+
+- `game_completion_test.dart` — 13 tests: tất cả 6 game types, XP earned/capped/failed
+- `game_load_state_test.dart` — 13 tests: loading spinner, error + retry
+- `game_accessibility_test.dart` — 14 tests: semantic labels, touch targets ≥40px, 375/390/768px
+- `game_pronunciation_service_test.dart` — 10 tests: audio_url prefer, TTS fallback, error
+
+### Acceptance Verification
+
+Xem `docs/qa/game-system-acceptance.md` để biết kết quả automated tests và checklist manual validation.
+
+---
+
*Tham khảo: [RPT-018](RPT-018_FEATURE_ANALYSIS.md) | [RPT-020](RPT-020_BACKEND_SERVICE_REPORT.md) | [RPT-025](RPT-025_GAMIFICATION_XP_SYSTEM.md)*
diff --git a/docs/Report/RPT-025_GAMIFICATION_XP_SYSTEM.md b/docs/Report/RPT-025_GAMIFICATION_XP_SYSTEM.md
index 7aa8dd6a..ce53bde1 100644
--- a/docs/Report/RPT-025_GAMIFICATION_XP_SYSTEM.md
+++ b/docs/Report/RPT-025_GAMIFICATION_XP_SYSTEM.md
@@ -1,6 +1,6 @@
# RPT-025 — Hệ Thống Gamification: XP, Streak, Wallet, Shop, Leaderboard
-> **Cập nhật:** 2026-04-24 | **Nguồn:** `backend-service/app/routes/gamification.py` (31KB), `xp.py` (16KB), `backend-service/app/models/gamification.py` (14KB)
+> **Cập nhật:** 2026-06-15 | **Nguồn:** `backend-service/app/routes/gamification.py`, `app/services/xp_service.py`, `app/models/gamification.py`
---
@@ -275,4 +275,25 @@ Daily Goal ──► Streak Tracker ──► Wallet (streak rewards)
---
+---
+
+## Cập Nhật Security Model 2026-06-15
+
+### source_id Requirement
+
+Các `source` repeat-sensitive (`game`, `lesson`, `daily_challenge`) bắt buộc phải có `source_id` khi gọi `award_xp_transaction()`. Thiếu `source_id` → HTTP 422. Cơ chế này ngăn client tạo XP không có session server-issued.
+
+```python
+REPEAT_SENSITIVE_SOURCES = frozenset({"game", "lesson", "daily_challenge"})
+```
+
+### Test Coverage
+
+`tests/test_xp_service.py` — 70 tests bao gồm:
+- source_id validation cho tất cả 3 repeat-sensitive sources
+- Daily XP cap per source type
+- Streak multiplier calculation
+- Level progression thresholds
+- Achievement unlock triggers
+
*Tham khảo: [RPT-024](RPT-024_GAMES_ENGINE.md) | [RPT-018](RPT-018_FEATURE_ANALYSIS.md) | [RPT-020](RPT-020_BACKEND_SERVICE_REPORT.md)*
diff --git a/docs/Report/RPT-029_FLUTTER_UI_FILE_INVENTORY.md b/docs/Report/RPT-029_FLUTTER_UI_FILE_INVENTORY.md
new file mode 100644
index 00000000..07f46f9d
--- /dev/null
+++ b/docs/Report/RPT-029_FLUTTER_UI_FILE_INVENTORY.md
@@ -0,0 +1,382 @@
+# RPT-029 — Kiểm Kê File Giao Diện Flutter (Loại Trừ Admin)
+
+> **Cập nhật:** 2026-06-21
+> **Liên quan:** [`RPT-005_FLUTTER_MODULE_CATALOG.md`](./RPT-005_FLUTTER_MODULE_CATALOG.md) (catalog module ở mức feature), [`RPT-022_FLUTTER_APP_ARCHITECTURE.md`](./RPT-022_FLUTTER_APP_ARCHITECTURE.md) (kiến trúc tổng thể)
+
+---
+
+## 1. Mục Đích
+
+Báo cáo này kiểm kê **chính xác** toàn bộ file giao diện (UI rendering) trong `flutter-app/lib/`, phục vụ cho chiến dịch cải cách giao diện ("biến app giống một game hơn", dùng icon tự tạo thay icon thư viện Material). Phạm vi: **toàn app, trừ `features/admin/*`** (module quản trị riêng, không thuộc app người dùng cuối).
+
+> **Lưu ý sửa số liệu:** một báo cáo miệng trước đó trong cùng phiên làm việc đã nói "~190 file giao diện" — số đó thực chất là **tổng số file trong toàn bộ lớp `presentation/`**, bao gồm cả `providers/` (state management) và `utils/` (helper, không vẽ UI). Báo cáo này tách rõ ba nhóm để số liệu phản ánh đúng "file giao diện" theo nghĩa hẹp.
+
+---
+
+## 2. Phương Pháp Kiểm Kê
+
+- Quét toàn bộ `*.dart` dưới `lib/features/*/presentation/` và `lib/core/`, loại `lib/features/admin/**`.
+- Phân loại theo subfolder thực tế trong từng feature:
+ - **File giao diện thật** (UI rendering — đối tượng cải cách): `pages/`, `screens/`, `widgets/`, `painters/`
+ - **Không phải giao diện** (giữ nguyên khi redesign): `providers/` (state/ViewModel), `utils/` (resolver/helper logic)
+- Core dùng chung (`lib/core/widgets/`, `lib/core/theme/`, `lib/core/services/theme_preference_store.dart`) tính riêng vì áp dụng cho mọi feature.
+- Độ ưu tiên redesign mỗi feature = số lượt gọi `Icons.*` (Material icon) hiện có trong feature đó — càng nhiều, càng cần thay icon tự tạo (`GameIcon`, xem mục 6).
+
+---
+
+## 3. Số Liệu Tổng Quan
+
+| Nhóm | Số file |
+|---|---|
+| **File giao diện thật** (pages + screens + widgets + painters), trong `features/*` (trừ admin) | **134** |
+| Core/Shared UI (`core/widgets/` × 22 + `core/theme/app_theme.dart` + `core/services/theme_preference_store.dart`) | **24** |
+| **Tổng file giao diện thật (core + feature)** | **158** |
+| Providers (state management, *không* phải giao diện) | 31 |
+| Utils (resolver/helper, *không* phải giao diện) | 2 |
+| **Tổng toàn bộ lớp `presentation/` (giao diện + provider + utils)** | 167 |
+
+Breakdown theo loại subfolder trong `features/*/presentation/`:
+
+| Loại | Số file |
+|---|---|
+| `widgets/` | 74 |
+| `screens/` | 39 |
+| `providers/` | 31 |
+| `pages/` | 20 |
+| `utils/` | 2 |
+| `painters/` | 1 |
+
+---
+
+## 4. Core / Shared UI (áp dụng toàn app — ưu tiên cải cách đầu tiên)
+
+| File | Vai trò |
+|---|---|
+| `core/theme/app_theme.dart` | Theme tokens trung tâm (màu, `AppColorRoles`, gradient) |
+| `core/services/theme_preference_store.dart` | Lưu lựa chọn dark/light mode |
+| `core/widgets/game_icon.dart` | **Mới** — registry `GameIcon` enum, map icon bán → asset `assets/icon-library/`, fallback Material khi chưa có art |
+| `core/widgets/widgets.dart` | Barrel export toàn bộ widget dùng chung |
+| `core/widgets/app_button.dart` | Button chuẩn |
+| `core/widgets/animated_components.dart` / `animated_ui_components.dart` / `custom_animations.dart` | Hiệu ứng animation dùng chung |
+| `core/widgets/animation_showcase_page.dart` | Trang demo animation (dev tool) |
+| `core/widgets/badge_generator.dart` / `cefr_badge.dart` | Sinh badge cấp độ CEFR |
+| `core/widgets/celebration_widget.dart` | Hiệu ứng ăn mừng (level up, hoàn thành) |
+| `core/widgets/empty_state_widget.dart` / `error_widget.dart` | Trạng thái rỗng / lỗi chuẩn |
+| `core/widgets/glassmorphic_components.dart` | Hiệu ứng glassmorphism (progress ring...) |
+| `core/widgets/language_switcher_button.dart` | Đổi ngôn ngữ app |
+| `core/widgets/lottie_animation_widget.dart` / `lottie_loading_widget.dart` | Lottie animation wrapper |
+| `core/widgets/network_avatar_image.dart` | Avatar tải từ network |
+| `core/widgets/premium_gate.dart` | Khoá tính năng premium |
+| `core/widgets/quick_save_selection_area.dart` / `quick_save_word_sheet.dart` | Lưu nhanh từ vựng |
+| `core/widgets/skeleton_loading.dart` | Skeleton/shimmer loading |
+| `core/widgets/stagger_list.dart` | List có animation stagger |
+
+---
+
+## 5. File Giao Diện Theo Feature (134 file, sort theo số lượng)
+
+| # | Feature | File giao diện | Lượt dùng `Icons.*` (độ ưu tiên đổi icon) | Trạng thái redesign |
+|---|---|---|---|---|
+| 1 | `games` | 16 | 32 | Chưa làm |
+| 2 | `gamification` | 15 | 53 | Chưa làm |
+| 3 | `vocabulary` | 11 | 91 | Chưa làm |
+| 4 | `learning` | 10 | 72 | Chưa làm |
+| 5 | `chat` | 10 | 162 (cao nhất) | Chưa làm |
+| 6 | `auth` | 10 | 57 | Chưa làm |
+| 7 | `progress` | 9 | 64 | Chưa làm |
+| 8 | `voice` | 7 | 33 | Chưa làm |
+| 9 | `podcast` | 7 | 36 | Chưa làm |
+| 10 | `books` | 7 | 37 | Chưa làm |
+| 11 | `profile` | 4 | 61 | Chưa làm |
+| 12 | `lexi_chat` | 4 | 20 | Chưa làm |
+| 13 | `achievements` | 4 | 62 | Chưa làm |
+| 14 | `news` | 3 | 28 | Chưa làm |
+| 15 | `level` | 3 | 20 | Chưa làm |
+| 16 | `home` | 3 | 28 (↓ từ 58) | ✅ **Pilot xong** |
+| 17 | `course` | 3 | 76 | Chưa làm |
+| 18 | `youtube` | 2 | 45 | Chưa làm |
+| 19 | `user` | 2 | 32 | Chưa làm |
+| 20 | `notifications` | 2 | 15 | Chưa làm |
+| 21 | `social` | 1 | 23 | Chưa làm |
+| 22 | `premium` | 1 | 6 | Chưa làm |
+
+> `course` có 76 lượt `Icons.*` chỉ trong 3 file UI — đa số nằm ở `home_page.dart` (đã giảm khi redesign Home) hiển thị card khoá học; cần kiểm tra lại khi vào lượt `course`.
+
+### Danh sách đầy đủ từng file (bấm để mở rộng từng feature)
+
+<details><summary>features/achievements — 4 file</summary>
+
+- `features/achievements/presentation/screens/achievements_screen.dart`
+- `features/achievements/presentation/screens/screens.dart`
+- `features/achievements/presentation/widgets/achievement_unlock_overlay.dart`
+- `features/achievements/presentation/widgets/achievement_widgets.dart`
+
+</details>
+
+<details><summary>features/auth — 10 file</summary>
+
+- `features/auth/presentation/pages/email_verification_pending_page.dart`
+- `features/auth/presentation/pages/forgot_password_page.dart`
+- `features/auth/presentation/pages/login_page.dart`
+- `features/auth/presentation/pages/onboarding_page.dart`
+- `features/auth/presentation/pages/pre_auth_questions_page.dart`
+- `features/auth/presentation/pages/register_page.dart`
+- `features/auth/presentation/pages/reset_password_page.dart`
+- `features/auth/presentation/pages/welcome_page.dart`
+- `features/auth/presentation/widgets/auth_gradient_background.dart`
+- `features/auth/presentation/widgets/auth_wrapper.dart`
+
+</details>
+
+<details><summary>features/books — 7 file</summary>
+
+- `features/books/presentation/screens/book_detail_screen.dart`
+- `features/books/presentation/screens/book_library_screen.dart`
+- `features/books/presentation/screens/book_quiz_screen.dart`
+- `features/books/presentation/screens/book_reader_screen.dart`
+- `features/books/presentation/widgets/book_card.dart`
+- `features/books/presentation/widgets/bookmark_button.dart`
+- `features/books/presentation/widgets/reader_controls.dart`
+
+</details>
+
+<details><summary>features/chat — 10 file</summary>
+
+- `features/chat/presentation/pages/story_selection_page.dart`
+- `features/chat/presentation/pages/topic_chat_page.dart`
+- `features/chat/presentation/widgets/audio_waveform.dart`
+- `features/chat/presentation/widgets/chat_ui_components.dart`
+- `features/chat/presentation/widgets/educational_hints_widgets.dart`
+- `features/chat/presentation/widgets/markdown_message_content.dart`
+- `features/chat/presentation/widgets/message_bubble.dart`
+- `features/chat/presentation/widgets/session_list_drawer.dart`
+- `features/chat/presentation/widgets/topic_card.dart`
+- `features/chat/presentation/widgets/widgets.dart`
+
+</details>
+
+<details><summary>features/course — 3 file</summary>
+
+- `features/course/presentation/screens/category_detail_screen.dart`
+- `features/course/presentation/screens/course_detail_screen.dart`
+- `features/course/presentation/screens/course_list_screen.dart`
+
+</details>
+
+<details><summary>features/games — 16 file</summary>
+
+- `features/games/presentation/screens/fill_blank_screen.dart`
+- `features/games/presentation/screens/game_result_screen.dart`
+- `features/games/presentation/screens/games_hub_screen.dart`
+- `features/games/presentation/screens/grammar_quiz_screen.dart`
+- `features/games/presentation/screens/hangman_screen.dart`
+- `features/games/presentation/screens/matching_game_screen.dart`
+- `features/games/presentation/screens/spelling_bee_screen.dart`
+- `features/games/presentation/screens/word_scramble_screen.dart`
+- `features/games/presentation/widgets/daily_challenge_card.dart`
+- `features/games/presentation/widgets/game_card.dart`
+- `features/games/presentation/widgets/game_load_state.dart`
+- `features/games/presentation/widgets/hangman_figure.dart`
+- `features/games/presentation/widgets/letter_tile.dart`
+- `features/games/presentation/widgets/level_up_dialog.dart`
+- `features/games/presentation/widgets/streak_indicator.dart`
+- `features/games/presentation/widgets/xp_progress_bar.dart`
+
+</details>
+
+<details><summary>features/gamification — 15 file</summary>
+
+- `features/gamification/presentation/screens/leaderboard_screen.dart`
+- `features/gamification/presentation/screens/league_ceremony_screen.dart`
+- `features/gamification/presentation/screens/shop_screen.dart`
+- `features/gamification/presentation/screens/wallet_screen.dart`
+- `features/gamification/presentation/widgets/active_boosts_bar.dart`
+- `features/gamification/presentation/widgets/boost_purchase_animation.dart`
+- `features/gamification/presentation/widgets/gem_counter.dart`
+- `features/gamification/presentation/widgets/leaderboard_podium.dart`
+- `features/gamification/presentation/widgets/league_card.dart`
+- `features/gamification/presentation/widgets/level_rank_display.dart`
+- `features/gamification/presentation/widgets/rank_asset_icon.dart`
+- `features/gamification/presentation/widgets/rank_badge.dart`
+- `features/gamification/presentation/widgets/rank_up_dialog.dart`
+- `features/gamification/presentation/widgets/shop_item_card.dart`
+- `features/gamification/presentation/widgets/starter_reward_dialog.dart`
+
+</details>
+
+<details><summary>features/home — 3 file (✅ pilot redesign đã xong)</summary>
+
+- `features/home/presentation/pages/home_page.dart`
+- `features/home/presentation/pages/main_screen.dart` *(bottom nav dùng `phosphor_flutter`, không phải Material icon — chưa cần đổi)*
+- `features/home/presentation/widgets/home_ui_components.dart`
+
+</details>
+
+<details><summary>features/learning — 10 file</summary>
+
+- `features/learning/presentation/painters/roadmap_path_painter.dart`
+- `features/learning/presentation/screens/learning_roadmap_screen.dart`
+- `features/learning/presentation/screens/learning_session_screen.dart`
+- `features/learning/presentation/widgets/lesson_content_widget.dart`
+- `features/learning/presentation/widgets/lesson_speaking_recorder.dart`
+- `features/learning/presentation/widgets/lesson_ui_components.dart`
+- `features/learning/presentation/widgets/premium_exercise_widgets.dart`
+- `features/learning/presentation/widgets/quiz_widget.dart`
+- `features/learning/presentation/widgets/roadmap_header_widget.dart`
+- `features/learning/presentation/widgets/roadmap_node_widget.dart`
+
+</details>
+
+<details><summary>features/level — 3 file</summary>
+
+- `features/level/presentation/widgets/level_widgets.dart`
+- `features/level/presentation/widgets/proficiency_card.dart`
+- `features/level/presentation/widgets/proficiency_radar_chart.dart`
+
+</details>
+
+<details><summary>features/lexi_chat — 4 file</summary>
+
+- `features/lexi_chat/presentation/pages/lexi_chat_page.dart`
+- `features/lexi_chat/presentation/widgets/lexi_corrections_sheet.dart`
+- `features/lexi_chat/presentation/widgets/lexi_dialogue_bubble.dart`
+- `features/lexi_chat/presentation/widgets/lexi_typing_indicator.dart`
+
+</details>
+
+<details><summary>features/news — 3 file</summary>
+
+- `features/news/presentation/screens/news_detail_screen.dart`
+- `features/news/presentation/screens/news_list_screen.dart`
+- `features/news/presentation/screens/news_quiz_screen.dart`
+
+</details>
+
+
features/notifications — 2 file
+
+- `features/notifications/presentation/pages/notifications_page.dart`
+- `features/notifications/presentation/widgets/empty_notification_widget.dart`
+
+
+
+
features/podcast — 7 file
+
+- `features/podcast/presentation/screens/podcast_detail_screen.dart`
+- `features/podcast/presentation/screens/podcast_explore_screen.dart`
+- `features/podcast/presentation/screens/podcast_player_screen.dart`
+- `features/podcast/presentation/widgets/audio_player_controls.dart`
+- `features/podcast/presentation/widgets/episode_tile.dart`
+- `features/podcast/presentation/widgets/podcast_card.dart`
+- `features/podcast/presentation/widgets/transcript_panel.dart`
+
+
+
+
features/premium — 1 file
+
+- `features/premium/presentation/screens/paywall_screen.dart`
+
+
+
+
features/profile — 4 file
+
+- `features/profile/presentation/pages/edit_profile_screen.dart`
+- `features/profile/presentation/pages/learning_stats_pages.dart`
+- `features/profile/presentation/pages/profile_page.dart`
+- `features/profile/presentation/widgets/profile_ui_components.dart`
+
+
+
+
features/progress — 9 file
+
+- `features/progress/presentation/screens/my_progress_screen.dart`
+- `features/progress/presentation/widgets/course_progress_card.dart`
+- `features/progress/presentation/widgets/daily_challenges_widget.dart`
+- `features/progress/presentation/widgets/daily_reward_dialog.dart`
+- `features/progress/presentation/widgets/points_calendar_dialog.dart`
+- `features/progress/presentation/widgets/progress_card.dart`
+- `features/progress/presentation/widgets/streak_milestone_overlay.dart`
+- `features/progress/presentation/widgets/streak_widget.dart`
+- `features/progress/presentation/widgets/xp_line_chart.dart`
+
+
+
+
features/social — 1 file
+
+- `features/social/presentation/screens/social_screen.dart`
+
+
+
+
features/user — 2 file
+
+- `features/user/presentation/pages/legal_page.dart`
+- `features/user/presentation/pages/settings_page.dart`
+
+
+
+
features/vocabulary — 11 file
+
+- `features/vocabulary/presentation/pages/vocab_library_page.dart`
+- `features/vocabulary/presentation/screens/flashcard_review_screen.dart`
+- `features/vocabulary/presentation/screens/session_complete_screen.dart`
+- `features/vocabulary/presentation/screens/vocabulary_speaking_practice_screen.dart`
+- `features/vocabulary/presentation/screens/word_of_day_screen.dart`
+- `features/vocabulary/presentation/widgets/daily_review_card.dart`
+- `features/vocabulary/presentation/widgets/flashcard_widget.dart`
+- `features/vocabulary/presentation/widgets/review_quality_buttons.dart`
+- `features/vocabulary/presentation/widgets/session_header.dart`
+- `features/vocabulary/presentation/widgets/vocab_word_detail_sheet.dart`
+- `features/vocabulary/presentation/widgets/word_of_day_card.dart`
+
+
+
+
features/voice — 7 file
+
+- `features/voice/presentation/screens/voice_practice_screen.dart`
+- `features/voice/presentation/widgets/playback_controls.dart`
+- `features/voice/presentation/widgets/pronunciation_score_card.dart`
+- `features/voice/presentation/widgets/record_button.dart`
+- `features/voice/presentation/widgets/speak_button.dart`
+- `features/voice/presentation/widgets/speech_recognition_button.dart`
+- `features/voice/presentation/widgets/tts_speed_selector.dart`
+
+
+
+
features/youtube — 2 file
+
+- `features/youtube/presentation/screens/youtube_explore_screen.dart`
+- `features/youtube/presentation/screens/youtube_player_screen.dart`
+
+
+
+---
+
+## 6. Hệ Thống Icon Tự Tạo (`GameIcon`)
+
+Hạ tầng đã dựng tại `lib/core/widgets/game_icon.dart`, nguồn art tại `assets/icon-library/` (gitignore — xem [`.gitignore`](../../flutter-app/.gitignore), cần tự giải nén `assets/icon-library.zip` khi build máy mới) + `assets/learning-icons/` (đã track).
+
+**Đã có asset thật (35 icon):** star, trophy, xp, gem, crown, checkmark, giftBox, treasureChest, speechBubble, settings, padlockUnlocked, clock, lessonBoard, playArrow, fastForward, rewind, backArrow, speakerOn, speakerMuted, lightBulb, heart, flashcards, grammar, listening, quizzes, speaking, vocabulary, streakFire, bolt, gameController, notificationBell, calendar, sunMorning, moonNight, book.
+
+**Còn thiếu, đang fallback Material icon:** `newspaper`, `headphones`, `sunsetAfternoon`, `translate`.
+
+Cách dùng: `AppGameIcon(GameIcon.trophy, size: 24)` thay cho `Icon(Icons.emoji_events)`. Icon chưa có asset vẫn render được (fallback), nên có thể wire toàn bộ feature trước, bổ sung art sau mà không cần sửa lại UI lần hai.
+
+---
+
+## 7. Đề Xuất Thứ Tự Cải Cách Tiếp Theo
+
+Dựa trên độ ưu tiên (mục 5) — feature càng nhiều `Icons.*` + càng hay gặp trong hành trình hằng ngày của user nên làm trước:
+
+1. **`learning`** — roadmap zigzag là màn hình lõi của trải nghiệm "giống game", đã có layout game sẵn, chỉ cần thay icon + polish thêm.
+2. **`games`** — trung tâm 7 mini-game, nơi cảm giác "game" cần rõ nhất.
+3. **`gamification`** — shop, leaderboard, wallet, đã mang tinh thần game nhẹ.
+4. **`vocabulary`** — nhiều `Icons.*` nhất sau chat (91 lượt), tần suất dùng cao (flashcard, ôn tập hằng ngày).
+5. **`chat` / `lexi_chat`** — nhiều `Icons.*` nhất tuyệt đối (162) nhưng bản chất là giao diện chat, mức độ "game hoá" cần cân nhắc riêng (tránh làm rối giao diện hội thoại).
+6. Các feature còn lại theo bảng mục 5.
+
+---
+
+## 8. Quy Tắc Bảo Trì
+
+- File này nên cập nhật lại số liệu mỗi khi một feature hoàn thành redesign (đổi cột "Trạng thái redesign" ở mục 5, cập nhật lại lượt `Icons.*` còn lại).
+- Khi thêm icon mới vào `GameIcon`, cập nhật danh sách "Đã có asset thật" / "Còn thiếu" ở mục 6.
+- Không tính `features/admin/*` vào báo cáo này — module quản trị có vòng đời thiết kế riêng.
diff --git a/docs/badge-design-prompts.md b/docs/badge-design-prompts.md
new file mode 100644
index 00000000..e2934a96
--- /dev/null
+++ b/docs/badge-design-prompts.md
@@ -0,0 +1,358 @@
+# Badge Design Prompts — LexiLingo
+**Style: Anime / 2D Game Art**
+
+---
+
+## Base Style DNA
+
+Dùng đoạn prefix này cho **mọi** prompt badge:
+
+```
+2D game badge, anime art style, flat vector illustration, cel-shading,
+vibrant saturated colors, bold black outline (2-3px), soft inner glow,
+glossy enamel pin aesthetic, centered composition, transparent background,
+512x512px, no text, no watermark
+```
+
+---
+
+## Bộ Badge Theo Cấp Độ CEFR
+
+### A1 — Beginner Spark
+```
+2D game badge, anime art style, flat vector illustration, cel-shading,
+vibrant saturated colors, bold black outline,
+a glowing yellow shooting star with sparkle trail, tiny open book floating beside it,
+soft pastel background aura, kawaii cute style, rounded badge shape,
+warm gold and cream color palette,
+transparent background, 512x512px, no text
+```
+
+### A2 — Rising Flame
+```
+2D game badge, anime art style, flat vector illustration, cel-shading,
+a small orange flame with anime-style face (determined expression),
+flame has gradient from yellow core to deep orange tips,
+surrounded by floating abstract kanji-inspired energy marks (non-legible brush strokes, not real characters),
+circular badge with inner ring glow, game RPG icon style,
+orange and amber color palette,
+transparent background, 512x512px, no text
+```
+
+### B1 — Thunder Blade
+```
+2D game badge, anime art style, flat vector illustration, cel-shading,
+a lightning bolt sword with electric aura,
+blue-white electric sparks radiating outward,
+game skill icon style (like JRPG),
+hexagonal badge frame with metallic border,
+electric blue and silver color palette,
+transparent background, 512x512px, no text
+```
+
+### B2 — Crystal Mind
+```
+2D game badge, anime art style, flat vector illustration, cel-shading,
+a floating crystal prism splitting light into rainbow,
+magical sparkles and stardust particles around it,
+elegant shoujo manga aesthetic, soft magical girl vibe,
+circular badge with ornate border,
+purple violet and iridescent color palette,
+transparent background, 512x512px, no text
+```
+
+### C1 — Shadow Phoenix
+```
+2D game badge, anime art style, flat vector illustration, cel-shading,
+a phoenix rising from dark flames, wings spread wide,
+feathers with glowing ember tips, dramatic shading,
+dark fantasy anime style, bold and powerful,
+shield-shaped badge with ornate dragon crest border,
+deep crimson and black with gold accent color palette,
+transparent background, 512x512px, no text
+```
+
+### C2 — Sovereign Dragon
+```
+2D game badge, anime art style, flat vector illustration, cel-shading,
+a majestic eastern dragon coiled around a glowing orb,
+iridescent scales, golden mane, ethereal energy wisps,
+epic JRPG boss aesthetic, regal and awe-inspiring,
+octagonal badge with ancient rune border,
+deep royal purple gold and midnight blue color palette,
+transparent background, 512x512px, no text
+```
+
+---
+
+## Bộ Badge Thành Tích (Achievement)
+
+### First Word — Khởi Đầu
+```
+2D game badge, anime art style, flat vector illustration, cel-shading,
+a chibi anime character holding a glowing speech bubble with a star inside,
+confetti and sparkles exploding around them, joyful expression,
+soft pastel circle badge with dotted border,
+sky blue and yellow color palette,
+transparent background, 512x512px, no text
+```
+
+### 7-Day Streak — Lửa Kiên Trì
+```
+2D game badge, anime art style, flat vector illustration, cel-shading,
+seven small flames arranged in a crown/arc formation,
+each flame slightly larger than the last, flame on the right is biggest and brightest,
+calendar grid pattern subtly in background,
+fiery badge shape with flame-tipped border,
+deep orange red and warm yellow color palette,
+transparent background, 512x512px, no text
+```
+
+### 30-Day Streak — Chiến Binh Ngọn Lửa
+```
+2D game badge, anime art style, flat vector illustration, cel-shading,
+a samurai helmet (kabuto) engulfed in controlled blue flames,
+flames forming a perfect halo behind the helmet, intense determination aura,
+katana crossed below the helmet,
+angular warrior badge frame, dark blue with flame accent,
+midnight blue cobalt and electric blue color palette,
+transparent background, 512x512px, no text
+```
+
+### 100-Day Streak — Huyền Thoại Bất Diệt
+```
+2D game badge, anime art style, flat vector illustration, cel-shading,
+a majestic golden crown radiating blinding light,
+100 small stars orbiting the crown in two rings,
+dramatic god-rays / lens flare effect in 2D flat style,
+legendary item glow, ultimate rarity aesthetic,
+star-shaped badge with jeweled tips,
+brilliant gold white and celestial blue color palette,
+transparent background, 512x512px, no text
+```
+
+### First Lesson Complete
+```
+2D game badge, anime art style, flat vector illustration, cel-shading,
+a shining graduation scroll with anime star burst seal,
+gold ribbon tied around it, small owl mascot perched on top with wink,
+celebratory badge with streamers and stars around border,
+parchment gold and emerald green color palette,
+transparent background, 512x512px, no text
+```
+
+### Course Complete
+```
+2D game badge, anime art style, flat vector illustration, cel-shading,
+a glowing trophy cup with wings spreading from its sides, anime style,
+winner stars burst from the top, sparkle effects at cup's rim,
+laurel wreath integrated into badge border,
+epic victory aesthetic (anime tournament arc energy),
+gold and royal blue color palette,
+transparent background, 512x512px, no text
+```
+
+### Perfect Score
+```
+2D game badge, anime art style, flat vector illustration, cel-shading,
+a flawless diamond with an 'S' rank holographic overlay inside it,
+rainbow prismatic light rays emanating outward,
+game rank badge aesthetic (like fighting games),
+diamond-shaped badge frame with gem facet border,
+prismatic white silver and rainbow accent color palette,
+transparent background, 512x512px, no text
+```
+
+### Night Owl — Học Đêm
+```
+2D game badge, anime art style, flat vector illustration, cel-shading,
+a cute chibi owl wearing round glasses and holding a tiny glowing lantern,
+crescent moon and scattered stars in the background aura,
+sleepy but determined expression, soft night aesthetic,
+circular badge with moon-phase border motif,
+deep navy indigo and soft amber color palette,
+transparent background, 512x512px, no text
+```
+
+### Speed Runner — Tốc Chiến
+```
+2D game badge, anime art style, flat vector illustration, cel-shading,
+a lightning bolt with anime motion lines (speed streaks),
+a tiny stopwatch with wings flying alongside it,
+dynamic action pose energy, like a game speed power-up icon,
+shield badge with jagged electric border,
+electric yellow and deep blue color palette,
+transparent background, 512x512px, no text
+```
+
+### Social Butterfly — Nhà Giao Tiếp
+```
+2D game badge, anime art style, flat vector illustration, cel-shading,
+two cartoon speech bubbles overlapping with hearts and stars inside,
+tiny anime characters peeking from each bubble,
+soft romantic comedy manga style, warm and friendly,
+rounded badge with heart-dotted border,
+rose pink coral and soft purple color palette,
+transparent background, 512x512px, no text
+```
+
+---
+
+## Bộ Badge Kỹ Năng (Skill Mastery)
+
+### Vocabulary Master
+```
+2D game badge, anime art style, flat vector illustration, cel-shading,
+an open spellbook glowing with magical floating letters and runes,
+letters swirling upward like spells being cast,
+wizard/mage RPG class aesthetic,
+oval badge with arcane rune border,
+deep purple and gold luminescent color palette,
+transparent background, 512x512px, no text
+```
+
+### Grammar Guardian
+```
+2D game badge, anime art style, flat vector illustration, cel-shading,
+a medieval knight shield with glowing grammar sentence structure lines carved into it,
+sword and pen crossed behind the shield,
+protector class RPG aesthetic, noble and disciplined,
+heraldic badge shape with chain border,
+steel blue silver and crimson color palette,
+transparent background, 512x512px, no text
+```
+
+### Listening Legend
+```
+2D game badge, anime art style, flat vector illustration, cel-shading,
+a pair of anime-style ears with sound wave rings pulsing outward,
+musical notes and frequency equalizer bars floating around,
+cute but cool character design energy, like a bard class icon,
+circular badge with sound wave border,
+teal and warm orange color palette,
+transparent background, 512x512px, no text
+```
+
+### Speaking Star
+```
+2D game badge, anime art style, flat vector illustration, cel-shading,
+a microphone wrapped in glowing star energy,
+sound waves forming a star burst pattern,
+anime idol / singer aesthetic, confident and bright,
+star-shaped badge with sparkle border,
+hot pink and gold color palette,
+transparent background, 512x512px, no text
+```
+
+### Reading Ronin
+```
+2D game badge, anime art style, flat vector illustration, cel-shading,
+a lone samurai silhouette sitting under a sakura tree reading a scroll,
+cherry blossom petals floating in the air, moonlit atmosphere,
+bushido discipline meets scholarly wisdom,
+circle badge with torii gate border motif,
+deep red and ink black with cherry blossom pink accent,
+transparent background, 512x512px, no text
+```
+
+---
+
+## Bộ Badge Đặc Biệt (Seasonal / Event)
+
+### Tết Badge — Năm Mới
+```
+2D game badge, anime art style, flat vector illustration, cel-shading,
+a golden dragon coiled around a red lantern with golden tassels,
+lucky cloud motifs (xiangyun) surrounding it, festive firecrackers,
+traditional Asian new year meets anime art style,
+octagonal badge with red lacquer border,
+vermillion red gold and lucky green color palette,
+transparent background, 512x512px, no text
+```
+
+### Summer Special
+```
+2D game badge, anime art style, flat vector illustration, cel-shading,
+a glowing sun wearing anime sunglasses, surfboard leaning against it,
+tropical hibiscus flowers and sparkle stars floating around,
+beach festival summer anime vibe (natsu matsuri),
+wavy circular badge with splash border,
+bright coral turquoise and sunny yellow color palette,
+transparent background, 512x512px, no text
+```
+
+### Halloween Event
+```
+2D game badge, anime art style, flat vector illustration, cel-shading,
+a cute chibi witch girl (anime style) casting a spell over a glowing pumpkin,
+bats and crescent moon in background aura, magical sparkles,
+spooky but kawaii balance, gothic lolita fashion touch,
+rounded badge with bat-wing border accents,
+dark purple orange and moonlight white color palette,
+transparent background, 512x512px, no text
+```
+
+---
+
+## Rarity Tier — Khung Viền Theo Độ Hiếm
+
+Thêm vào prompt để phân biệt độ hiếm của badge:
+
+| Rarity | Thêm vào prompt |
+|--------|----------------|
+| Common ⚪ | `simple thin white border, matte finish` |
+| Uncommon 🟢 | `green glowing border, slight metallic sheen` |
+| Rare 🔵 | `blue crystalline border, inner blue glow pulse` |
+| Epic 🟣 | `purple energy border with electric arcs, radiant inner light` |
+| Legendary 🟡 | `ornate gold filigree border, blinding golden aura, crown motif top` |
+| Mythic 🌈 | `rainbow holographic shifting border, prismatic light explosion, divine rays` |
+
+---
+
+## Tips Khi Dùng Với AI Image Generator
+
+### Midjourney
+```
+[base prompt] --ar 1:1 --niji 6 --no text, watermark, background
+```
+
+### Stable Diffusion (SDXL)
+- Model gợi ý: `animagine-xl-3.1`, `pony-diffusion-v6-xl`
+- Negative prompt:
+```
+text, watermark, blurry, 3d render, photorealistic, dark background,
+low quality, deformed, ugly, extra limbs, nsfw
+```
+
+### DALL-E 3
+- Thêm vào đầu: `"Create a 2D game badge icon in anime art style:"`
+- Thêm vào cuối: `"Make it look like a collectible game achievement badge with transparent background."`
+
+### Adobe Firefly
+- Dùng Content Type: **Graphic**
+- Style: **Anime**, **Vector look**, **Vibrant**
+- Thêm reference image: icon game RPG/JRPG làm style anchor
+
+---
+
+## Quy Ước Màu Sắc LexiLingo
+
+Khi muốn badge match với brand color của app:
+
+```
+use LexiLingo brand colors: primary indigo #6366F1,
+accent amber #F59E0B, success emerald #10B981,
+danger rose #F43F5E, as the dominant badge palette
+```
+
+---
+
+## Workflow Đề Xuất
+
+1. Chọn **Base Style DNA** → dán vào đầu
+2. Chọn prompt badge muốn tạo
+3. Thêm **Rarity Tier** phù hợp
+4. Thêm flag model-specific ở cuối
+5. Generate → upscale → export PNG với nền trong suốt
+6. Đặt tên file: `badge_{category}_{name}_{rarity}.png`
+ - Ví dụ: `badge_cefr_a1_common.png`, `badge_streak_30day_rare.png`
diff --git a/docs/dev_status_report_2026-06-13.md b/docs/dev_status_report_2026-06-13.md
deleted file mode 100644
index 8b1c0490..00000000
--- a/docs/dev_status_report_2026-06-13.md
+++ /dev/null
@@ -1,170 +0,0 @@
-# LexiLingo — Báo Cáo Tổng Hợp Phát Triển
-**Ngày:** 2026-06-13
-**Branch:** `dev`
-**Người tổng hợp:** Auto-generated từ logs & git history
-
----
-
-## 1. Tổng Quan Hệ Thống
-
-| Service | Trạng thái | Ghi chú |
-|---------|-----------|---------|
-| Backend (FastAPI) | ⚠️ Lỗi khởi động | `DEBUG=True` khi `APP_ENV=production` |
-| AI Service (FastAPI) | ⚠️ Lỗi khởi động | `DEBUG=True` khi `ENVIRONMENT=production` |
-| Admin Dashboard (Vite) | ✅ Chạy bình thường | Port 5176 (2026-06-02) |
-| PostgreSQL (Docker) | ✅ Đã fix | Docker socket issue đã được giải quyết |
-| Redis | ✅ OK | Kéo image thành công |
-| MongoDB | ✅ OK | Kéo image thành công |
-
----
-
-## 2. Lỗi Tìm Thấy Trong Logs
-
-### 2.1 Backend Service — `logs/backend.log`
-
-**Lỗi:** `pydantic_core.ValidationError` khi khởi động
-
-```
-Value error, DEBUG must be false when APP_ENV=production
-```
-
-**Nguyên nhân:** File `.env` đặt `DEBUG=True` nhưng `APP_ENV=production`.
-**Fix:** Đặt `DEBUG=False` trong `.env` khi chạy production, hoặc dùng `.env.development` cho môi trường local.
-
-### 2.2 AI Service — `logs/ai-service.log`
-
-**Lỗi:** Tương tự backend service
-
-```
-Value error, DEBUG must be false when ENVIRONMENT=production
-```
-
-**Fix:** Tương tự — kiểm tra `ENVIRONMENT` và `DEBUG` trong `.env` của ai-service.
-
-### 2.3 PostgreSQL — `logs/postgres.log`
-
-**Lỗi ban đầu:** Docker daemon không chạy (socket `/Users/nguyenhuuthang/.docker/run/docker.sock` không tồn tại).
-**Trạng thái cuối:** Container `lexilingo-postgres` đã khởi động thành công sau khi Docker daemon được kích hoạt.
-
-### 2.4 Admin Dashboard — `logs/admin.log`
-
-**Trạng thái:** ✅ Khởi động thành công lúc 17:25 ngày 2026-06-02, port 5176, VITE v8.0.15.
-
----
-
-## 3. TRACE-CAG Benchmark — Tóm Tắt Kết Quả
-
-> **Chi tiết đầy đủ:** xem [tracecag_benchmark_report_2026-06-03.md](../ai-service/docs/tracecag_benchmark_report_2026-06-03.md)
-
-### 3.1 Tổng quan 3 Lần Chạy
-
-| Run | Ngày | Model | n | Datasets | Trạng thái |
-|-----|------|-------|---|----------|-----------|
-| Run 1 | 2026-05-30 | llama-3.1-8b-instant | 20 | 3 datasets | ⚠️ Quota exhaustion (hotpotqa, musique) |
-| Run 2 | 2026-05-31 | llama-3.1-8b-instant | 20 | 3 datasets | ✅ Đầy đủ |
-| Run 3 | 2026-06-03 | **llama-3.3-70b-versatile** | 5 | hotpotqa | ✅ Validation (n nhỏ) |
-
-### 3.2 Kết Quả Tốt Nhất (Run 2, trung bình 3 datasets)
-
-| Mode | EM avg | F1 avg | MRR@5 avg | Cache hit | Latency avg |
-|------|--------|--------|-----------|-----------|-------------|
-| cag_vanilla | 15.0% | 23.8% | 72.1% | 34.2% | 1960ms |
-| hipporag_proxy | 18.3% | 27.1% | 69.7% | 0% | 3053ms |
-| **tracecag_rapid** | **18.3%** | **27.3%** | **72.6%** | 30.8% | **2086ms** |
-
-**Kết luận:** TRACE-CAG dẫn đầu MRR@5, cạnh tranh EM/F1 với hipporag, nhanh hơn 32%, có cache.
-
-### 3.3 Thành Tựu Nổi Bật (Run 3)
-
-- **Warm hit rate = 100%** sau khi fix threshold cache (was 65-85%)
-- **P50 cached = 17–29ms** vs cold ~3200ms → speedup **~110–190x**
-- **KG enrichment:** 4,280 → 5,173 concepts (+929 Wikipedia entities từ HotpotQA/2Wiki)
-- Model **llama-3.3-70b-versatile** xác nhận hoạt động
-
-### 3.4 Việc Cần Làm Tiếp
-
-| Priority | Action |
-|----------|--------|
-| HIGH | n=20+ với 70b model, seed=42 (Run 3 n=5 quá nhỏ) |
-| HIGH | Chạy `query_clusters` benchmark để đo L1 cache (hiện 0%) |
-| MEDIUM | Test `tracecag_adaptive` profile |
-| MEDIUM | Expand KG seeding (tăng `--max-samples 256`) |
-
----
-
-## 4. Thay Đổi Code Trong Đợt Này
-
-### 4.1 AI Service — Xóa modules lỗi thời
-
-Các module sau đã bị xóa do refactor:
-
-| Module | Lý do xóa |
-|--------|-----------|
-| `api/routes/user.py` | Chuyển sang backend-service |
-| `api/routes/websocket_simple.py` | Thay bằng SSE |
-| `api/routes/websocket_stream.py` | Thay bằng SSE |
-| `api/services/dual_stream/` (toàn bộ) | Thay bằng SSE pipeline |
-| `api/services/dl_model_service.py` | Deprecated |
-| `api/services/fallback.py` | Hợp nhất vào model_gateway |
-| `api/services/llama_vietnamese_service.py` | Deprecated |
-| `api/services/qwen_engine.py` | Deprecated |
-| `api/services/report_service.py` | Deprecated |
-| `api/services/resource_manager.py` | Deprecated |
-| `api/services/smart_router.py` | Hợp nhất vào model_gateway |
-| `api/services/spaced_repetition_service.py` | Chuyển sang backend |
-
-### 4.2 Flutter Admin — Toàn bộ bị xóa
-
-`flutter-admin/` đã bị xóa hoàn toàn (đã chuyển sang Vite/React admin dashboard).
-
-### 4.3 TRACE-CAG — Modules mới
-
-| File | Mô tả |
-|------|-------|
-| `api/services/trace_cag/benchmark/` | Benchmark utilities (adaptive, qa_generation, quality, ranking) |
-| `api/services/trace_cag/cache_utils.py` | Cache utilities |
-| `api/services/trace_cag/env_helpers.py` | Environment helpers |
-| `api/services/trace_cag/kg_utils.py` | KG utilities |
-| `api/services/trace_cag/llm_client.py` | LLM client |
-| `api/services/trace_cag/provider_state.py` | Provider state management |
-| `tests/benchmark/` | Benchmark test suite |
-| `tests/trace_cag/test_cache_gate_benchmark_metadata.py` | Cache gate metadata tests |
-
-### 4.4 Backend Service — Cập nhật routes
-
-Các routes đã được cập nhật: `ai_audit.py`, `learning.py`, `proficiency.py`, `progress.py`, `user_management.py`, `rbac.py`, `item_effects_service.py`.
-
----
-
-## 5. Git Cleanup — Tóm Tắt
-
-### Files đã được gitignore (mới thêm):
-
-```
-.claude/worktrees/ # Temporary Claude Code agent workspaces
-.claire/ # Local tool cache
-ai-service/.crawl4ai/ # Web crawler cache
-ai-service/data/kuzu_db # KuzuDB runtime database (generated)
-ai-service/data/kg_output/ # Generated KG artifacts (144MB)
-ai-service/data/sample_stories.expanded.json
-```
-
-### Files cần xóa (chờ xác nhận):
-
-- `ai-service/docs/tracecag_benchmark_report_2026-05-30.md` — superseded bởi report 2026-06-03
-- `ai-service/docs/tracecag_benchmark_report_2026-05-31.md` — superseded bởi report 2026-06-03
-- `.claude/worktrees/agent-a2e02f9977edbf924/` — old agent worktree (duplicate files)
-- `ai-service/.crawl4ai/` — crawler cache (gitignored, có thể xóa local)
-
----
-
-## 6. Vấn Đề Cấu Hình Cần Giải Quyết
-
-1. **DEBUG mode:** Đảm bảo `.env` của backend và ai-service không set `DEBUG=True` khi `APP_ENV/ENVIRONMENT=production`.
-2. **Docker daemon:** Cần bật Docker Desktop trước khi chạy `docker-compose`.
-3. **L1 cache benchmark:** Dataset `query_clusters` đã sẵn sàng — cần chạy benchmark để đo L1 hit rate thực tế.
-4. **Run 4 TRACE-CAG:** Cần chạy full benchmark n=20 với model 70b để xác nhận kết quả Run 3 (n=5).
-
----
-
-*Báo cáo được tổng hợp từ: `logs/` (backend.log, ai-service.log, admin.log, postgres.log, databases.log) và `ai-service/docs/tracecag_benchmark_report_*.md`. Ngày tổng hợp: 2026-06-13.*
diff --git a/docs/gateway/ENDPOINT_AUDIT_2026-03-19.md b/docs/gateway/ENDPOINT_AUDIT_2026-03-19.md
deleted file mode 100644
index f8cb4b87..00000000
--- a/docs/gateway/ENDPOINT_AUDIT_2026-03-19.md
+++ /dev/null
@@ -1,57 +0,0 @@
-# Endpoint Audit for API Gateway (2026-03-19)
-
-This file captures the currently configured service addresses in the repository
-for wiring API Gateway policy/routes in hybrid deployment.
-
-## Runtime topology in repo
-
-- Flutter Web: Vercel or local web dev
-- Admin Dashboard: Vercel (env in `vercel.json`)
-- Backend API: Render
-- AI Service: Cloudflare Tunnel (temporary URL)
-
-## Audited endpoint values
-
-### Backend
-- Production URL: `https://lexilingo-4gu6.onrender.com/api/v1`
-- Local URL: `http://localhost:8000/api/v1`
-- Sources:
- - `flutter-app/lib/core/utils/constants.dart`
- - `flutter-app/.env.example`
- - `admin-service/vercel.json`
-
-### AI Service
-- Production URL (current): `https://enable-tell-memphis-wing.trycloudflare.com/api/v1`
-- Local URL: `http://localhost:8001/api/v1`
-- Sources:
- - `flutter-app/lib/core/utils/constants.dart`
- - `flutter-app/.env.example`
- - `admin-service/vercel.json`
- - `backend-service/render.yaml`
-
-### Frontend origins (observed)
-- Flutter web local: `http://localhost:8080`
-- Admin local: `http://localhost:5173`
-- Flutter prod: `https://lexilingo.vercel.app`
-- Flutter alt prod: `https://flutter-app-nine-pied.vercel.app`
-- Sources:
- - `ai-service/api/main.py`
- - `backend-service/app/core/config.py`
-
-## Gateway config profiles added
-
-- Local-internal profile (Docker internal upstreams):
- - `gateway/kong/kong.yml`
- - `gateway/docker-compose.kong.yml`
-
-- Hybrid profile (Render + Cloudflare tunnel upstreams):
- - `gateway/kong/kong.hybrid.yml`
- - `gateway/docker-compose.kong.hybrid.yml`
- - `gateway/observability/prometheus.hybrid.yml`
-
-## Notes
-
-1. `trycloudflare.com` URL is temporary and changes after tunnel restart.
-2. For stable production, replace AI upstream with a permanent hostname
- (for example `https://ai.yourdomain.com`) and update `gateway/kong/kong.hybrid.yml`.
-3. Keep using one public API base URL from clients (gateway URL) to avoid split routing in apps.
diff --git a/docs/superpowers/plans/2026-06-01-fsrs-reminder-scheduler.md b/docs/superpowers/plans/2026-06-01-fsrs-reminder-scheduler.md
deleted file mode 100644
index a54c1091..00000000
--- a/docs/superpowers/plans/2026-06-01-fsrs-reminder-scheduler.md
+++ /dev/null
@@ -1,1908 +0,0 @@
-# FSRS Reminder Scheduler Implementation Plan
-
-> **For agentic workers:** REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (`- [ ]`) syntax for tracking.
-
-**Goal:** Build a backend-managed FSRS reminder system that sends real in-app notifications, FCM push notifications, and occasional review reminder emails without breaking the existing vocabulary review or settings flows.
-
-**Architecture:** Keep the existing FSRS scheduling source of truth: `user_vocabulary.next_review_date`. Add reminder preferences and delivery audit tables, then run Celery worker plus Celery beat as separate processes. The FastAPI app remains request/response only; worker processes scan due preferences, count FSRS due vocabulary, dedupe deliveries, create `notifications` rows, send FCM, and send email only on the configured cadence.
-
-**Tech Stack:** FastAPI, SQLAlchemy async, Alembic, PostgreSQL, Redis, Celery, Firebase Admin SDK, SMTP email templates, Flutter Provider, FCM, local notification fallback, sqflite/shared_preferences.
-
----
-
-## Safety Strategy
-
-- Default all new backend reminder sending behind `REMINDERS_ENABLED=false`.
-- Create new tables instead of changing existing reminder behavior in place.
-- Keep existing Flutter `notificationEnabled` and `notificationTime` fields backward compatible.
-- Keep local daily notification as fallback only when backend sync or FCM is unavailable.
-- Make worker idempotent with a unique delivery `dedupe_key`.
-- Do not change the existing FSRS formula and review submission behavior in the same deployment. First add tests and an adapter boundary, then let reminder services read the already-computed `next_review_date`.
-- Deploy in this order: migrations, API fields disabled, Flutter settings sync, worker dry-run, worker live.
-
-## File Structure
-
-### Backend files to create
-
-- `backend-service/app/models/reminder.py`
- - SQLAlchemy models for user reminder preferences and reminder delivery audit.
-- `backend-service/app/schemas/reminders.py`
- - Pydantic request/response schemas for user-configurable reminder settings.
-- `backend-service/app/routes/reminders.py`
- - Authenticated preference endpoints.
-- `backend-service/app/schemas/notifications.py`
- - Pydantic schemas for persisted notifications.
-- `backend-service/app/routes/notifications.py`
- - Authenticated notification list/read endpoints.
-- `backend-service/app/services/fsrs_scheduler_service.py`
- - Thin adapter around due-count and due-date checks. Keeps scheduler independent from review submission internals.
-- `backend-service/app/services/reminder_service.py`
- - Business logic for preference evaluation, due count checks, dedupe, DB notification creation, and channel dispatch.
-- `backend-service/app/services/push_notification_service.py`
- - Firebase Admin FCM sender with graceful no-op when Firebase is not configured.
-- `backend-service/app/core/celery_app.py`
- - Celery app configuration using Redis broker/backend.
-- `backend-service/app/tasks/reminders.py`
- - Celery task entrypoint for scanning reminder preferences.
-- `backend-service/app/templates/vocabulary_review_reminder.html`
- - HTML email template.
-- `backend-service/app/templates/vocabulary_review_reminder.txt`
- - Plain text email template.
-- `backend-service/alembic/versions/add_fsrs_reminder_scheduler.py`
- - Migration for new tables and indexes.
-- `backend-service/tests/test_reminder_preferences_routes.py`
- - API tests for preference read/update.
-- `backend-service/tests/test_notifications_routes.py`
- - API tests for notification list and read status.
-- `backend-service/tests/test_fsrs_scheduler_service.py`
- - Deterministic due-count and next-check tests.
-- `backend-service/tests/test_reminder_service.py`
- - Channel/cadence/dedupe tests.
-- `backend-service/tests/test_push_notification_service.py`
- - Firebase sender tests with mocks.
-- `backend-service/tests/test_email_service_review_reminder.py`
- - Template rendering and SMTP no-op tests.
-
-### Backend files to modify
-
-- `backend-service/requirements.txt`
- - Add Celery.
-- `backend-service/.env.example`
- - Add reminder, Celery, and public app URL variables.
-- `backend-service/app/core/config.py`
- - Add typed settings for reminders, Celery, and review deep links.
-- `backend-service/app/models/__init__.py`
- - Import new reminder models for Alembic/create_all.
-- `backend-service/app/main.py`
- - Include reminders and notifications routers.
-- `backend-service/app/routes/devices.py`
- - Fix missing `select` and `and_` imports; keep route contract unchanged.
-- `backend-service/app/services/email_service.py`
- - Add review reminder email method using existing SMTP helper.
-- `backend-service/render.yaml`
- - Add worker and beat process definitions, disabled by env until configured.
-- `docker-compose.yml`
- - Add backend Celery worker and beat services for local full-stack testing.
-- `docker-compose.production.yml`
- - Add production worker and beat services.
-
-### Flutter files to create
-
-- `flutter-app/lib/features/user/data/datasources/settings_remote_data_source.dart`
- - Remote API bridge for reminder preferences.
-- `flutter-app/lib/core/services/app_navigation_service.dart`
- - Global navigator key and notification route handling.
-- `flutter-app/test/features/user/settings_model_test.dart`
- - Model parse/serialize tests for new reminder fields.
-- `flutter-app/test/features/user/settings_provider_reminder_test.dart`
- - Provider behavior tests for backend sync and local fallback.
-
-### Flutter files to modify
-
-- `flutter-app/lib/features/user/domain/entities/settings.dart`
- - Add reminder channel and email cadence fields with safe defaults.
-- `flutter-app/lib/features/user/data/models/settings_model.dart`
- - Parse both old local camelCase keys and new backend snake_case keys.
-- `flutter-app/lib/features/user/data/repositories/settings_repository_impl.dart`
- - Read/write local settings and sync reminder preferences to backend.
-- `flutter-app/lib/features/user/domain/repositories/settings_repository.dart`
- - Add reminder update method if needed.
-- `flutter-app/lib/features/user/data/datasources/settings_local_data_source.dart`
- - Store new fields in sqflite.
-- `flutter-app/lib/features/user/data/datasources/settings_local_data_source_web.dart`
- - Store new fields in shared_preferences.
-- `flutter-app/lib/core/services/database_helper.dart`
- - Bump DB version and add nullable/defaulted settings columns.
-- `flutter-app/lib/features/user/di/user_di.dart`
- - Inject `ApiClient` into settings repository.
-- `flutter-app/lib/features/user/presentation/providers/settings_provider.dart`
- - Add update methods for push/email/cadence/min due count and backend sync.
-- `flutter-app/lib/features/user/presentation/pages/settings_page.dart`
- - Extend notification card with review reminder controls.
-- `flutter-app/lib/core/services/firebase_messaging_service.dart`
- - Fix device registration path from `/api/devices` to `/devices`; handle `vocabulary_review_reminder`.
-- `flutter-app/lib/features/notifications/domain/entities/notification_entity.dart`
- - Add `vocabularyReviewReminder` type.
-- `flutter-app/lib/features/notifications/presentation/pages/notifications_page.dart`
- - Navigate to review screen when tapping reminder notification.
-- `flutter-app/lib/features/vocabulary/presentation/widgets/daily_review_card.dart`
- - Extract reusable navigation helper or add named route support.
-- `flutter-app/lib/main.dart`
- - Add `navigatorKey` and `/vocabulary/review` named route.
-- `flutter-app/assets/i18n/en.json`
-- `flutter-app/assets/i18n/vi.json`
-- `flutter-app/assets/i18n/ja.json`
-- `flutter-app/assets/i18n/ko.json`
-- `flutter-app/assets/i18n/zh.json`
-- `flutter-app/assets/i18n/fr.json`
-- `flutter-app/assets/i18n/es.json`
- - Add labels for push/email reminders and review notification copy.
-- `flutter-app/test/core/services/firebase_messaging_service_test.dart`
- - Update expected device registration path and reminder payload routing.
-- `flutter-app/test/features/notifications/notification_entity_test.dart`
- - Add type parsing coverage.
-
----
-
-## Chunk 1: Backend Data Model And Config
-
-### Task 1: Add reminder tables
-
-**Files:**
-- Create: `backend-service/app/models/reminder.py`
-- Modify: `backend-service/app/models/__init__.py`
-- Create: `backend-service/alembic/versions/add_fsrs_reminder_scheduler.py`
-- Test: `backend-service/tests/test_reminder_preferences_routes.py`
-
-- [ ] **Step 1: Write failing model import test**
-
-Add a small assertion inside `backend-service/tests/test_reminder_preferences_routes.py`:
-
-```python
-from app.models.reminder import ReminderDelivery, UserReminderPreference
-
-
-def test_reminder_models_have_expected_tables():
- assert UserReminderPreference.__tablename__ == "user_reminder_preferences"
- assert ReminderDelivery.__tablename__ == "reminder_deliveries"
-```
-
-- [ ] **Step 2: Run test to verify it fails**
-
-Run:
-
-```bash
-cd backend-service
-APP_ENV=testing DEBUG=false ../venv/bin/python -m pytest tests/test_reminder_preferences_routes.py::test_reminder_models_have_expected_tables -q
-```
-
-Expected: FAIL with `ModuleNotFoundError: No module named 'app.models.reminder'`.
-
-- [ ] **Step 3: Create `backend-service/app/models/reminder.py`**
-
-Implement:
-
-```python
-import uuid
-from datetime import datetime, timezone
-from sqlalchemy import Boolean, ForeignKey, Index, Integer, String, UniqueConstraint
-from sqlalchemy.orm import Mapped, mapped_column
-
-from app.core.database import Base
-from app.core.db_types import GUID, TZDateTime, PortableJSON
-
-
-class UserReminderPreference(Base):
- __tablename__ = "user_reminder_preferences"
-
- user_id: Mapped[uuid.UUID] = mapped_column(
- GUID(),
- ForeignKey("users.id", ondelete="CASCADE"),
- primary_key=True,
- )
- enabled: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)
- push_enabled: Mapped[bool] = mapped_column(Boolean, default=True, nullable=False)
- email_enabled: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)
- reminder_time_local: Mapped[str] = mapped_column(String(5), default="09:00", nullable=False)
- timezone: Mapped[str] = mapped_column(String(64), default="Asia/Ho_Chi_Minh", nullable=False)
- min_due_count: Mapped[int] = mapped_column(Integer, default=1, nullable=False)
- email_cadence_days: Mapped[int] = mapped_column(Integer, default=7, nullable=False)
- next_check_at: Mapped[datetime] = mapped_column(TZDateTime, default=lambda: datetime.now(timezone.utc), nullable=False, index=True)
- last_push_sent_at: Mapped[datetime | None] = mapped_column(TZDateTime, nullable=True)
- last_email_sent_at: Mapped[datetime | None] = mapped_column(TZDateTime, nullable=True)
- created_at: Mapped[datetime] = mapped_column(TZDateTime, default=lambda: datetime.now(timezone.utc), nullable=False)
- updated_at: Mapped[datetime] = mapped_column(TZDateTime, default=lambda: datetime.now(timezone.utc), onupdate=lambda: datetime.now(timezone.utc), nullable=False)
-
- __table_args__ = (
- Index("ix_user_reminder_preferences_enabled_next", "enabled", "next_check_at"),
- )
-
-
-class ReminderDelivery(Base):
- __tablename__ = "reminder_deliveries"
-
- id: Mapped[uuid.UUID] = mapped_column(GUID(), primary_key=True, default=uuid.uuid4)
- user_id: Mapped[uuid.UUID] = mapped_column(GUID(), ForeignKey("users.id", ondelete="CASCADE"), nullable=False, index=True)
- channel: Mapped[str] = mapped_column(String(20), nullable=False)
- reminder_type: Mapped[str] = mapped_column(String(50), default="vocabulary_review", nullable=False)
- status: Mapped[str] = mapped_column(String(20), default="queued", nullable=False)
- due_count: Mapped[int] = mapped_column(Integer, default=0, nullable=False)
- dedupe_key: Mapped[str] = mapped_column(String(180), nullable=False)
- scheduled_for: Mapped[datetime] = mapped_column(TZDateTime, nullable=False, index=True)
- sent_at: Mapped[datetime | None] = mapped_column(TZDateTime, nullable=True)
- error: Mapped[str | None] = mapped_column(String(1000), nullable=True)
- data: Mapped[dict | None] = mapped_column(PortableJSON, nullable=True)
- created_at: Mapped[datetime] = mapped_column(TZDateTime, default=lambda: datetime.now(timezone.utc), nullable=False)
-
- __table_args__ = (
- UniqueConstraint("dedupe_key", name="uq_reminder_deliveries_dedupe_key"),
- Index("ix_reminder_delivery_user_channel_created", "user_id", "channel", "created_at"),
- )
-```
-
-- [ ] **Step 4: Import models in `backend-service/app/models/__init__.py`**
-
-Add:
-
-```python
-from app.models.reminder import UserReminderPreference, ReminderDelivery
-```
-
-Add names to `__all__`.
-
-- [ ] **Step 5: Create Alembic migration**
-
-Create `backend-service/alembic/versions/add_fsrs_reminder_scheduler.py` with only additive DDL:
-
-```python
-"""add fsrs reminder scheduler
-
-Revision ID: add_fsrs_reminder_scheduler
-Revises: fix_badge_cdn_urls
-Create Date: 2026-06-01
-"""
-
-from alembic import op
-import sqlalchemy as sa
-from app.core.db_types import GUID, TZDateTime, PortableJSON
-
-revision = "add_fsrs_reminder_scheduler"
-down_revision = "fix_badge_cdn_urls"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
- op.create_table(
- "user_reminder_preferences",
- sa.Column("user_id", GUID(), sa.ForeignKey("users.id", ondelete="CASCADE"), primary_key=True),
- sa.Column("enabled", sa.Boolean(), nullable=False, server_default=sa.text("false")),
- sa.Column("push_enabled", sa.Boolean(), nullable=False, server_default=sa.text("true")),
- sa.Column("email_enabled", sa.Boolean(), nullable=False, server_default=sa.text("false")),
- sa.Column("reminder_time_local", sa.String(length=5), nullable=False, server_default="09:00"),
- sa.Column("timezone", sa.String(length=64), nullable=False, server_default="Asia/Ho_Chi_Minh"),
- sa.Column("min_due_count", sa.Integer(), nullable=False, server_default="1"),
- sa.Column("email_cadence_days", sa.Integer(), nullable=False, server_default="7"),
- sa.Column("next_check_at", TZDateTime(), nullable=False, server_default=sa.func.now()),
- sa.Column("last_push_sent_at", TZDateTime(), nullable=True),
- sa.Column("last_email_sent_at", TZDateTime(), nullable=True),
- sa.Column("created_at", TZDateTime(), nullable=False, server_default=sa.func.now()),
- sa.Column("updated_at", TZDateTime(), nullable=False, server_default=sa.func.now()),
- )
- op.create_index("ix_user_reminder_preferences_enabled_next", "user_reminder_preferences", ["enabled", "next_check_at"])
- op.create_index("ix_user_reminder_preferences_next_check_at", "user_reminder_preferences", ["next_check_at"])
-
- op.create_table(
- "reminder_deliveries",
- sa.Column("id", GUID(), primary_key=True),
- sa.Column("user_id", GUID(), sa.ForeignKey("users.id", ondelete="CASCADE"), nullable=False),
- sa.Column("channel", sa.String(length=20), nullable=False),
- sa.Column("reminder_type", sa.String(length=50), nullable=False, server_default="vocabulary_review"),
- sa.Column("status", sa.String(length=20), nullable=False, server_default="queued"),
- sa.Column("due_count", sa.Integer(), nullable=False, server_default="0"),
- sa.Column("dedupe_key", sa.String(length=180), nullable=False),
- sa.Column("scheduled_for", TZDateTime(), nullable=False),
- sa.Column("sent_at", TZDateTime(), nullable=True),
- sa.Column("error", sa.String(length=1000), nullable=True),
- sa.Column("data", PortableJSON(), nullable=True),
- sa.Column("created_at", TZDateTime(), nullable=False, server_default=sa.func.now()),
- sa.UniqueConstraint("dedupe_key", name="uq_reminder_deliveries_dedupe_key"),
- )
- op.create_index("ix_reminder_deliveries_user_id", "reminder_deliveries", ["user_id"])
- op.create_index("ix_reminder_deliveries_scheduled_for", "reminder_deliveries", ["scheduled_for"])
- op.create_index("ix_reminder_delivery_user_channel_created", "reminder_deliveries", ["user_id", "channel", "created_at"])
-
-
-def downgrade() -> None:
- op.drop_index("ix_reminder_delivery_user_channel_created", table_name="reminder_deliveries")
- op.drop_index("ix_reminder_deliveries_scheduled_for", table_name="reminder_deliveries")
- op.drop_index("ix_reminder_deliveries_user_id", table_name="reminder_deliveries")
- op.drop_table("reminder_deliveries")
- op.drop_index("ix_user_reminder_preferences_next_check_at", table_name="user_reminder_preferences")
- op.drop_index("ix_user_reminder_preferences_enabled_next", table_name="user_reminder_preferences")
- op.drop_table("user_reminder_preferences")
-```
-
-Confirm `down_revision` against the actual current head before applying. At plan time, the latest observed head is `fix_badge_cdn_urls`.
-
-- [ ] **Step 6: Run model test**
-
-Run:
-
-```bash
-cd backend-service
-APP_ENV=testing DEBUG=false ../venv/bin/python -m pytest tests/test_reminder_preferences_routes.py::test_reminder_models_have_expected_tables -q
-```
-
-Expected: PASS.
-
-- [ ] **Step 7: Commit**
-
-```bash
-git add backend-service/app/models/reminder.py backend-service/app/models/__init__.py backend-service/alembic/versions/add_fsrs_reminder_scheduler.py backend-service/tests/test_reminder_preferences_routes.py
-git commit -m "feat: add reminder scheduler data model"
-```
-
-### Task 2: Add backend config and dependencies
-
-**Files:**
-- Modify: `backend-service/requirements.txt`
-- Modify: `backend-service/app/core/config.py`
-- Modify: `backend-service/.env.example`
-
-- [ ] **Step 1: Add failing config test**
-
-Add to `backend-service/tests/test_reminder_service.py`:
-
-```python
-def test_reminder_settings_have_safe_defaults():
- from app.core.config import settings
-
- assert settings.REMINDERS_ENABLED is False
- assert settings.REMINDER_DEFAULT_TIMEZONE == "Asia/Ho_Chi_Minh"
- assert settings.REMINDER_SCAN_BATCH_SIZE >= 1
-```
-
-- [ ] **Step 2: Run test to verify it fails**
-
-Run:
-
-```bash
-cd backend-service
-APP_ENV=testing DEBUG=false ../venv/bin/python -m pytest tests/test_reminder_service.py::test_reminder_settings_have_safe_defaults -q
-```
-
-Expected: FAIL because config fields do not exist.
-
-- [ ] **Step 3: Add Celery dependency**
-
-In `backend-service/requirements.txt` add:
-
-```txt
-# Background jobs
-celery[redis]>=5.4,<6
-```
-
-- [ ] **Step 4: Add settings in `backend-service/app/core/config.py`**
-
-Add near Redis/Firebase settings:
-
-```python
- # Reminder scheduler
- REMINDERS_ENABLED: bool = False
- REMINDER_DRY_RUN: bool = True
- REMINDER_SCAN_BATCH_SIZE: int = 250
- REMINDER_SCAN_INTERVAL_SECONDS: int = 300
- REMINDER_DEFAULT_TIMEZONE: str = "Asia/Ho_Chi_Minh"
- REMINDER_REVIEW_ROUTE: str = "/vocabulary/review"
- APP_PUBLIC_URL: str = "https://lexilingo.me"
-
- # Celery
- CELERY_BROKER_URL: str | None = None
- CELERY_RESULT_BACKEND: str | None = None
-
- @property
- def effective_celery_broker_url(self) -> str:
- return self.CELERY_BROKER_URL or self.REDIS_URL
-
- @property
- def effective_celery_result_backend(self) -> str:
- return self.CELERY_RESULT_BACKEND or self.REDIS_URL
-```
-
-- [ ] **Step 5: Document env vars in `.env.example`**
-
-Add:
-
-```env
-# Reminder scheduler
-REMINDERS_ENABLED=false
-REMINDER_DRY_RUN=true
-REMINDER_SCAN_BATCH_SIZE=250
-REMINDER_SCAN_INTERVAL_SECONDS=300
-REMINDER_DEFAULT_TIMEZONE=Asia/Ho_Chi_Minh
-REMINDER_REVIEW_ROUTE=/vocabulary/review
-APP_PUBLIC_URL=https://lexilingo.me
-
-# Celery. Defaults to REDIS_URL when empty.
-CELERY_BROKER_URL=
-CELERY_RESULT_BACKEND=
-```
-
-- [ ] **Step 6: Run config test**
-
-Run:
-
-```bash
-cd backend-service
-APP_ENV=testing DEBUG=false ../venv/bin/python -m pytest tests/test_reminder_service.py::test_reminder_settings_have_safe_defaults -q
-```
-
-Expected: PASS.
-
-- [ ] **Step 7: Commit**
-
-```bash
-git add backend-service/requirements.txt backend-service/app/core/config.py backend-service/.env.example backend-service/tests/test_reminder_service.py
-git commit -m "feat: configure reminder scheduler defaults"
-```
-
----
-
-## Chunk 2: Backend Preference And Notification APIs
-
-### Task 3: Add reminder preference endpoints
-
-**Files:**
-- Create: `backend-service/app/schemas/reminders.py`
-- Create: `backend-service/app/routes/reminders.py`
-- Modify: `backend-service/app/main.py`
-- Test: `backend-service/tests/test_reminder_preferences_routes.py`
-
-- [ ] **Step 1: Write route tests first**
-
-Test cases:
-
-```python
-@pytest.mark.asyncio
-async def test_get_reminder_preferences_creates_default(async_client, auth_headers):
- response = await async_client.get("/api/v1/users/me/reminder-preferences", headers=auth_headers)
- assert response.status_code == 200
- data = response.json()["data"]
- assert data["enabled"] is False
- assert data["push_enabled"] is True
- assert data["email_enabled"] is False
- assert data["reminder_time_local"] == "09:00"
-
-
-@pytest.mark.asyncio
-async def test_update_reminder_preferences(async_client, auth_headers):
- payload = {
- "enabled": True,
- "push_enabled": True,
- "email_enabled": True,
- "reminder_time_local": "20:30",
- "timezone": "Asia/Ho_Chi_Minh",
- "min_due_count": 3,
- "email_cadence_days": 7,
- }
- response = await async_client.put(
- "/api/v1/users/me/reminder-preferences",
- headers=auth_headers,
- json=payload,
- )
- assert response.status_code == 200
- assert response.json()["data"]["reminder_time_local"] == "20:30"
-```
-
-- [ ] **Step 2: Run tests to verify they fail**
-
-Run:
-
-```bash
-cd backend-service
-APP_ENV=testing DEBUG=false ../venv/bin/python -m pytest tests/test_reminder_preferences_routes.py -q
-```
-
-Expected: FAIL with 404 for new route.
-
-- [ ] **Step 3: Create schemas**
-
-In `backend-service/app/schemas/reminders.py`:
-
-```python
-from datetime import datetime
-from pydantic import BaseModel, Field, field_validator
-
-
-class ReminderPreferenceResponse(BaseModel):
- enabled: bool
- push_enabled: bool
- email_enabled: bool
- reminder_time_local: str
- timezone: str
- min_due_count: int
- email_cadence_days: int
- next_check_at: datetime
- last_push_sent_at: datetime | None = None
- last_email_sent_at: datetime | None = None
-
- class Config:
- from_attributes = True
-
-
-class ReminderPreferenceUpdate(BaseModel):
- enabled: bool | None = None
- push_enabled: bool | None = None
- email_enabled: bool | None = None
- reminder_time_local: str | None = Field(default=None, pattern=r"^\d{2}:\d{2}$")
- timezone: str | None = Field(default=None, min_length=1, max_length=64)
- min_due_count: int | None = Field(default=None, ge=1, le=1000)
- email_cadence_days: int | None = Field(default=None, ge=1, le=30)
-
- @field_validator("reminder_time_local")
- @classmethod
- def validate_time(cls, value: str | None) -> str | None:
- if value is None:
- return value
- hour, minute = [int(part) for part in value.split(":")]
- if hour > 23 or minute > 59:
- raise ValueError("reminder_time_local must be HH:MM")
- return value
-```
-
-- [ ] **Step 4: Create route**
-
-In `backend-service/app/routes/reminders.py`, implement:
-
-```python
-from datetime import datetime, timezone, timedelta
-from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
-from fastapi import APIRouter, Depends
-from sqlalchemy import select
-from sqlalchemy.ext.asyncio import AsyncSession
-
-from app.core.config import settings
-from app.core.database import get_db
-from app.core.dependencies import get_current_user
-from app.models.reminder import UserReminderPreference
-from app.models.user import User
-from app.schemas.common import ApiResponse
-from app.schemas.reminders import ReminderPreferenceResponse, ReminderPreferenceUpdate
-
-router = APIRouter(prefix="/users/me/reminder-preferences", tags=["Reminder Preferences"])
-
-
-def compute_next_check_at(time_text: str, timezone_name: str, now_utc: datetime | None = None) -> datetime:
- now_utc = now_utc or datetime.now(timezone.utc)
- try:
- tz = ZoneInfo(timezone_name)
- except ZoneInfoNotFoundError:
- tz = ZoneInfo(settings.REMINDER_DEFAULT_TIMEZONE)
- hour, minute = [int(part) for part in time_text.split(":")]
- local_now = now_utc.astimezone(tz)
- candidate = local_now.replace(hour=hour, minute=minute, second=0, microsecond=0)
- if candidate <= local_now:
- candidate = candidate + timedelta(days=1)
- return candidate.astimezone(timezone.utc)
-```
-
-Then add GET and PUT endpoints that create a default row when missing and recalculate `next_check_at` on update.
-
-- [ ] **Step 5: Include router in `backend-service/app/main.py`**
-
-Add import:
-
-```python
-from app.routes.reminders import router as reminders_router
-```
-
-Add include:
-
-```python
-app.include_router(reminders_router, prefix=f"{settings.API_V1_PREFIX}", tags=["Reminder Preferences"])
-```
-
-- [ ] **Step 6: Run route tests**
-
-Run:
-
-```bash
-cd backend-service
-APP_ENV=testing DEBUG=false ../venv/bin/python -m pytest tests/test_reminder_preferences_routes.py -q
-```
-
-Expected: PASS.
-
-- [ ] **Step 7: Commit**
-
-```bash
-git add backend-service/app/schemas/reminders.py backend-service/app/routes/reminders.py backend-service/app/main.py backend-service/tests/test_reminder_preferences_routes.py
-git commit -m "feat: add reminder preference endpoints"
-```
-
-### Task 4: Add persisted notification endpoints
-
-**Files:**
-- Create: `backend-service/app/schemas/notifications.py`
-- Create: `backend-service/app/routes/notifications.py`
-- Modify: `backend-service/app/main.py`
-- Test: `backend-service/tests/test_notifications_routes.py`
-
-- [ ] **Step 1: Write tests**
-
-Cover:
-
-- unauthenticated requests return 401
-- `GET /api/v1/notifications` returns only current user notifications
-- `PATCH /api/v1/notifications/{notification_id}/read` marks one notification read
-- `PATCH /api/v1/notifications/read-all` marks all current user notifications read
-
-- [ ] **Step 2: Run tests to verify they fail**
-
-Run:
-
-```bash
-cd backend-service
-APP_ENV=testing DEBUG=false ../venv/bin/python -m pytest tests/test_notifications_routes.py -q
-```
-
-Expected: FAIL with 404.
-
-- [ ] **Step 3: Implement schemas**
-
-Use:
-
-```python
-from datetime import datetime
-from uuid import UUID
-from pydantic import BaseModel
-
-
-class NotificationResponse(BaseModel):
- id: UUID
- title: str
- body: str
- type: str
- data: dict | None = None
- is_read: bool
- read_at: datetime | None = None
- created_at: datetime
-
- class Config:
- from_attributes = True
-```
-
-- [ ] **Step 4: Implement routes**
-
-Use existing `Notification` model. Return `app.schemas.common.ApiResponse` for list and single-object responses.
-
-- [ ] **Step 5: Include router in `backend-service/app/main.py`**
-
-Add:
-
-```python
-from app.routes.notifications import router as notifications_router
-app.include_router(notifications_router, prefix=f"{settings.API_V1_PREFIX}/notifications", tags=["Notifications"])
-```
-
-- [ ] **Step 6: Run tests**
-
-Run:
-
-```bash
-cd backend-service
-APP_ENV=testing DEBUG=false ../venv/bin/python -m pytest tests/test_notifications_routes.py -q
-```
-
-Expected: PASS.
-
-- [ ] **Step 7: Commit**
-
-```bash
-git add backend-service/app/schemas/notifications.py backend-service/app/routes/notifications.py backend-service/app/main.py backend-service/tests/test_notifications_routes.py
-git commit -m "feat: expose persisted notifications"
-```
-
----
-
-## Chunk 3: FSRS Reminder Service
-
-### Task 5: Add FSRS scheduler adapter
-
-**Files:**
-- Create: `backend-service/app/services/fsrs_scheduler_service.py`
-- Test: `backend-service/tests/test_fsrs_scheduler_service.py`
-
-- [ ] **Step 1: Write deterministic tests**
-
-Cover:
-
-- due count uses `UserVocabulary.next_review_date <= now`
-- archived vocabulary is ignored
-- no due vocabulary returns zero
-- service accepts injected `now` so tests are stable
-
-- [ ] **Step 2: Run tests to verify they fail**
-
-Run:
-
-```bash
-cd backend-service
-APP_ENV=testing DEBUG=false ../venv/bin/python -m pytest tests/test_fsrs_scheduler_service.py -q
-```
-
-Expected: FAIL because service does not exist.
-
-- [ ] **Step 3: Implement service**
-
-Create a small adapter, not a new algorithm:
-
-```python
-from datetime import datetime, timezone
-from uuid import UUID
-from sqlalchemy import and_, func, select
-from sqlalchemy.ext.asyncio import AsyncSession
-
-from app.models.vocabulary import UserVocabulary, VocabularyStatus
-
-
-class FSRSSchedulerService:
- async def count_due_vocabulary(
- self,
- db: AsyncSession,
- *,
- user_id: UUID,
- now: datetime | None = None,
- ) -> int:
- now = now or datetime.now(timezone.utc)
- result = await db.execute(
- select(func.count()).select_from(UserVocabulary).where(
- and_(
- UserVocabulary.user_id == user_id,
- UserVocabulary.next_review_date <= now,
- UserVocabulary.status != VocabularyStatus.ARCHIVED,
- )
- )
- )
- return int(result.scalar() or 0)
-```
-
-- [ ] **Step 4: Run tests**
-
-Run:
-
-```bash
-cd backend-service
-APP_ENV=testing DEBUG=false ../venv/bin/python -m pytest tests/test_fsrs_scheduler_service.py -q
-```
-
-Expected: PASS.
-
-- [ ] **Step 5: Commit**
-
-```bash
-git add backend-service/app/services/fsrs_scheduler_service.py backend-service/tests/test_fsrs_scheduler_service.py
-git commit -m "test: guard fsrs due scheduling contract"
-```
-
-### Task 6: Add push notification service
-
-**Files:**
-- Create: `backend-service/app/services/push_notification_service.py`
-- Modify: `backend-service/app/routes/devices.py`
-- Test: `backend-service/tests/test_push_notification_service.py`
-- Test: `backend-service/tests/test_devices_routes.py`
-
-- [ ] **Step 1: Write tests**
-
-Cover:
-
-- service returns `False` when Firebase credentials are absent
-- service calls `firebase_admin.messaging.send_each_for_multicast` or equivalent when tokens exist
-- invalid tokens do not crash reminder job
-- `app/routes/devices.py` imports `select` and `and_`
-
-- [ ] **Step 2: Run tests to verify current gaps**
-
-Run:
-
-```bash
-cd backend-service
-APP_ENV=testing DEBUG=false ../venv/bin/python -m pytest tests/test_push_notification_service.py tests/test_devices_routes.py -q
-```
-
-Expected: FAIL for missing push service or device route import bugs.
-
-- [ ] **Step 3: Fix device route imports**
-
-In `backend-service/app/routes/devices.py`, add:
-
-```python
-from sqlalchemy import and_, select
-```
-
-- [ ] **Step 4: Implement `PushNotificationService`**
-
-Use `app.core.firebase_auth._init_firebase_app()` so Firebase is initialized consistently. Send payload:
-
-```python
-{
- "type": "vocabulary_review_reminder",
- "route": settings.REMINDER_REVIEW_ROUTE,
- "due_count": str(due_count),
-}
-```
-
-The method should be:
-
-```python
-async def send_review_reminder(
- self,
- *,
- tokens: list[str],
- due_count: int,
- title: str,
- body: str,
- data: dict[str, str],
-) -> bool:
- ...
-```
-
-Return `False` when tokens are empty or Firebase is not configured. Log exceptions instead of raising them to Celery, unless the failure is an unrecoverable code bug.
-
-- [ ] **Step 5: Run tests**
-
-Run:
-
-```bash
-cd backend-service
-APP_ENV=testing DEBUG=false ../venv/bin/python -m pytest tests/test_push_notification_service.py tests/test_devices_routes.py -q
-```
-
-Expected: PASS.
-
-- [ ] **Step 6: Commit**
-
-```bash
-git add backend-service/app/services/push_notification_service.py backend-service/app/routes/devices.py backend-service/tests/test_push_notification_service.py backend-service/tests/test_devices_routes.py
-git commit -m "feat: add fcm review reminder sender"
-```
-
-### Task 7: Add review reminder email method and templates
-
-**Files:**
-- Modify: `backend-service/app/services/email_service.py`
-- Create: `backend-service/app/templates/vocabulary_review_reminder.html`
-- Create: `backend-service/app/templates/vocabulary_review_reminder.txt`
-- Test: `backend-service/tests/test_email_service_review_reminder.py`
-
-- [ ] **Step 1: Write tests**
-
-Cover:
-
-- template renders display name, due count, and review link
-- missing SMTP configuration returns `False` and logs that no email was sent, like the existing reset/verification methods
-- configured SMTP builds `EmailMessage` with subject `LexiLingo - Time to review your vocabulary`
-
-- [ ] **Step 2: Run tests to verify failure**
-
-Run:
-
-```bash
-cd backend-service
-APP_ENV=testing DEBUG=false ../venv/bin/python -m pytest tests/test_email_service_review_reminder.py -q
-```
-
-Expected: FAIL because method/templates do not exist.
-
-- [ ] **Step 3: Add templates**
-
-Text template must include:
-
-```txt
-Hi {display_name},
-
-You have {due_count} vocabulary review(s) waiting in LexiLingo.
-
-Review now: {review_link}
-
-You can change reminder settings here: {settings_link}
-```
-
-HTML template should match the existing email style but remain simple.
-
-- [ ] **Step 4: Add email method**
-
-In `EmailService` add:
-
-```python
-@classmethod
-async def send_vocabulary_review_reminder_email(
- cls,
- *,
- to_email: str,
- display_name: str | None,
- due_count: int,
-) -> bool:
- review_link = f"{settings.APP_PUBLIC_URL.rstrip('/')}{settings.REMINDER_REVIEW_ROUTE}"
- settings_link = f"{settings.APP_PUBLIC_URL.rstrip('/')}/settings"
- ...
-```
-
-Use `_render_template` and `_send_message_blocking` exactly like existing methods.
-
-- [ ] **Step 5: Run tests**
-
-Run:
-
-```bash
-cd backend-service
-APP_ENV=testing DEBUG=false ../venv/bin/python -m pytest tests/test_email_service_review_reminder.py -q
-```
-
-Expected: PASS.
-
-- [ ] **Step 6: Commit**
-
-```bash
-git add backend-service/app/services/email_service.py backend-service/app/templates/vocabulary_review_reminder.html backend-service/app/templates/vocabulary_review_reminder.txt backend-service/tests/test_email_service_review_reminder.py
-git commit -m "feat: add vocabulary review reminder email"
-```
-
-### Task 8: Add reminder orchestration service
-
-**Files:**
-- Create: `backend-service/app/services/reminder_service.py`
-- Test: `backend-service/tests/test_reminder_service.py`
-
-- [ ] **Step 1: Write service tests**
-
-Cover:
-
-- disabled global `REMINDERS_ENABLED` returns zero sent
-- `REMINDER_DRY_RUN=true` creates skipped/dry-run delivery records but does not call FCM/SMTP
-- user preference disabled is skipped
-- due count below `min_due_count` is skipped
-- push delivery creates `Notification` and `ReminderDelivery`
-- email respects `email_cadence_days`
-- duplicate scan for same local date/channel does not send twice
-- `next_check_at` is moved to next local reminder time after evaluation
-
-- [ ] **Step 2: Run tests to verify failure**
-
-Run:
-
-```bash
-cd backend-service
-APP_ENV=testing DEBUG=false ../venv/bin/python -m pytest tests/test_reminder_service.py -q
-```
-
-Expected: FAIL because service does not exist.
-
-- [ ] **Step 3: Implement helper methods**
-
-In `ReminderService`, implement:
-
-- `build_dedupe_key(user_id, channel, local_date)`
-- `compute_next_check_at(reminder_time_local, timezone_name, now_utc)`
-- `scan_due_preferences(db, now_utc, limit)`
-- `process_preference(db, preference, now_utc)`
-
-- [ ] **Step 4: Implement DB notification creation**
-
-Notification rows should use:
-
-```python
-Notification(
- user_id=user.id,
- title="Vocabulary review is ready",
- body=f"You have {due_count} word{'s' if due_count != 1 else ''} to review.",
- type="vocabulary_review_reminder",
- data={
- "route": settings.REMINDER_REVIEW_ROUTE,
- "due_count": due_count,
- },
-)
-```
-
-- [ ] **Step 5: Implement channel dispatch**
-
-Push:
-
-- load active tokens from `UserDevice` where `user_id == preference.user_id` and `fcm_token is not null`
-- create delivery with `channel="push"`
-- call `PushNotificationService.send_review_reminder`
-- mark delivery `sent`, `skipped`, or `failed`
-
-Email:
-
-- only if `email_enabled`
-- only if user has email
-- only if `last_email_sent_at` is null or older than `email_cadence_days`
-- create delivery with `channel="email"`
-- call `EmailService.send_vocabulary_review_reminder_email`
-
-- [ ] **Step 6: Run tests**
-
-Run:
-
-```bash
-cd backend-service
-APP_ENV=testing DEBUG=false ../venv/bin/python -m pytest tests/test_reminder_service.py -q
-```
-
-Expected: PASS.
-
-- [ ] **Step 7: Commit**
-
-```bash
-git add backend-service/app/services/reminder_service.py backend-service/tests/test_reminder_service.py
-git commit -m "feat: orchestrate fsrs reminders"
-```
-
----
-
-## Chunk 4: Celery Worker And Deployment
-
-### Task 9: Add Celery app and reminder task
-
-**Files:**
-- Create: `backend-service/app/core/celery_app.py`
-- Create: `backend-service/app/tasks/reminders.py`
-- Test: `backend-service/tests/test_reminder_service.py`
-
-- [ ] **Step 1: Write Celery config test**
-
-Add:
-
-```python
-def test_celery_has_reminder_schedule():
- from app.core.celery_app import celery_app
-
- assert "scan-fsrs-reminders" in celery_app.conf.beat_schedule
-```
-
-- [ ] **Step 2: Run test to verify failure**
-
-Run:
-
-```bash
-cd backend-service
-APP_ENV=testing DEBUG=false ../venv/bin/python -m pytest tests/test_reminder_service.py::test_celery_has_reminder_schedule -q
-```
-
-Expected: FAIL because Celery app does not exist.
-
-- [ ] **Step 3: Implement Celery app**
-
-`backend-service/app/core/celery_app.py`:
-
-```python
-from celery import Celery
-from app.core.config import settings
-
-celery_app = Celery(
- "lexilingo",
- broker=settings.effective_celery_broker_url,
- backend=settings.effective_celery_result_backend,
- include=["app.tasks.reminders"],
-)
-
-celery_app.conf.timezone = "UTC"
-celery_app.conf.enable_utc = True
-celery_app.conf.task_acks_late = True
-celery_app.conf.worker_prefetch_multiplier = 1
-celery_app.conf.beat_schedule = {
- "scan-fsrs-reminders": {
- "task": "app.tasks.reminders.scan_fsrs_reminders",
- "schedule": settings.REMINDER_SCAN_INTERVAL_SECONDS,
- }
-}
-```
-
-- [ ] **Step 4: Implement task**
-
-`backend-service/app/tasks/reminders.py`:
-
-```python
-import asyncio
-import logging
-from datetime import datetime, timezone
-
-from app.core.celery_app import celery_app
-from app.core.database import AsyncSessionLocal
-from app.services.reminder_service import ReminderService
-
-logger = logging.getLogger(__name__)
-
-
-@celery_app.task(name="app.tasks.reminders.scan_fsrs_reminders")
-def scan_fsrs_reminders() -> dict:
- return asyncio.run(_scan())
-
-
-async def _scan() -> dict:
- async with AsyncSessionLocal() as db:
- result = await ReminderService().scan_due_preferences(
- db,
- now_utc=datetime.now(timezone.utc),
- )
- await db.commit()
- return result
-```
-
-- [ ] **Step 5: Run tests**
-
-Run:
-
-```bash
-cd backend-service
-APP_ENV=testing DEBUG=false ../venv/bin/python -m pytest tests/test_reminder_service.py::test_celery_has_reminder_schedule -q
-```
-
-Expected: PASS.
-
-- [ ] **Step 6: Smoke test Celery import**
-
-Run:
-
-```bash
-cd backend-service
-APP_ENV=testing DEBUG=false ../venv/bin/python -m celery -A app.core.celery_app inspect ping
-```
-
-Expected locally: may report no nodes if worker is not running, but import must not crash.
-
-- [ ] **Step 7: Commit**
-
-```bash
-git add backend-service/app/core/celery_app.py backend-service/app/tasks/reminders.py backend-service/tests/test_reminder_service.py
-git commit -m "feat: schedule fsrs reminder worker"
-```
-
-### Task 10: Add local and production process config
-
-**Files:**
-- Modify: `docker-compose.yml`
-- Modify: `docker-compose.production.yml`
-- Modify: `backend-service/render.yaml`
-
-- [ ] **Step 1: Add local worker service**
-
-In `docker-compose.yml`, add:
-
-```yaml
- backend-reminder-worker:
- build:
- context: ./backend-service
- dockerfile: Dockerfile
- command: celery -A app.core.celery_app worker --loglevel=INFO --concurrency=1
- environment:
- DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-lexilingo}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-lexilingo}
- REDIS_URL: redis://redis:6379/0
- SECRET_KEY: ${SECRET_KEY:?Set SECRET_KEY in .env}
- REMINDERS_ENABLED: ${REMINDERS_ENABLED:-false}
- REMINDER_DRY_RUN: ${REMINDER_DRY_RUN:-true}
- depends_on:
- postgres:
- condition: service_healthy
- redis:
- condition: service_healthy
- networks:
- - lexilingo-network
-```
-
-- [ ] **Step 2: Add local beat service**
-
-In `docker-compose.yml`, add:
-
-```yaml
- backend-reminder-beat:
- build:
- context: ./backend-service
- dockerfile: Dockerfile
- command: celery -A app.core.celery_app beat --loglevel=INFO
- environment:
- DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-lexilingo}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-lexilingo}
- REDIS_URL: redis://redis:6379/0
- SECRET_KEY: ${SECRET_KEY:?Set SECRET_KEY in .env}
- REMINDERS_ENABLED: ${REMINDERS_ENABLED:-false}
- REMINDER_DRY_RUN: ${REMINDER_DRY_RUN:-true}
- depends_on:
- redis:
- condition: service_healthy
- networks:
- - lexilingo-network
-```
-
-- [ ] **Step 3: Add production compose services**
-
-Mirror worker and beat in `docker-compose.production.yml`, using `Dockerfile.prod`, `env_file`, and existing production Redis URL.
-
-- [ ] **Step 4: Add Render workers**
-
-In `backend-service/render.yaml`, add two worker services:
-
-```yaml
- - type: worker
- name: lexilingo-reminder-worker
- env: python
- region: singapore
- buildCommand: |
- pip install --upgrade pip
- pip install -r requirements.txt
- startCommand: celery -A app.core.celery_app worker --loglevel=INFO --concurrency=1
- autoDeploy: true
- branch: main
- envVars:
- - key: REMINDERS_ENABLED
- value: "false"
- - key: REMINDER_DRY_RUN
- value: "true"
- - key: DATABASE_URL
- sync: false
- - key: REDIS_URL
- sync: false
- - key: SECRET_KEY
- sync: false
-
- - type: worker
- name: lexilingo-reminder-beat
- env: python
- region: singapore
- buildCommand: |
- pip install --upgrade pip
- pip install -r requirements.txt
- startCommand: celery -A app.core.celery_app beat --loglevel=INFO
- autoDeploy: true
- branch: main
- envVars:
- - key: REMINDERS_ENABLED
- value: "false"
- - key: REMINDER_DRY_RUN
- value: "true"
- - key: DATABASE_URL
- sync: false
- - key: REDIS_URL
- sync: false
- - key: SECRET_KEY
- sync: false
-```
-
-- [ ] **Step 5: Validate process config**
-
-Run:
-
-```bash
-docker compose -f docker-compose.yml config >/tmp/lexilingo-compose.yml
-docker compose -f docker-compose.production.yml config >/tmp/lexilingo-compose-production.yml
-cd backend-service
-python -m compileall app/core/celery_app.py app/tasks/reminders.py
-```
-
-Expected: both Compose files parse and Celery modules compile. Validate `backend-service/render.yaml` with Render Blueprint preview before production apply.
-
-- [ ] **Step 6: Commit**
-
-```bash
-git add docker-compose.yml docker-compose.production.yml backend-service/render.yaml
-git commit -m "chore: configure reminder worker processes"
-```
-
----
-
-## Chunk 5: Flutter Settings And Notification UX
-
-### Task 11: Extend settings model safely
-
-**Files:**
-- Modify: `flutter-app/lib/features/user/domain/entities/settings.dart`
-- Modify: `flutter-app/lib/features/user/data/models/settings_model.dart`
-- Modify: `flutter-app/lib/features/user/data/datasources/settings_local_data_source.dart`
-- Modify: `flutter-app/lib/features/user/data/datasources/settings_local_data_source_web.dart`
-- Modify: `flutter-app/lib/core/services/database_helper.dart`
-- Test: `flutter-app/test/features/user/settings_model_test.dart`
-
-- [ ] **Step 1: Write model tests**
-
-Cover:
-
-- old local JSON without new fields still parses
-- backend snake_case JSON parses
-- `toJson()` preserves old local keys and includes new fields
-
-- [ ] **Step 2: Run tests to verify failure**
-
-Run:
-
-```bash
-cd flutter-app
-flutter test test/features/user/settings_model_test.dart
-```
-
-Expected: FAIL because new fields do not exist.
-
-- [ ] **Step 3: Add fields to `Settings`**
-
-Add fields:
-
-```dart
-final bool pushReminderEnabled;
-final bool emailReminderEnabled;
-final int emailCadenceDays;
-final int reminderMinDueCount;
-final String reminderTimezone;
-```
-
-Defaults:
-
-- `pushReminderEnabled = true`
-- `emailReminderEnabled = false`
-- `emailCadenceDays = 7`
-- `reminderMinDueCount = 1`
-- `reminderTimezone = 'Asia/Ho_Chi_Minh'`
-
-- [ ] **Step 4: Update `SettingsModel` parsing**
-
-Accept both:
-
-- local camelCase: `pushReminderEnabled`
-- backend snake_case: `push_enabled`
-
-Map the backend `enabled` field to `notificationEnabled` when reading the remote response.
-
-- [ ] **Step 5: Update local data sources**
-
-Add new fields to create/update payloads. Keep existing keys unchanged.
-
-- [ ] **Step 6: Bump sqflite DB version**
-
-In `flutter-app/lib/core/services/database_helper.dart`, bump `version` from `6` to `7` and add:
-
-```dart
-if (oldVersion < 7) {
- await db.execute('ALTER TABLE settings ADD COLUMN pushReminderEnabled BOOLEAN DEFAULT 1');
- await db.execute('ALTER TABLE settings ADD COLUMN emailReminderEnabled BOOLEAN DEFAULT 0');
- await db.execute('ALTER TABLE settings ADD COLUMN emailCadenceDays INTEGER DEFAULT 7');
- await db.execute('ALTER TABLE settings ADD COLUMN reminderMinDueCount INTEGER DEFAULT 1');
- await db.execute('ALTER TABLE settings ADD COLUMN reminderTimezone TEXT DEFAULT "Asia/Ho_Chi_Minh"');
-}
-```
-
-Also update `_createSettingsTable`.
-
-- [ ] **Step 7: Run model tests**
-
-Run:
-
-```bash
-cd flutter-app
-flutter test test/features/user/settings_model_test.dart
-```
-
-Expected: PASS.
-
-- [ ] **Step 8: Commit**
-
-```bash
-git add flutter-app/lib/features/user/domain/entities/settings.dart flutter-app/lib/features/user/data/models/settings_model.dart flutter-app/lib/features/user/data/datasources/settings_local_data_source.dart flutter-app/lib/features/user/data/datasources/settings_local_data_source_web.dart flutter-app/lib/core/services/database_helper.dart flutter-app/test/features/user/settings_model_test.dart
-git commit -m "feat: extend reminder settings model"
-```
-
-### Task 12: Sync reminder settings with backend
-
-**Files:**
-- Create: `flutter-app/lib/features/user/data/datasources/settings_remote_data_source.dart`
-- Modify: `flutter-app/lib/features/user/data/repositories/settings_repository_impl.dart`
-- Modify: `flutter-app/lib/features/user/domain/repositories/settings_repository.dart`
-- Modify: `flutter-app/lib/features/user/di/user_di.dart`
-- Modify: `flutter-app/lib/features/user/presentation/providers/settings_provider.dart`
-- Test: `flutter-app/test/features/user/settings_provider_reminder_test.dart`
-
-- [ ] **Step 1: Write provider tests**
-
-Cover:
-
-- load settings uses local data when backend is unavailable
-- update notification settings persists locally first
-- backend update is called with `/users/me/reminder-preferences`
-- local fallback notification is scheduled only when backend sync fails or FCM is unavailable
-
-- [ ] **Step 2: Run tests to verify failure**
-
-Run:
-
-```bash
-cd flutter-app
-flutter test test/features/user/settings_provider_reminder_test.dart
-```
-
-Expected: FAIL.
-
-- [ ] **Step 3: Implement remote data source**
-
-Use `ApiClient.get` for reads and either `ApiClient.putEnvelope` or `ApiClient.put` for updates, depending on the existing response helpers:
-
-```dart
-class SettingsRemoteDataSource {
- final ApiClient apiClient;
-
- SettingsRemoteDataSource({required this.apiClient});
-
- Future
getReminderPreferences(String userId) async {
- final response = await apiClient.get('/users/me/reminder-preferences');
- return SettingsModel.fromJson(response['data'] as Map);
- }
-
- Future updateReminderPreferences(Settings settings) async {
- final response = await apiClient.putEnvelope