|
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|
|
import asyncio
|
|
import asyncio
|
|
|
import hashlib
|
|
import hashlib
|
|
|
import logging
|
|
import logging
|
|
|
|
|
+import sys
|
|
|
from collections import defaultdict
|
|
from collections import defaultdict
|
|
|
from datetime import datetime, timezone
|
|
from datetime import datetime, timezone
|
|
|
from typing import Any, Dict
|
|
from typing import Any, Dict
|
|
@@ -29,6 +30,8 @@ from news_mcp.storage.sqlite_store import SQLiteClusterStore
|
|
|
from news_mcp.trends_resolution import resolve_entity_via_trends
|
|
from news_mcp.trends_resolution import resolve_entity_via_trends
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
+MAX_ENRICHMENT_RETRIES = 3 # per-cluster retries before giving up for this cycle
|
|
|
|
|
+
|
|
|
async def _enrich_single_cluster(
|
|
async def _enrich_single_cluster(
|
|
|
c: dict,
|
|
c: dict,
|
|
|
topic: str,
|
|
topic: str,
|
|
@@ -37,7 +40,13 @@ async def _enrich_single_cluster(
|
|
|
store: SQLiteClusterStore,
|
|
store: SQLiteClusterStore,
|
|
|
logger: logging.Logger,
|
|
logger: logging.Logger,
|
|
|
) -> dict:
|
|
) -> dict:
|
|
|
- """Enrich one cluster: heuristic + optional LLM extraction, concurrency-limited."""
|
|
|
|
|
|
|
+ """Enrich one cluster: heuristic + optional LLM extraction, concurrency-limited.
|
|
|
|
|
+
|
|
|
|
|
+ On LLM failure the cluster is retried up to MAX_ENRICHMENT_RETRIES times
|
|
|
|
|
+ with exponential backoff. If all retries are exhausted the cluster is
|
|
|
|
|
+ marked with enrichment_failed_at and enrichment_retry_count so the next
|
|
|
|
|
+ polling cycle can re-attempt it.
|
|
|
|
|
+ """
|
|
|
c2 = enrich_cluster(c)
|
|
c2 = enrich_cluster(c)
|
|
|
c2.setdefault("topic", topic)
|
|
c2.setdefault("topic", topic)
|
|
|
|
|
|
|
@@ -64,16 +73,35 @@ async def _enrich_single_cluster(
|
|
|
if existing.get("topic"):
|
|
if existing.get("topic"):
|
|
|
c2["topic"] = existing.get("topic")
|
|
c2["topic"] = existing.get("topic")
|
|
|
else:
|
|
else:
|
|
|
- # Acquire semaphore before making outbound LLM call
|
|
|
|
|
- async with semaphore:
|
|
|
|
|
|
|
+ # Retry loop with exponential backoff | semaphore held per-attempt
|
|
|
|
|
+ last_err = ""
|
|
|
|
|
+ for attempt in range(1 + MAX_ENRICHMENT_RETRIES):
|
|
|
|
|
+ if attempt > 0:
|
|
|
|
|
+ backoff = 2 ** attempt
|
|
|
|
|
+ logger.info(
|
|
|
|
|
+ "retry cluster=%s topic=%s attempt=%d/%d backoff=%.0fs",
|
|
|
|
|
+ cluster_id, topic, attempt, MAX_ENRICHMENT_RETRIES, backoff,
|
|
|
|
|
+ )
|
|
|
|
|
+ await asyncio.sleep(backoff)
|
|
|
try:
|
|
try:
|
|
|
- c2 = await classify_cluster_llm(c2)
|
|
|
|
|
|
|
+ async with semaphore:
|
|
|
|
|
+ c2 = await classify_cluster_llm(dict(c2))
|
|
|
|
|
+ break # success
|
|
|
except Exception:
|
|
except Exception:
|
|
|
- logger.exception(
|
|
|
|
|
- "LLM enrichment failed for cluster %s (topic %s)",
|
|
|
|
|
- c2.get("cluster_id"), topic,
|
|
|
|
|
|
|
+ last_err = str(sys.exc_info()[1])[:200] if sys.exc_info()[1] else "unknown"
|
|
|
|
|
+ logger.warning(
|
|
|
|
|
+ "LLM enrichment failed cluster=%s topic=%s attempt=%d/%d err=%s",
|
|
|
|
|
+ cluster_id, topic, attempt, MAX_ENRICHMENT_RETRIES, last_err,
|
|
|
)
|
|
)
|
|
|
- c2["enrichment_failed_at"] = datetime.now(timezone.utc).isoformat()
|
|
|
|
|
|
|
+ else:
|
|
|
|
|
+ # Loop completed without break = all retries exhausted
|
|
|
|
|
+ prev_count = c2.get("enrichment_retry_count", 0)
|
|
|
|
|
+ c2["enrichment_failed_at"] = datetime.now(timezone.utc).isoformat()
|
|
|
|
|
+ c2["enrichment_retry_count"] = prev_count + 1
|
|
|
|
|
+ logger.error(
|
|
|
|
|
+ "LLM enrichment exhausted cluster=%s topic=%s after %d retries",
|
|
|
|
|
+ cluster_id, topic, MAX_ENRICHMENT_RETRIES,
|
|
|
|
|
+ )
|
|
|
|
|
|
|
|
return c2
|
|
return c2
|
|
|
|
|
|
|
@@ -210,6 +238,32 @@ async def refresh_clusters(topic: str | None = None, limit: int = 80) -> None:
|
|
|
store.upsert_clusters(group, topic=final_topic)
|
|
store.upsert_clusters(group, topic=final_topic)
|
|
|
logger.info("refresh stored topic=%s clusters=%s", final_topic, len(group))
|
|
logger.info("refresh stored topic=%s clusters=%s", final_topic, len(group))
|
|
|
|
|
|
|
|
|
|
+ # Retry previously failed enrichments
|
|
|
|
|
+ failed_clusters = store.get_failed_enrichment_clusters(max_retries=3)
|
|
|
|
|
+ if failed_clusters:
|
|
|
|
|
+ logger.info("retry enrich failed clusters count=%d", len(failed_clusters))
|
|
|
|
|
+ retry_tasks = [
|
|
|
|
|
+ _enrich_single_cluster(
|
|
|
|
|
+ c, str(c.get("topic") or "other"), True, llm_semaphore, store, logger
|
|
|
|
|
+ )
|
|
|
|
|
+ for c in failed_clusters
|
|
|
|
|
+ ]
|
|
|
|
|
+ retry_results = await asyncio.gather(*retry_tasks, return_exceptions=False)
|
|
|
|
|
+ # Persist retried results
|
|
|
|
|
+ by_topic_retry: Dict[str, list] = {}
|
|
|
|
|
+ for c2 in retry_results:
|
|
|
|
|
+ # Clear stale failure marker on success
|
|
|
|
|
+ if not c2.get("enrichment_failed_at") or c2.get("entities"):
|
|
|
|
|
+ c2.pop("enrichment_failed_at", None)
|
|
|
|
|
+ c2.pop("enrichment_retry_count", None)
|
|
|
|
|
+ t = str(c2.get("topic") or "other").strip().lower()
|
|
|
|
|
+ if t not in {x.lower() for x in DEFAULT_TOPICS}:
|
|
|
|
|
+ t = "other"
|
|
|
|
|
+ by_topic_retry.setdefault(t, []).append(c2)
|
|
|
|
|
+ for t, group in by_topic_retry.items():
|
|
|
|
|
+ store.upsert_clusters(group, topic=t)
|
|
|
|
|
+ logger.info("retry stored topic=%s clusters=%s", t, len(group))
|
|
|
|
|
+
|
|
|
prune_result = store.prune_if_due(
|
|
prune_result = store.prune_if_due(
|
|
|
pruning_enabled=NEWS_PRUNING_ENABLED,
|
|
pruning_enabled=NEWS_PRUNING_ENABLED,
|
|
|
retention_days=NEWS_RETENTION_DAYS,
|
|
retention_days=NEWS_RETENTION_DAYS,
|