# llm_enrich.py

  1. from __future__ import annotations
  2. from fnmatch import fnmatchcase
  3. from typing import Any, Dict
  4. from news_mcp.config import NEWS_ENTITY_BLACKLIST
  5. from news_mcp.entity_normalize import normalize_entities
  6. from news_mcp.llm import call_extraction, call_summary
  7. from news_mcp.trends_resolution import resolve_entity_via_trends
  8. def _matches_blacklist(value: str, blacklist=None) -> bool:
  9. patterns = [x.strip().lower() for x in (blacklist if blacklist is not None else NEWS_ENTITY_BLACKLIST) if x and x.strip()]
  10. key = str(value).strip().lower()
  11. if not key:
  12. return True
  13. return any(fnmatchcase(key, pattern) for pattern in patterns)
  14. def _filter_entities(entities, blacklist=None):
  15. out = []
  16. for ent in entities or []:
  17. if _matches_blacklist(ent, blacklist=blacklist):
  18. continue
  19. out.append(ent)
  20. return out
  21. async def classify_cluster_llm(cluster: Dict[str, Any]) -> Dict[str, Any]:
  22. parsed = await call_extraction(cluster)
  23. out = dict(cluster)
  24. topic = parsed.get("topic", cluster.get("topic"))
  25. if topic and _matches_blacklist(topic):
  26. topic = "other"
  27. entities = normalize_entities(_filter_entities(parsed.get("entities", [])))
  28. keywords = normalize_entities(_filter_entities(parsed.get("keywords", [])))
  29. out.update({
  30. "topic": topic,
  31. "entities": entities,
  32. "entityResolutions": [resolve_entity_via_trends(e) for e in entities],
  33. "sentiment": parsed.get("sentiment", "neutral"),
  34. "sentimentScore": parsed.get("sentimentScore"),
  35. "keywords": keywords,
  36. })
  37. return out
  38. async def summarize_cluster_llm(cluster: Dict[str, Any]) -> Dict[str, Any]:
  39. parsed = await call_summary(cluster)
  40. return parsed
  41. # Backward-compatible aliases during the transition away from provider-specific naming.
  42. classify_cluster_groq = classify_cluster_llm
  43. summarize_cluster_groq = summarize_cluster_llm