# llm_enrich.py
  1. from __future__ import annotations
  2. from fnmatch import fnmatchcase
  3. from typing import Any, Dict
  4. from news_mcp.config import NEWS_ENTITY_BLACKLIST
  5. from news_mcp.entity_normalize import normalize_entities
  6. from news_mcp.llm import call_extraction, call_summary
  7. def _matches_blacklist(value: str, blacklist=None) -> bool:
  8. patterns = [x.strip().lower() for x in (blacklist if blacklist is not None else NEWS_ENTITY_BLACKLIST) if x and x.strip()]
  9. key = str(value).strip().lower()
  10. if not key:
  11. return True
  12. return any(fnmatchcase(key, pattern) for pattern in patterns)
  13. def _filter_entities(entities, blacklist=None):
  14. out = []
  15. for ent in entities or []:
  16. if _matches_blacklist(ent, blacklist=blacklist):
  17. continue
  18. out.append(ent)
  19. return out
  20. async def classify_cluster_groq(cluster: Dict[str, Any]) -> Dict[str, Any]:
  21. parsed = await call_extraction(cluster)
  22. out = dict(cluster)
  23. topic = parsed.get("topic", cluster.get("topic"))
  24. if topic and _matches_blacklist(topic):
  25. topic = "other"
  26. out.update({
  27. "topic": topic,
  28. "entities": normalize_entities(_filter_entities(parsed.get("entities", []))),
  29. "sentiment": parsed.get("sentiment", "neutral"),
  30. "sentimentScore": parsed.get("sentimentScore"),
  31. "keywords": normalize_entities(_filter_entities(parsed.get("keywords", []))),
  32. })
  33. return out
  34. async def summarize_cluster_groq(cluster: Dict[str, Any]) -> Dict[str, Any]:
  35. parsed = await call_summary(cluster)
  36. return parsed