| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051 |
- from __future__ import annotations
- from fnmatch import fnmatchcase
- from typing import Any, Dict
- from news_mcp.config import NEWS_ENTITY_BLACKLIST
- from news_mcp.entity_normalize import normalize_entities
- from news_mcp.llm import call_extraction, call_summary
- from news_mcp.trends_resolution import resolve_entity_via_trends
- def _matches_blacklist(value: str, blacklist=None) -> bool:
- patterns = [x.strip().lower() for x in (blacklist if blacklist is not None else NEWS_ENTITY_BLACKLIST) if x and x.strip()]
- key = str(value).strip().lower()
- if not key:
- return True
- return any(fnmatchcase(key, pattern) for pattern in patterns)
def _filter_entities(entities, blacklist=None):
    """Return *entities* with blacklisted values removed, order preserved.

    A falsy *entities* argument (None, empty list) yields an empty list.
    Delegates the per-item decision to ``_matches_blacklist``.
    """
    return [
        entity
        for entity in (entities or [])
        if not _matches_blacklist(entity, blacklist=blacklist)
    ]
async def classify_cluster_groq(cluster: Dict[str, Any]) -> Dict[str, Any]:
    """Classify *cluster* via LLM extraction and return an enriched copy.

    Awaits ``call_extraction`` for the raw LLM output, then:
    - keeps the extracted topic unless it matches the blacklist, in which
      case it is collapsed to ``"other"`` (falling back to the cluster's
      existing topic when the LLM provides none);
    - filters entities/keywords through the blacklist and normalizes them;
    - resolves each surviving entity through Trends;
    - passes sentiment fields through, defaulting sentiment to "neutral".

    The input dict is not mutated; a shallow copy is returned.
    """
    parsed = await call_extraction(cluster)
    enriched = dict(cluster)

    topic = parsed.get("topic", cluster.get("topic"))
    if topic and _matches_blacklist(topic):
        # Suppress blacklisted topics instead of dropping the cluster.
        topic = "other"

    entities = normalize_entities(_filter_entities(parsed.get("entities", [])))
    keywords = normalize_entities(_filter_entities(parsed.get("keywords", [])))

    enriched["topic"] = topic
    enriched["entities"] = entities
    enriched["entityResolutions"] = [resolve_entity_via_trends(ent) for ent in entities]
    enriched["sentiment"] = parsed.get("sentiment", "neutral")
    enriched["sentimentScore"] = parsed.get("sentimentScore")
    enriched["keywords"] = keywords
    return enriched
async def summarize_cluster_groq(cluster: Dict[str, Any]) -> Dict[str, Any]:
    """Return the LLM-generated summary payload for *cluster* unchanged."""
    return await call_summary(cluster)
|