from __future__ import annotations from fnmatch import fnmatchcase from typing import Any, Dict from news_mcp.config import NEWS_ENTITY_BLACKLIST from news_mcp.entity_normalize import normalize_entities from news_mcp.llm import call_extraction, call_summary def _matches_blacklist(value: str, blacklist=None) -> bool: patterns = [x.strip().lower() for x in (blacklist if blacklist is not None else NEWS_ENTITY_BLACKLIST) if x and x.strip()] key = str(value).strip().lower() if not key: return True return any(fnmatchcase(key, pattern) for pattern in patterns) def _filter_entities(entities, blacklist=None): out = [] for ent in entities or []: if _matches_blacklist(ent, blacklist=blacklist): continue out.append(ent) return out async def classify_cluster_groq(cluster: Dict[str, Any]) -> Dict[str, Any]: parsed = await call_extraction(cluster) out = dict(cluster) topic = parsed.get("topic", cluster.get("topic")) if topic and _matches_blacklist(topic): topic = "other" out.update({ "topic": topic, "entities": normalize_entities(_filter_entities(parsed.get("entities", []))), "sentiment": parsed.get("sentiment", "neutral"), "sentimentScore": parsed.get("sentimentScore"), "keywords": normalize_entities(_filter_entities(parsed.get("keywords", []))), }) return out async def summarize_cluster_groq(cluster: Dict[str, Any]) -> Dict[str, Any]: parsed = await call_summary(cluster) return parsed