1 viikko sitten · 33f1015593
--- a/news_mcp/dashboard/dashboard_store.py
+++ b/news_mcp/dashboard/dashboard_store.py
@@ -156,17 +156,22 @@ class DashboardStore:
 
				     # ── Sentiment Series ────────────────────────────────────────────
			
 
				 
			
 
				     def get_sentiment_series(
			
 
				-        self,
			
 
				-        topic: str | None = None,
			
 
				-        hours: float = 24,
			
 
				-        bucket_hours: float = 1,
			
 
				-    ) -> list[dict[str, Any]]:
			
 
				-        cutoff = (datetime.now(timezone.utc) - timedelta(hours=hours)).isoformat()
			
 
				-        now = datetime.now(timezone.utc).isoformat()
			
 
				-        query = "SELECT payload FROM clusters WHERE updated_at >= ? AND updated_at <= ?"
			
 
				-        params: list = [cutoff, now]
			
 
				+            self,
			
 
				+            topic: str | None = None,
			
 
				+            hours: float = 24,
			
 
				+            bucket_hours: float = 1,
			
 
				+        ) -> list[dict[str, Any]]:
			
 
				+        """Sentiment score averaged per time bucket.
			
 
				+
			
 
				+        Filters by the cluster's own event timestamp (payload.timestamp),
			
 
				+        not by updated_at which tracks row modification time.
			
 
				+        """
			
 
				+        cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
			
 
				+
			
 
				+        query = "SELECT payload FROM clusters"
			
 
				+        params: list = []
			
 
				         if topic and topic != "all":
			
 
				-            query += " AND topic = ?"
			
 
				+            query += " WHERE topic = ?"
			
 
				             params.append(topic)
			
 
				         query += " ORDER BY updated_at ASC"
			
 
				 
			
@@ -177,7 +182,7 @@ class DashboardStore:
 
				         def _parse_ts(ts: Any) -> datetime | None:
			
 
				             if not ts:
			
 
				                 return None
			
 
				-            s = str(ts)
			
 
				+            s = str(ts).strip()
			
 
				             try:
			
 
				                 dt = datetime.fromisoformat(s.replace("Z", "+00:00"))
			
 
				             except Exception:
			
@@ -189,7 +194,6 @@ class DashboardStore:
 
				                 dt = dt.replace(tzinfo=timezone.utc)
			
 
				             return dt.astimezone(timezone.utc)
			
 
				 
			
 
				-        step_hours = max(1, int(bucket_hours))
			
 
				         buckets: dict[datetime, list[float]] = {}
			
 
				         for (payload_text,) in rows:
			
 
				             c = json.loads(payload_text)
			
@@ -197,10 +201,12 @@ class DashboardStore:
 
				             score = c.get("sentimentScore")
			
 
				             if dt is None or score is None:
			
 
				                 continue
			
 
				+            if dt < cutoff:
			
 
				+                continue
			
 
				             bucket_key = dt.replace(minute=0, second=0, microsecond=0)
			
 
				-            if step_hours > 1:
			
 
				+            if bucket_hours > 1:
			
 
				                 bucket_key = bucket_key.replace(
			
 
				-                    hour=(bucket_key.hour // step_hours) * step_hours
			
 
				+                    hour=(bucket_key.hour // int(bucket_hours)) * int(bucket_hours)
			
 
				                 )
			
 
				             buckets.setdefault(bucket_key, []).append(float(score))
			
 
				 
			
@@ -223,22 +229,46 @@ class DashboardStore:
 
				         hours: float = 24,
			
 
				         limit: int = 30,
			
 
				     ) -> list[dict[str, Any]]:
			
 
				-        cutoff = (datetime.now(timezone.utc) - timedelta(hours=hours)).isoformat()
			
 
				-        now = datetime.now(timezone.utc).isoformat()
			
 
				+        """Top entities by mention count in recent clusters.
			
 
				+
			
 
				+        Filters by the cluster's own event timestamp (payload.timestamp),
			
 
				+        not by updated_at which tracks row modification time.
			
 
				+        """
			
 
				+        cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
			
 
				+    
			
 
				+        query = "SELECT payload FROM clusters"
			
 
				+        params: list = []
			
 
				         with self._store._conn() as conn:
			
 
				-            cur = conn.execute(
			
 
				-                "SELECT payload FROM clusters WHERE updated_at >= ? AND updated_at <= ? "
			
 
				-                "ORDER BY updated_at DESC LIMIT 500",
			
 
				-                (cutoff, now),
			
 
				-            )
			
 
				+            cur = conn.execute(query, params)
			
 
				             rows = cur.fetchall()
			
 
				-
			
 
				+    
			
 
				+        def _parse_ts(ts):
			
 
				+            if not ts:
			
 
				+                return None
			
 
				+            s = str(ts).strip()
			
 
				+            try:
			
 
				+                dt = datetime.fromisoformat(s.replace("Z", "+00:00"))
			
 
				+            except Exception:
			
 
				+                try:
			
 
				+                    from email.utils import parsedate_to_datetime
			
 
				+                    dt = parsedate_to_datetime(s)
			
 
				+                except Exception:
			
 
				+                    return None
			
 
				+            if dt.tzinfo is None:
			
 
				+                dt = dt.replace(tzinfo=timezone.utc)
			
 
				+            return dt.astimezone(timezone.utc)
			
 
				+    
			
 
				         counter: dict[str, int] = {}
			
 
				         for (payload_text,) in rows:
			
 
				             c = json.loads(payload_text)
			
 
				+            dt = _parse_ts(c.get("timestamp"))
			
 
				+            if dt is None:
			
 
				+                continue
			
 
				+            if dt < cutoff:
			
 
				+                continue
			
 
				             for ent in c.get("entities", []):
			
 
				                 counter[ent] = counter.get(ent, 0) + 1
			
 
				-
			
 
				+    
			
 
				         sorted_entities = sorted(counter.items(), key=lambda x: -x[1])[:limit]
			
 
				         result: list[dict[str, Any]] = []
			
 
				         for label, count in sorted_entities:
			
@@ -250,3 +280,4 @@ class DashboardStore:
 
				                 "mid": meta["mid"] if meta else None,
			
 
				             })
			
 
				         return result
			
 
				+
			
--- a/news_mcp/mcp_server_fastmcp.py
+++ b/news_mcp/mcp_server_fastmcp.py
@@ -1,7 +1,10 @@
 
				 from __future__ import annotations
			
 
				 
			
 
				 import asyncio
			
 
				+import hashlib
			
 
				 import logging
			
 
				+import subprocess
			
 
				+
			
 
				 import math
			
 
				 import re
			
 
				 import time
			
@@ -41,6 +44,15 @@ logging.basicConfig(
 
				 
			
 
				 _PROCESS_STARTED_AT = time.monotonic()
			
 
				 
			
 
				+_VERSION_HASH = (
			
 
				+    subprocess.check_output(
			
 
				+        ["git", "rev-parse", "--short=9", "HEAD"],
			
 
				+        cwd=__file__,
			
 
				+    )
			
 
				+    .decode()
			
 
				+    .strip()
			
 
				+)
			
 
				+
			
 
				 mcp = FastMCP(
			
 
				     "news-mcp",
			
 
				     transport_security=TransportSecuritySettings(enable_dns_rebinding_protection=False),
			
@@ -987,7 +999,9 @@ def api_health():
 
				     """Extended health + dashboard stats."""
			
 
				     try:
			
 
				         store = DashboardStore(_shared_store)
			
 
				-        return store.get_dashboard_stats()
			
 
				+        stats = store.get_dashboard_stats()
			
 
				+        stats["version"] = _VERSION_HASH
			
 
				+        return stats
			
 
				     except Exception as e:
			
 
				         return _api_err(e, "health")
			
 
				 
			
@@ -1098,4 +1112,5 @@ def health():
 
				     return {
			
 
				         "status": "ok",
			
 
				         "uptime": round(time.monotonic() - _PROCESS_STARTED_AT, 3),
			
 
				+        "version": _VERSION_HASH,
			
 
				     }
			
--- a/news_mcp/storage/sqlite_store.py
+++ b/news_mcp/storage/sqlite_store.py
@@ -673,84 +673,116 @@ class SQLiteClusterStore:
 
				         return clusters
			
 
				 
			
 
				     def get_sentiment_series(
			
 
				+            self,
			
 
				+            topic: str | None = None,
			
 
				+            hours: float = 24,
			
 
				+            bucket_hours: float = 1,
			
 
				+        ) -> list[dict[str, Any]]:
			
 
				+            """Sentiment score averaged per time bucket.
			
 
				+
			
 
				+            Filters by the cluster's own event timestamp (payload.timestamp),
			
 
				+            not by updated_at which tracks row modification time.
			
 
				+            """
			
 
				+            cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
			
 
				+            query = "SELECT payload FROM clusters"
			
 
				+            params: list = []
			
 
				+            if topic and topic != "all":
			
 
				+                query += " WHERE topic = ?"
			
 
				+                params.append(topic)
			
 
				+            query += " ORDER BY updated_at ASC"
			
 
				+
			
 
				+            with self._conn() as conn:
			
 
				+                cur = conn.execute(query, params)
			
 
				+                rows = cur.fetchall()
			
 
				+
			
 
				+            def _parse_ts(ts: Any) -> datetime | None:
			
 
				+                if not ts:
			
 
				+                    return None
			
 
				+                s = str(ts).strip()
			
 
				+                try:
			
 
				+                    dt = datetime.fromisoformat(s.replace("Z", "+00:00"))
			
 
				+                except Exception:
			
 
				+                    try:
			
 
				+                        dt = parsedate_to_datetime(s)
			
 
				+                    except Exception:
			
 
				+                        return None
			
 
				+                if dt.tzinfo is None:
			
 
				+                    dt = dt.replace(tzinfo=timezone.utc)
			
 
				+                return dt.astimezone(timezone.utc)
			
 
				+
			
 
				+            buckets: dict[datetime, list[float]] = {}
			
 
				+            for (payload_text,) in rows:
			
 
				+                c = json.loads(payload_text)
			
 
				+                dt = _parse_ts(c.get("timestamp"))
			
 
				+                score = c.get("sentimentScore")
			
 
				+                if dt is None or score is None:
			
 
				+                    continue
			
 
				+                if dt < cutoff.replace(tzinfo=timezone.utc):
			
 
				+                    continue
			
 
				+                bucket_key = dt.replace(minute=0, second=0, microsecond=0)
			
 
				+                if bucket_hours > 1:
			
 
				+                    bucket_key = bucket_key.replace(
			
 
				+                        hour=(bucket_key.hour // int(bucket_hours)) * int(bucket_hours)
			
 
				+                    )
			
 
				+                buckets.setdefault(bucket_key, []).append(float(score))
			
 
				+
			
 
				+            series: list[dict[str, Any]] = []
			
 
				+            for bucket_key in sorted(buckets):
			
 
				+                scores = buckets[bucket_key]
			
 
				+                series.append({
			
 
				+                    "time": bucket_key.isoformat(),
			
 
				+                    "avg_sentiment": round(sum(scores) / len(scores), 3),
			
 
				+                    "count": len(scores),
			
 
				+                    "min": round(min(scores), 3),
			
 
				+                    "max": round(max(scores), 3),
			
 
				+                })
			
 
				+            return series
			
 
				+
			
 
				+    def get_entity_frequencies(
			
 
				         self,
			
 
				-        topic: str | None = None,
			
 
				         hours: float = 24,
			
 
				-        bucket_hours: float = 1,
			
 
				+        limit: int = 30,
			
 
				     ) -> list[dict[str, Any]]:
			
 
				-        """Sentiment score averaged per time bucket."""
			
 
				-        cutoff = (datetime.now(timezone.utc) - timedelta(hours=hours)).isoformat()
			
 
				-        now = datetime.now(timezone.utc).isoformat()
			
 
				-        query = "SELECT payload FROM clusters WHERE updated_at >= ? AND updated_at <= ?"
			
 
				-        params: list = [cutoff, now]
			
 
				-        if topic and topic != "all":
			
 
				-            query += " AND topic = ?"
			
 
				-            params.append(topic)
			
 
				-        query += " ORDER BY updated_at ASC"
			
 
				+        """Top entities by mention count in recent clusters.
			
 
				+
			
 
				+        Filters by the cluster's own event timestamp (payload.timestamp),
			
 
				+        not by updated_at which tracks row modification time.
			
 
				+        """
			
 
				+        cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
			
 
				+    
			
 
				+        query = "SELECT payload FROM clusters"
			
 
				+        params: list = []
			
 
				         with self._conn() as conn:
			
 
				             cur = conn.execute(query, params)
			
 
				             rows = cur.fetchall()
			
 
				-
			
 
				-        def _parse_ts(ts: Any) -> datetime | None:
			
 
				+    
			
 
				+        def _parse_ts(ts):
			
 
				             if not ts:
			
 
				                 return None
			
 
				-            s = str(ts)
			
 
				+            s = str(ts).strip()
			
 
				             try:
			
 
				                 dt = datetime.fromisoformat(s.replace("Z", "+00:00"))
			
 
				             except Exception:
			
 
				                 try:
			
 
				+                    from email.utils import parsedate_to_datetime
			
 
				                     dt = parsedate_to_datetime(s)
			
 
				                 except Exception:
			
 
				                     return None
			
 
				             if dt.tzinfo is None:
			
 
				                 dt = dt.replace(tzinfo=timezone.utc)
			
 
				             return dt.astimezone(timezone.utc)
			
 
				-
			
 
				-        buckets: dict[datetime, list[float]] = {}
			
 
				+    
			
 
				+        counter: dict[str, int] = {}
			
 
				         for (payload_text,) in rows:
			
 
				             c = json.loads(payload_text)
			
 
				             dt = _parse_ts(c.get("timestamp"))
			
 
				-            score = c.get("sentimentScore")
			
 
				-            if dt is None or score is None:
			
 
				+            if dt is None:
			
 
				+                continue
			
 
				+            if dt < cutoff:
			
 
				                 continue
			
 
				-            bucket_key = dt.replace(minute=0, second=0, microsecond=0)
			
 
				-            if bucket_hours > 1:
			
 
				-                bucket_key = bucket_key.replace(
			
 
				-                    hour=(bucket_key.hour // int(bucket_hours)) * int(bucket_hours)
			
 
				-                )
			
 
				-            buckets.setdefault(bucket_key, []).append(float(score))
			
 
				-
			
 
				-        series: list[dict[str, Any]] = []
			
 
				-        for bucket_key in sorted(buckets):
			
 
				-            scores = buckets[bucket_key]
			
 
				-            series.append({
			
 
				-                "time": bucket_key.isoformat(),
			
 
				-                "avg_sentiment": round(sum(scores) / len(scores), 3),
			
 
				-                "count": len(scores),
			
 
				-                "min": round(min(scores), 3),
			
 
				-                "max": round(max(scores), 3),
			
 
				-            })
			
 
				-        return series
			
 
				-
			
 
				-    def get_entity_frequencies(
			
 
				-        self,
			
 
				-        hours: float = 24,
			
 
				-        limit: int = 30,
			
 
				-    ) -> list[dict[str, Any]]:
			
 
				-        """Top entities by mention count in recent clusters."""
			
 
				-        cutoff = (datetime.now(timezone.utc) - timedelta(hours=hours)).isoformat()
			
 
				-        now = datetime.now(timezone.utc).isoformat()
			
 
				-        with self._conn() as conn:
			
 
				-            cur = conn.execute(
			
 
				-                "SELECT payload FROM clusters WHERE updated_at >= ? AND updated_at <= ? ORDER BY updated_at DESC LIMIT 500",
			
 
				-                (cutoff, now),
			
 
				-            )
			
 
				-            rows = cur.fetchall()
			
 
				-        counter: dict[str, int] = {}
			
 
				-        for (payload_text,) in rows:
			
 
				-            c = json.loads(payload_text)
			
 
				             for ent in c.get("entities", []):
			
 
				                 counter[ent] = counter.get(ent, 0) + 1
			
 
				+    
			
 
				         sorted_entities = sorted(counter.items(), key=lambda x: -x[1])[:limit]
			
 
				         result: list[dict[str, Any]] = []
			
 
				         for label, count in sorted_entities: