1 неделя назад · b6567f729d
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -52,6 +52,13 @@ This project spans two machines. **Always check which machine you're operating o
 
															 - `include_articles=true` should keep responses compact and only return minimal article fields.
														
 
															 - Timestamps in cluster payloads are normalized to ISO 8601 UTC (`YYYY-MM-DDTHH:MM:SS+00:00`) at write time in `sanitize_cluster_payload()`.
														
 
															+## Timestamp Contract (READ THIS BEFORE TOUCHING ANY TIMESTAMP CODE)
														
 
															+- `payload.timestamp`, `payload.first_seen`, `payload.last_updated` are **guaranteed** `YYYY-MM-DDTHH:MM:SS+00:00` for every row written after the normalization migration (older rows were covered by the backfill script, which has already been run on the live server).
														
 
															+- **Read paths**: use `_read_ts()` from `news_mcp.storage.sqlite_store`, or `datetime.fromisoformat()` directly. That is all that is needed.
														
 
															+- **Never** add `parsedate_to_datetime` / RFC 2822 fallbacks to a read path. If `_read_ts` returns None on a stored timestamp, the bug is in the write path — fix `sanitize_cluster_payload()`, don't paper over it.
														
 
															+- `parsedate_to_datetime` is intentionally retained **only** in `sqlite_store._normalize_ts()` (write path) and `dedup/cluster.py` (raw ingest before normalization). Nowhere else.
														
 
															+- **Never query the dev DB** (`news_mcp/data/news.sqlite` on latitude) to check live data. It is empty/stale. The live DB is on thinkcenter-2 in Docker at `/app/data/news.sqlite`.
														
 
															+
														
 
															 ## Editing Rules
														
 
															 - Keep changes aligned with the docs in `README.md`, `PROJECT.md`, and `OUTLOOK.md`.
														
 
															 - Prefer narrow fixes over contract changes unless the user explicitly asks to expand behavior.
														
--- a/PROJECT.md
+++ b/PROJECT.md
@@ -172,3 +172,22 @@ Cluster payloads stored timestamps as raw RSS strings (RFC 2822 HTTP-date like `
 
															 ### Key invariant
														
 
															 `updated_at` in the DB = row modification time (set to `datetime.now()` on every upsert). For time-range queries, always use `payload.timestamp` parsed from the JSON.
														
 
															+
														
 
															+## Timestamp Read-Path Cleanup (May 2026)
														
 
															+
														
 
															+### Problem
														
 
															+After normalization, all read paths still contained defensive RFC 2822 / `parsedate_to_datetime` fallback parsers. These fallbacks were dead code on the live server (all stored timestamps are ISO 8601 UTC), and they risked being copied into new read paths by future contributors who misread the defensive pattern as necessary.
														
 
															+
														
 
															+### Fix
														
 
															+- Added `_read_ts(ts) -> float | None` to `sqlite_store.py` (module-level, exported). Uses only `datetime.fromisoformat()`. No RFC 2822 fallback. If it fails, the normalization pipeline has a bug — fix that instead.
														
 
															+- All read-path timestamp parsing in `sqlite_store.py`, `dashboard_store.py`, and `mcp_server_fastmcp.py` now uses `_read_ts` or plain `fromisoformat`.
														
 
															+- `parsedate_to_datetime` removed from `dashboard_store.py` and `mcp_server_fastmcp.py` imports entirely.
														
 
															+- `parsedate_to_datetime` is **only** retained in `sqlite_store._normalize_ts()` (the write path) and `dedup/cluster.py` (raw ingest before normalization).
														
 
															+- Test fixtures updated to use ISO 8601 UTC timestamps.
														
 
															+
														
 
															+### Contract (ENFORCE THIS)
														
 
															+- `payload.timestamp`, `payload.first_seen`, `payload.last_updated` are **always** `YYYY-MM-DDTHH:MM:SS+00:00` for any row written after the normalization migration.
														
 
															+- Read paths: use `_read_ts()` from `sqlite_store` or `datetime.fromisoformat()` directly. **Never** add `parsedate_to_datetime` to a read path.
														
 
															+- Write paths: `sanitize_cluster_payload()` in `sqlite_store.py` is the single normalization point. All writes go through `upsert_clusters()` which calls it.
														
 
															+- This repo is a **dev machine** copy. The live server is on thinkcenter-2 (192.168.0.200). Never query the dev DB to verify live data — the dev DB is stale/empty.
														
 
															+
														
--- a/news_mcp/dashboard/dashboard_store.py
+++ b/news_mcp/dashboard/dashboard_store.py
@@ -3,7 +3,6 @@ from __future__ import annotations
 
															 import json
														
 
															 from datetime import datetime, timedelta, timezone
														
 
															 from typing import Any
														
 
															-from email.utils import parsedate_to_datetime
														
 
															 from news_mcp.config import (
														
 
															     NEWS_PRUNE_INTERVAL_HOURS,
														
@@ -12,7 +11,7 @@ from news_mcp.config import (
 
															     NEWS_RETENTION_DAYS,
														
 
															     DEFAULT_TOPICS,
														
 
															 )
														
 
															-from news_mcp.storage.sqlite_store import SQLiteClusterStore
														
 
															+from news_mcp.storage.sqlite_store import SQLiteClusterStore, _read_ts
														
 
															 class DashboardStore:
														
@@ -86,24 +85,29 @@ class DashboardStore:
 
															         limit: int = 20,
														
 
															         offset: int = 0,
														
 
															     ) -> list[dict[str, Any]]:
														
 
															-        cutoff = (datetime.now(timezone.utc) - timedelta(hours=hours)).isoformat()
														
 
															-        now = datetime.now(timezone.utc).isoformat()
														
 
															-        query = "SELECT payload FROM clusters WHERE updated_at >= ? AND updated_at <= ?"
														
 
															-        params: list = [cutoff, now]
														
 
															+        """Paginated cluster listing filtered by payload.timestamp (event time).
														
 
															+
														
 
															+        payload.timestamp is guaranteed ISO 8601 UTC — uses _read_ts from
														
 
															+        sqlite_store. Do NOT filter by updated_at (row mod time).
														
 
															+        """
														
 
															+        cutoff_ts = (datetime.now(timezone.utc) - timedelta(hours=hours)).timestamp()
														
 
															+
														
 
															+        query = "SELECT payload FROM clusters"
														
 
															+        params: list = []
														
 
															         if topic and topic != "all":
														
 
															-            query += " AND topic = ?"
														
 
															+            query += " WHERE topic = ?"
														
 
															             params.append(topic)
														
 
															-        query += " ORDER BY updated_at DESC LIMIT ? OFFSET ?"
														
 
															-        params.extend([limit, offset])
														
 
															         with self._store._conn() as conn:
														
 
															-            cur = conn.execute(query, params)
														
 
															-            rows = cur.fetchall()
														
 
															+            rows = conn.execute(query, params).fetchall()
														
 
															-        clusters: list[dict[str, Any]] = []
														
 
															-        for (payload_text,) in rows:
														
 
															-            c = json.loads(payload_text)
														
 
															-            clusters.append({
														
 
															+        filtered = [json.loads(r[0]) for r in rows]
														
 
															+        filtered = [c for c in filtered if (_read_ts(c.get("timestamp")) or 0.0) >= cutoff_ts]
														
 
															+        filtered.sort(key=lambda c: _read_ts(c.get("timestamp")) or 0.0, reverse=True)
														
 
															+        page = filtered[offset:offset + limit]
														
 
															+
														
 
															+        return [
														
 
															+            {
														
 
															                 "cluster_id": c.get("cluster_id", ""),
														
 
															                 "headline": c.get("headline", ""),
														
 
															                 "topic": c.get("topic", ""),
														
@@ -115,8 +119,9 @@ class DashboardStore:
 
															                 "timestamp": c.get("timestamp", ""),
														
 
															                 "keywords": c.get("keywords", []),
														
 
															                 "article_count": len(c.get("articles", [])),
														
 
															-            })
														
 
															-        return clusters
														
 
															+            }
														
 
															+            for c in page
														
 
															+        ]
														
 
															     def get_cluster_detail(self, cluster_id: str) -> dict[str, Any] | None:
														
 
															         with self._store._conn() as conn:
														
@@ -164,46 +169,26 @@ class DashboardStore:
 
															         ) -> list[dict[str, Any]]:
														
 
															         """Sentiment score averaged per time bucket.
														
 
															-        Filters by the cluster's own event timestamp (payload.timestamp),
														
 
															-        not by updated_at which tracks row modification time.
														
 
															+        Filters by payload.timestamp (event time, ISO 8601 UTC guaranteed).
														
 
															         """
														
 
															-        cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
														
 
															-
														
 
															+        cutoff_ts = (datetime.now(timezone.utc) - timedelta(hours=hours)).timestamp()
														
 
															         query = "SELECT payload FROM clusters"
														
 
															         params: list = []
														
 
															         if topic and topic != "all":
														
 
															             query += " WHERE topic = ?"
														
 
															             params.append(topic)
														
 
															-        query += " ORDER BY updated_at ASC"
														
 
															         with self._store._conn() as conn:
														
 
															-            cur = conn.execute(query, params)
														
 
															-            rows = cur.fetchall()
														
 
															-
														
 
															-        def _parse_ts(ts: Any) -> datetime | None:
														
 
															-            if not ts:
														
 
															-                return None
														
 
															-            s = str(ts).strip()
														
 
															-            try:
														
 
															-                dt = datetime.fromisoformat(s.replace("Z", "+00:00"))
														
 
															-            except Exception:
														
 
															-                try:
														
 
															-                    dt = parsedate_to_datetime(s)
														
 
															-                except Exception:
														
 
															-                    return None
														
 
															-            if dt.tzinfo is None:
														
 
															-                dt = dt.replace(tzinfo=timezone.utc)
														
 
															-            return dt.astimezone(timezone.utc)
														
 
															+            rows = conn.execute(query, params).fetchall()
														
 
															         buckets: dict[datetime, list[float]] = {}
														
 
															         for (payload_text,) in rows:
														
 
															             c = json.loads(payload_text)
														
 
															-            dt = _parse_ts(c.get("timestamp"))
														
 
															+            ts = _read_ts(c.get("timestamp"))
														
 
															             score = c.get("sentimentScore")
														
 
															-            if dt is None or score is None:
														
 
															-                continue
														
 
															-            if dt < cutoff:
														
 
															+            if ts is None or score is None or ts < cutoff_ts:
														
 
															                 continue
														
 
															+            dt = datetime.fromtimestamp(ts, tz=timezone.utc)
														
 
															             bucket_key = dt.replace(minute=0, second=0, microsecond=0)
														
 
															             if bucket_hours > 1:
														
 
															                 bucket_key = bucket_key.replace(
														
@@ -211,17 +196,16 @@ class DashboardStore:
 
															                 )
														
 
															             buckets.setdefault(bucket_key, []).append(float(score))
														
 
															-        series: list[dict[str, Any]] = []
														
 
															-        for bucket_key in sorted(buckets):
														
 
															-            scores = buckets[bucket_key]
														
 
															-            series.append({
														
 
															+        return [
														
 
															+            {
														
 
															                 "time": bucket_key.isoformat(),
														
 
															                 "avg_sentiment": round(sum(scores) / len(scores), 3),
														
 
															                 "count": len(scores),
														
 
															                 "min": round(min(scores), 3),
														
 
															                 "max": round(max(scores), 3),
														
 
															-            })
														
 
															-        return series
														
 
															+            }
														
 
															+            for bucket_key, scores in sorted(buckets.items())
														
 
															+        ]
														
 
															     # ── Entity Frequencies ──────────────────────────────────────────
														
@@ -230,49 +214,22 @@ class DashboardStore:
 
															         hours: float = 24,
														
 
															         limit: int = 30,
														
 
															     ) -> list[dict[str, Any]]:
														
 
															-        """Top entities by mention count in recent clusters.
														
 
															+        """Top entities by mention count filtered by payload.timestamp (ISO 8601 UTC guaranteed)."""
														
 
															+        cutoff_ts = (datetime.now(timezone.utc) - timedelta(hours=hours)).timestamp()
														
 
															-        Filters by the cluster's own event timestamp (payload.timestamp),
														
 
															-        not by updated_at which tracks row modification time.
														
 
															-        """
														
 
															-        cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
														
 
															-    
														
 
															-        query = "SELECT payload FROM clusters"
														
 
															-        params: list = []
														
 
															         with self._store._conn() as conn:
														
 
															-            cur = conn.execute(query, params)
														
 
															-            rows = cur.fetchall()
														
 
															-    
														
 
															-        def _parse_ts(ts):
														
 
															-            if not ts:
														
 
															-                return None
														
 
															-            s = str(ts).strip()
														
 
															-            try:
														
 
															-                dt = datetime.fromisoformat(s.replace("Z", "+00:00"))
														
 
															-            except Exception:
														
 
															-                try:
														
 
															-                    from email.utils import parsedate_to_datetime
														
 
															-                    dt = parsedate_to_datetime(s)
														
 
															-                except Exception:
														
 
															-                    return None
														
 
															-            if dt.tzinfo is None:
														
 
															-                dt = dt.replace(tzinfo=timezone.utc)
														
 
															-            return dt.astimezone(timezone.utc)
														
 
															-    
														
 
															+            rows = conn.execute("SELECT payload FROM clusters").fetchall()
														
 
															+
														
 
															         counter: dict[str, int] = {}
														
 
															         for (payload_text,) in rows:
														
 
															             c = json.loads(payload_text)
														
 
															-            dt = _parse_ts(c.get("timestamp"))
														
 
															-            if dt is None:
														
 
															-                continue
														
 
															-            if dt < cutoff:
														
 
															+            if (_read_ts(c.get("timestamp")) or 0.0) < cutoff_ts:
														
 
															                 continue
														
 
															             for ent in c.get("entities", []):
														
 
															                 counter[ent] = counter.get(ent, 0) + 1
														
 
															-    
														
 
															-        sorted_entities = sorted(counter.items(), key=lambda x: -x[1])[:limit]
														
 
															+
														
 
															         result: list[dict[str, Any]] = []
														
 
															-        for label, count in sorted_entities:
														
 
															+        for label, count in sorted(counter.items(), key=lambda x: -x[1])[:limit]:
														
 
															             meta = self._store.get_entity_metadata(label)
														
 
															             result.append({
														
 
															                 "label": label,
														
@@ -289,64 +246,35 @@ class DashboardStore:
 
															         hours: float = 24,
														
 
															         limit: int = 30,
														
 
															     ) -> list[dict[str, Any]]:
														
 
															-        """Top keywords by occurrence count in recent clusters.
														
 
															+        """Top keywords by occurrence count filtered by payload.timestamp (ISO 8601 UTC guaranteed).
														
 
															-        Mirrors get_entity_frequencies but for LLM-curated thematic keywords.
														
 
															-        Filters by the cluster's own event timestamp (payload.timestamp).
														
 
															-        Only includes keywords that are NOT already extracted as entities
														
 
															-        in the same cluster — the entity signal is higher quality and is
														
 
															-        already shown in the entity frequencies view.
														
 
															+        Excludes keywords that are already entities in the same cluster,
														
 
															+        and excludes DEFAULT_TOPICS labels (crypto, macro, regulation, ai, other).
														
 
															         """
														
 
															-        cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
														
 
															+        cutoff_ts = (datetime.now(timezone.utc) - timedelta(hours=hours)).timestamp()
														
 
															+        _topic_labels = {t.lower() for t in DEFAULT_TOPICS}
														
 
															-        query = "SELECT payload FROM clusters"
														
 
															-        params: list = []
														
 
															         with self._store._conn() as conn:
														
 
															-            cur = conn.execute(query, params)
														
 
															-            rows = cur.fetchall()
														
 
															-
														
 
															-        def _parse_ts(ts):
														
 
															-            if not ts:
														
 
															-                return None
														
 
															-            s = str(ts).strip()
														
 
															-            try:
														
 
															-                dt = datetime.fromisoformat(s.replace("Z", "+00:00"))
														
 
															-            except Exception:
														
 
															-                try:
														
 
															-                    dt = parsedate_to_datetime(s)
														
 
															-                except Exception:
														
 
															-                    return None
														
 
															-            if dt.tzinfo is None:
														
 
															-                dt = dt.replace(tzinfo=timezone.utc)
														
 
															-            return dt.astimezone(timezone.utc)
														
 
															+            rows = conn.execute("SELECT payload FROM clusters").fetchall()
														
 
															         counter: dict[str, int] = {}
														
 
															-        _topic_labels = {t.lower() for t in DEFAULT_TOPICS}
														
 
															         for (payload_text,) in rows:
														
 
															             c = json.loads(payload_text)
														
 
															-            dt = _parse_ts(c.get("timestamp"))
														
 
															-            if dt is None:
														
 
															-                continue
														
 
															-            if dt < cutoff:
														
 
															+            if (_read_ts(c.get("timestamp")) or 0.0) < cutoff_ts:
														
 
															                 continue
														
 
															-            # Get entities in this cluster to dedup against keywords
														
 
															             ents_in_cluster = {str(e).strip().lower() for e in (c.get("entities", []) or []) if str(e).strip()}
														
 
															             for kw in c.get("keywords", []):
														
 
															                 kw_str = str(kw).strip()
														
 
															                 if not kw_str:
														
 
															                     continue
														
 
															-                # Skip topic labels (crypto, macro, regulation, ai, other)
														
 
															-                # that the LLM sometimes returns as keywords.
														
 
															                 if kw_str.lower() in _topic_labels:
														
 
															                     continue
														
 
															-                # Skip keywords that are already entities in this cluster
														
 
															                 if kw_str.lower() in ents_in_cluster:
														
 
															                     continue
														
 
															                 counter[kw_str] = counter.get(kw_str, 0) + 1
														
 
															-        sorted_kws = sorted(counter.items(), key=lambda x: -x[1])[:limit]
														
 
															         return [
														
 
															             {"label": label, "count": count}
														
 
															-            for label, count in sorted_kws
														
 
															+            for label, count in sorted(counter.items(), key=lambda x: -x[1])[:limit]
														
 
															         ]
														
--- a/news_mcp/mcp_server_fastmcp.py
+++ b/news_mcp/mcp_server_fastmcp.py
@@ -9,7 +9,6 @@ import re
 
															 import time
														
 
															 from collections import Counter
														
 
															 from datetime import datetime, timezone
														
 
															-from email.utils import parsedate_to_datetime
														
 
															 from pathlib import Path
														
 
															 from fastapi import FastAPI, Form
														
@@ -85,20 +84,18 @@ def _cluster_entity_haystack(cluster: dict) -> list[str]:
 
															 def _parse_cluster_timestamp(value) -> datetime:
														
 
															+    """Parse a stored cluster timestamp.
														
 
															+
														
 
															+    payload.timestamp is guaranteed ISO 8601 UTC (YYYY-MM-DDTHH:MM:SS+00:00)
														
 
															+    at write time. Only datetime.fromisoformat is needed — no RFC 2822 fallback.
														
 
															+    """
														
 
															     if not value:
														
 
															         return datetime.min.replace(tzinfo=timezone.utc)
														
 
															     text = str(value).strip()
														
 
															     if not text:
														
 
															         return datetime.min.replace(tzinfo=timezone.utc)
														
 
															     try:
														
 
															-        dt = datetime.fromisoformat(text.replace("Z", "+00:00"))
														
 
															-        if dt.tzinfo is None:
														
 
															-            dt = dt.replace(tzinfo=timezone.utc)
														
 
															-        return dt.astimezone(timezone.utc)
														
 
															-    except Exception:
														
 
															-        pass
														
 
															-    try:
														
 
															-        dt = parsedate_to_datetime(text)
														
 
															+        dt = datetime.fromisoformat(text)
														
 
															         if dt.tzinfo is None:
														
 
															             dt = dt.replace(tzinfo=timezone.utc)
														
 
															         return dt.astimezone(timezone.utc)
														
@@ -600,24 +597,17 @@ async def detect_emerging_topics(limit: int = 10, timeframe: str = "24h", topic:
 
															     now = datetime.now(timezone.utc)
														
 
															     def _cluster_age_hours(c: dict) -> float:
														
 
															-        """Return the cluster's age in hours (approximate, from now)."""
														
 
															+        """Return the cluster's age in hours. payload.timestamp is ISO 8601 UTC guaranteed."""
														
 
															         ts = c.get("timestamp") or c.get("last_updated")
														
 
															         if not ts:
														
 
															-            return 0.0  # treat un-dated as fresh
														
 
															+            return 0.0
														
 
															         try:
														
 
															-            s = str(ts).replace("Z", "+00:00")
														
 
															-            dt = datetime.fromisoformat(s)
														
 
															+            dt = datetime.fromisoformat(str(ts).strip())
														
 
															             if dt.tzinfo is None:
														
 
															                 dt = dt.replace(tzinfo=timezone.utc)
														
 
															             return max(0.0, (now - dt.astimezone(timezone.utc)).total_seconds() / 3600.0)
														
 
															         except Exception:
														
 
															-            try:
														
 
															-                dt = parsedate_to_datetime(str(ts))
														
 
															-                if dt.tzinfo is None:
														
 
															-                    dt = dt.replace(tzinfo=timezone.utc)
														
 
															-                return max(0.0, (now - dt.astimezone(timezone.utc)).total_seconds() / 3600.0)
														
 
															-            except Exception:
														
 
															-                return 0.0
														
 
															+            return 0.0
														
 
															     # Generic entity filter
														
 
															     _generic_tokens = {"news", "latest", "breaking", "update", "updates", "report", "reports"}
														
--- a/news_mcp/storage/sqlite_store.py
+++ b/news_mcp/storage/sqlite_store.py
@@ -55,6 +55,27 @@ def _normalize_ts(ts: Any) -> str:
 
															     return text
														
 
															+def _read_ts(ts: Any) -> float | None:
														
 
															+    """Parse a stored, already-normalized ISO 8601 UTC timestamp to a unix float.
														
 
															+
														
 
															+    All payload.timestamp / payload.first_seen / payload.last_updated values
														
 
															+    are guaranteed YYYY-MM-DDTHH:MM:SS+00:00 at write time (enforced by
														
 
															+    sanitize_cluster_payload → _normalize_ts).  Only datetime.fromisoformat is
														
 
															+    needed here.  Do NOT add RFC 2822 / parsedate_to_datetime fallbacks — if
														
 
															+    this function can't parse a stored timestamp it means the normalization
														
 
															+    pipeline has a bug that should be fixed there, not papered over here.
														
 
															+    """
														
 
															+    if not ts:
														
 
															+        return None
														
 
															+    try:
														
 
															+        dt = datetime.fromisoformat(str(ts).strip())
														
 
															+        if dt.tzinfo is None:
														
 
															+            dt = dt.replace(tzinfo=timezone.utc)
														
 
															+        return dt.astimezone(timezone.utc).timestamp()
														
 
															+    except Exception:
														
 
															+        return None
														
 
															+
														
 
															+
														
 
															 @dataclass
														
 
															 class ClusterRow:
														
 
															     cluster_id: str
														
@@ -278,35 +299,12 @@ class SQLiteClusterStore:
 
															             return json.loads(row[0])
														
 
															     def get_latest_clusters(self, topic: str, ttl_hours: float, limit: int) -> list[dict]:
														
 
															-        """Return newest clusters by *their own* timestamp.
														
 
															+        """Return newest clusters by their own event timestamp (payload.timestamp).
														
 
															-        Filtering/sorting by the DB row's `updated_at` can drift away from the
														
 
															-        actual event time in `payload.timestamp`.
														
 
															+        payload.timestamp is guaranteed ISO 8601 UTC — use _read_ts, not raw
														
 
															+        JSON parsing with RFC 2822 fallbacks.
														
 
															         """
														
 
															-
														
 
															-        cutoff = datetime.now(timezone.utc) - timedelta(hours=float(ttl_hours))
														
 
															-        cutoff_ts = cutoff.timestamp()
														
 
															-
														
 
															-        def _parse_payload_ts(ts: Any) -> float | None:
														
 
															-            if not ts:
														
 
															-                return None
														
 
															-            if isinstance(ts, (int, float)):
														
 
															-                return float(ts)
														
 
															-            text = str(ts).strip()
														
 
															-            try:
														
 
															-                dt = datetime.fromisoformat(text.replace('Z', '+00:00'))
														
 
															-                if dt.tzinfo is None:
														
 
															-                    dt = dt.replace(tzinfo=timezone.utc)
														
 
															-                return dt.astimezone(timezone.utc).timestamp()
														
 
															-            except Exception:
														
 
															-                pass
														
 
															-            try:
														
 
															-                dt = parsedate_to_datetime(text)
														
 
															-                if dt.tzinfo is None:
														
 
															-                    dt = dt.replace(tzinfo=timezone.utc)
														
 
															-                return dt.astimezone(timezone.utc).timestamp()
														
 
															-            except Exception:
														
 
															-                return None
														
 
															+        cutoff_ts = (datetime.now(timezone.utc) - timedelta(hours=float(ttl_hours))).timestamp()
														
 
															         with self._conn() as conn:
														
 
															             cur = conn.execute(
														
@@ -315,57 +313,24 @@ class SQLiteClusterStore:
 
															             )
														
 
															             candidates = [json.loads(r[0]) for r in cur.fetchall()]
														
 
															-        filtered: list[dict] = []
														
 
															-        for c in candidates:
														
 
															-            ts = _parse_payload_ts(c.get("timestamp"))
														
 
															-            if ts is None:
														
 
															-                continue
														
 
															-            if ts >= cutoff_ts:
														
 
															-                filtered.append(c)
														
 
															-
														
 
															-        filtered.sort(key=lambda c: _parse_payload_ts(c.get("timestamp")) or 0.0, reverse=True)
														
 
															+        filtered = [c for c in candidates if (_read_ts(c.get("timestamp")) or 0.0) >= cutoff_ts]
														
 
															+        filtered.sort(key=lambda c: _read_ts(c.get("timestamp")) or 0.0, reverse=True)
														
 
															         return filtered[: int(limit)]
														
 
															     def get_latest_clusters_all_topics(self, ttl_hours: float, limit: int) -> list[dict]:
														
 
															-        cutoff = datetime.now(timezone.utc) - timedelta(hours=float(ttl_hours))
														
 
															-        cutoff_ts = cutoff.timestamp()
														
 
															+        """Return newest clusters across all topics by event timestamp.
														
 
															-        def _parse_payload_ts(ts: Any) -> float | None:
														
 
															-            if not ts:
														
 
															-                return None
														
 
															-            if isinstance(ts, (int, float)):
														
 
															-                return float(ts)
														
 
															-            text = str(ts).strip()
														
 
															-            try:
														
 
															-                dt = datetime.fromisoformat(text.replace('Z', '+00:00'))
														
 
															-                if dt.tzinfo is None:
														
 
															-                    dt = dt.replace(tzinfo=timezone.utc)
														
 
															-                return dt.astimezone(timezone.utc).timestamp()
														
 
															-            except Exception:
														
 
															-                pass
														
 
															-            try:
														
 
															-                dt = parsedate_to_datetime(text)
														
 
															-                if dt.tzinfo is None:
														
 
															-                    dt = dt.replace(tzinfo=timezone.utc)
														
 
															-                return dt.astimezone(timezone.utc).timestamp()
														
 
															-            except Exception:
														
 
															-                return None
														
 
															+        payload.timestamp is guaranteed ISO 8601 UTC — use _read_ts, not raw
														
 
															+        JSON parsing with RFC 2822 fallbacks.
														
 
															+        """
														
 
															+        cutoff_ts = (datetime.now(timezone.utc) - timedelta(hours=float(ttl_hours))).timestamp()
														
 
															         with self._conn() as conn:
														
 
															-            cur = conn.execute(
														
 
															-                "SELECT payload FROM clusters ORDER BY updated_at DESC",
														
 
															-            )
														
 
															+            cur = conn.execute("SELECT payload FROM clusters ORDER BY updated_at DESC")
														
 
															             candidates = [json.loads(r[0]) for r in cur.fetchall()]
														
 
															-        filtered: list[dict] = []
														
 
															-        for c in candidates:
														
 
															-            ts = _parse_payload_ts(c.get("timestamp"))
														
 
															-            if ts is None:
														
 
															-                continue
														
 
															-            if ts >= cutoff_ts:
														
 
															-                filtered.append(c)
														
 
															-
														
 
															-        filtered.sort(key=lambda c: _parse_payload_ts(c.get("timestamp")) or 0.0, reverse=True)
														
 
															+        filtered = [c for c in candidates if (_read_ts(c.get("timestamp")) or 0.0) >= cutoff_ts]
														
 
															+        filtered.sort(key=lambda c: _read_ts(c.get("timestamp")) or 0.0, reverse=True)
														
 
															         return filtered[: int(limit)]
														
 
															     def get_cluster_by_id(self, cluster_id: str) -> dict | None:
														
@@ -687,23 +652,29 @@ class SQLiteClusterStore:
 
															         limit: int = 20,
														
 
															         offset: int = 0,
														
 
															     ) -> list[dict[str, Any]]:
														
 
															-        """Paginated cluster listing for the dashboard."""
														
 
															-        cutoff = (datetime.now(timezone.utc) - timedelta(hours=hours)).isoformat()
														
 
															-        now = datetime.now(timezone.utc).isoformat()
														
 
															-        query = "SELECT payload FROM clusters WHERE updated_at >= ? AND updated_at <= ?"
														
 
															-        params: list = [cutoff, now]
														
 
															+        """Paginated cluster listing filtered by payload.timestamp (event time).
														
 
															+
														
 
															+        payload.timestamp is guaranteed ISO 8601 UTC — filtered and sorted
														
 
															+        using _read_ts, not updated_at (row modification time).
														
 
															+        """
														
 
															+        cutoff_ts = (datetime.now(timezone.utc) - timedelta(hours=hours)).timestamp()
														
 
															+
														
 
															+        query = "SELECT payload FROM clusters"
														
 
															+        params: list = []
														
 
															         if topic and topic != "all":
														
 
															-            query += " AND topic = ?"
														
 
															+            query += " WHERE topic = ?"
														
 
															             params.append(topic)
														
 
															-        query += " ORDER BY updated_at DESC LIMIT ? OFFSET ?"
														
 
															-        params.extend([limit, offset])
														
 
															+
														
 
															         with self._conn() as conn:
														
 
															-            cur = conn.execute(query, params)
														
 
															-            rows = cur.fetchall()
														
 
															-        clusters: list[dict[str, Any]] = []
														
 
															-        for (payload_text,) in rows:
														
 
															-            c = json.loads(payload_text)
														
 
															-            clusters.append({
														
 
															+            rows = conn.execute(query, params).fetchall()
														
 
															+
														
 
															+        filtered = [json.loads(r[0]) for r in rows]
														
 
															+        filtered = [c for c in filtered if (_read_ts(c.get("timestamp")) or 0.0) >= cutoff_ts]
														
 
															+        filtered.sort(key=lambda c: _read_ts(c.get("timestamp")) or 0.0, reverse=True)
														
 
															+        page = filtered[offset:offset + limit]
														
 
															+
														
 
															+        return [
														
 
															+            {
														
 
															                 "cluster_id": c.get("cluster_id", ""),
														
 
															                 "headline": c.get("headline", ""),
														
 
															                 "topic": c.get("topic", ""),
														
@@ -715,8 +686,9 @@ class SQLiteClusterStore:
 
															                 "timestamp": c.get("timestamp", ""),
														
 
															                 "keywords": c.get("keywords", []),
														
 
															                 "article_count": len(c.get("articles", [])),
														
 
															-            })
														
 
															-        return clusters
														
 
															+            }
														
 
															+            for c in page
														
 
															+        ]
														
 
															     def get_sentiment_series(
														
 
															             self,
														
@@ -726,45 +698,28 @@ class SQLiteClusterStore:
 
															         ) -> list[dict[str, Any]]:
														
 
															             """Sentiment score averaged per time bucket.
														
 
															-            Filters by the cluster's own event timestamp (payload.timestamp),
														
 
															-            not by updated_at which tracks row modification time.
														
 
															+            Filters by payload.timestamp (event time, ISO 8601 UTC guaranteed).
														
 
															             """
														
 
															-            cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
														
 
															+            cutoff_ts = (datetime.now(timezone.utc) - timedelta(hours=hours)).timestamp()
														
 
															             query = "SELECT payload FROM clusters"
														
 
															             params: list = []
														
 
															             if topic and topic != "all":
														
 
															                 query += " WHERE topic = ?"
														
 
															                 params.append(topic)
														
 
															-            query += " ORDER BY updated_at ASC"
														
 
															             with self._conn() as conn:
														
 
															-                cur = conn.execute(query, params)
														
 
															-                rows = cur.fetchall()
														
 
															-
														
 
															-            def _parse_ts(ts: Any) -> datetime | None:
														
 
															-                if not ts:
														
 
															-                    return None
														
 
															-                s = str(ts).strip()
														
 
															-                try:
														
 
															-                    dt = datetime.fromisoformat(s.replace("Z", "+00:00"))
														
 
															-                except Exception:
														
 
															-                    try:
														
 
															-                        dt = parsedate_to_datetime(s)
														
 
															-                    except Exception:
														
 
															-                        return None
														
 
															-                if dt.tzinfo is None:
														
 
															-                    dt = dt.replace(tzinfo=timezone.utc)
														
 
															-                return dt.astimezone(timezone.utc)
														
 
															+                rows = conn.execute(query, params).fetchall()
														
 
															             buckets: dict[datetime, list[float]] = {}
														
 
															             for (payload_text,) in rows:
														
 
															                 c = json.loads(payload_text)
														
 
															-                dt = _parse_ts(c.get("timestamp"))
														
 
															+                ts = _read_ts(c.get("timestamp"))
														
 
															                 score = c.get("sentimentScore")
														
 
															-                if dt is None or score is None:
														
 
															+                if ts is None or score is None:
														
 
															                     continue
														
 
															-                if dt < cutoff.replace(tzinfo=timezone.utc):
														
 
															+                if ts < cutoff_ts:
														
 
															                     continue
														
 
															+                dt = datetime.fromtimestamp(ts, tz=timezone.utc)
														
 
															                 bucket_key = dt.replace(minute=0, second=0, microsecond=0)
														
 
															                 if bucket_hours > 1:
														
 
															                     bucket_key = bucket_key.replace(
														
@@ -772,66 +727,38 @@ class SQLiteClusterStore:
 
															                     )
														
 
															                 buckets.setdefault(bucket_key, []).append(float(score))
														
 
															-            series: list[dict[str, Any]] = []
														
 
															-            for bucket_key in sorted(buckets):
														
 
															-                scores = buckets[bucket_key]
														
 
															-                series.append({
														
 
															+            return [
														
 
															+                {
														
 
															                     "time": bucket_key.isoformat(),
														
 
															                     "avg_sentiment": round(sum(scores) / len(scores), 3),
														
 
															                     "count": len(scores),
														
 
															                     "min": round(min(scores), 3),
														
 
															                     "max": round(max(scores), 3),
														
 
															-                })
														
 
															-            return series
														
 
															+                }
														
 
															+                for bucket_key, scores in sorted(buckets.items())
														
 
															+            ]
														
 
															     def get_entity_frequencies(
														
 
															         self,
														
 
															         hours: float = 24,
														
 
															         limit: int = 30,
														
 
															     ) -> list[dict[str, Any]]:
														
 
															-        """Top entities by mention count in recent clusters.
														
 
															+        """Top entities by mention count filtered by payload.timestamp (ISO 8601 UTC guaranteed)."""
														
 
															+        cutoff_ts = (datetime.now(timezone.utc) - timedelta(hours=hours)).timestamp()
														
 
															-        Filters by the cluster's own event timestamp (payload.timestamp),
														
 
															-        not by updated_at which tracks row modification time.
														
 
															-        """
														
 
															-        cutoff = datetime.now(timezone.utc) - timedelta(hours=hours)
														
 
															-    
														
 
															-        query = "SELECT payload FROM clusters"
														
 
															-        params: list = []
														
 
															         with self._conn() as conn:
														
 
															-            cur = conn.execute(query, params)
														
 
															-            rows = cur.fetchall()
														
 
															-    
														
 
															-        def _parse_ts(ts):
														
 
															-            if not ts:
														
 
															-                return None
														
 
															-            s = str(ts).strip()
														
 
															-            try:
														
 
															-                dt = datetime.fromisoformat(s.replace("Z", "+00:00"))
														
 
															-            except Exception:
														
 
															-                try:
														
 
															-                    from email.utils import parsedate_to_datetime
														
 
															-                    dt = parsedate_to_datetime(s)
														
 
															-                except Exception:
														
 
															-                    return None
														
 
															-            if dt.tzinfo is None:
														
 
															-                dt = dt.replace(tzinfo=timezone.utc)
														
 
															-            return dt.astimezone(timezone.utc)
														
 
															-    
														
 
															+            rows = conn.execute("SELECT payload FROM clusters").fetchall()
														
 
															+
														
 
															         counter: dict[str, int] = {}
														
 
															         for (payload_text,) in rows:
														
 
															             c = json.loads(payload_text)
														
 
															-            dt = _parse_ts(c.get("timestamp"))
														
 
															-            if dt is None:
														
 
															-                continue
														
 
															-            if dt < cutoff:
														
 
															+            if (_read_ts(c.get("timestamp")) or 0.0) < cutoff_ts:
														
 
															                 continue
														
 
															             for ent in c.get("entities", []):
														
 
															                 counter[ent] = counter.get(ent, 0) + 1
														
 
															-    
														
 
															-        sorted_entities = sorted(counter.items(), key=lambda x: -x[1])[:limit]
														
 
															+
														
 
															         result: list[dict[str, Any]] = []
														
 
															-        for label, count in sorted_entities:
														
 
															+        for label, count in sorted(counter.items(), key=lambda x: -x[1])[:limit]:
														
 
															             meta = self.get_entity_metadata(label)
														
 
															             result.append({
														
 
															                 "label": label,
														
--- a/test_news_mcp.py
+++ b/test_news_mcp.py
@@ -262,8 +262,8 @@ def test_resolve_entity_falls_back_cleanly_when_provider_unavailable(monkeypatch
 
															 def test_sort_clusters_by_recency_prefers_newer_timestamp_over_importance():
														
 
															     clusters = [
														
 
															-        {"headline": "older", "timestamp": "Wed, 01 Apr 2026 10:00:00 GMT", "importance": 0.9},
														
 
															-        {"headline": "newer", "timestamp": "Wed, 01 Apr 2026 11:00:00 GMT", "importance": 0.1},
														
 
															+        {"headline": "older", "timestamp": "2026-04-01T10:00:00+00:00", "importance": 0.9},
														
 
															+        {"headline": "newer", "timestamp": "2026-04-01T11:00:00+00:00", "importance": 0.1},
														
 
															     ]
														
 
															     sorted_clusters = _sort_clusters_by_recency(clusters)