Quellcode durchsuchen

fix: get_clusters_page total count now consistent with returned clusters

The api_clusters endpoint computed total with a SQL filter on updated_at, while
get_clusters_page filtered by payload.timestamp in Python. The two time domains
disagreed, producing total=6056 when only 100 clusters were returned.

- get_clusters_page now returns {"clusters": [...], "total": len(filtered)}
  so total always matches the actual filtered result set
- api_clusters uses the returned total directly instead of a separate SQL
  COUNT query on updated_at
- Return type annotation updated to dict[str, Any]
Lukas Goldschmidt vor 1 Woche
Ursprung
Commit
5ffdb11fe7
2 geänderte Dateien mit 23 neuen und 31 gelöschten Zeilen
  1. 21 17
      news_mcp/dashboard/dashboard_store.py
  2. 2 14
      news_mcp/mcp_server_fastmcp.py

+ 21 - 17
news_mcp/dashboard/dashboard_store.py

@@ -84,11 +84,12 @@ class DashboardStore:
         hours: float = 24,
         limit: int = 20,
         offset: int = 0,
-    ) -> list[dict[str, Any]]:
+    ) -> dict[str, Any]:
         """Paginated cluster listing filtered by payload.timestamp (event time).
 
         payload.timestamp is guaranteed ISO 8601 UTC — uses _read_ts from
         sqlite_store. Do NOT filter by updated_at (row mod time).
+        Returns {"clusters": [...], "total": int}.
         """
         cutoff_ts = (datetime.now(timezone.utc) - timedelta(hours=hours)).timestamp()
 
@@ -106,22 +107,25 @@ class DashboardStore:
         filtered.sort(key=lambda c: _read_ts(c.get("timestamp")) or 0.0, reverse=True)
         page = filtered[offset:offset + limit]
 
-        return [
-            {
-                "cluster_id": c.get("cluster_id", ""),
-                "headline": c.get("headline", ""),
-                "topic": c.get("topic", ""),
-                "sentiment": c.get("sentiment", "neutral"),
-                "sentimentScore": c.get("sentimentScore"),
-                "importance": c.get("importance", 0),
-                "entities": c.get("entities", []),
-                "sources": c.get("sources", []),
-                "timestamp": c.get("timestamp", ""),
-                "keywords": c.get("keywords", []),
-                "article_count": len(c.get("articles", [])),
-            }
-            for c in page
-        ]
+        return {
+            "clusters": [
+                {
+                    "cluster_id": c.get("cluster_id", ""),
+                    "headline": c.get("headline", ""),
+                    "topic": c.get("topic", ""),
+                    "sentiment": c.get("sentiment", "neutral"),
+                    "sentimentScore": c.get("sentimentScore"),
+                    "importance": c.get("importance", 0),
+                    "entities": c.get("entities", []),
+                    "sources": c.get("sources", []),
+                    "timestamp": c.get("timestamp", ""),
+                    "keywords": c.get("keywords", []),
+                    "article_count": len(c.get("articles", [])),
+                }
+                for c in page
+            ],
+            "total": len(filtered),
+        }
 
     def get_cluster_detail(self, cluster_id: str) -> dict[str, Any] | None:
         with self._store._conn() as conn:

+ 2 - 14
news_mcp/mcp_server_fastmcp.py

@@ -1112,20 +1112,8 @@ def api_clusters(
     """Paginated cluster listing."""
     try:
         store = DashboardStore(_shared_store)
-        clusters = store.get_clusters_page(topic=topic, hours=hours, limit=limit, offset=offset)
-        with store._store._conn() as conn:
-            if topic and topic != "all":
-                count_row = conn.execute(
-                    "SELECT COUNT(*) FROM clusters WHERE updated_at >= datetime('now', ? || ' hours') AND topic = ?",
-                    (-hours, topic),
-                ).fetchone()
-            else:
-                count_row = conn.execute(
-                    "SELECT COUNT(*) FROM clusters WHERE updated_at >= datetime('now', ? || ' hours')",
-                    (-hours,),
-                ).fetchone()
-            total = count_row[0] if count_row else 0
-        return {"clusters": clusters, "total": total, "topic": topic or "all", "hours": hours}
+        result = store.get_clusters_page(topic=topic, hours=hours, limit=limit, offset=offset)
+        return {"clusters": result["clusters"], "total": result["total"], "topic": topic or "all", "hours": hours}
     except Exception as e:
         return _api_err(e, f"clusters(topic={topic},hours={hours})")