Browse Source

Fix unbounded time range in sentiment-series, cluster listing, and entity queries

All SQL queries for sentiment-series (sqlite_store + dashboard_store),
cluster listing (get_clusters), and entity frequencies (get_entity_frequencies)
used only a lower bound (updated_at >= cutoff) with no upper bound, causing
them to scan clusters from years back instead of the requested time window.

E.g. requesting 168h with 12h buckets returned 253 buckets spanning 3 years
instead of the expected ~14 buckets for one week.

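Fix: add `AND updated_at <= ?` (bound to the current UTC time) to every affected query site: six in total, three in dashboard_store.py and three in sqlite_store.py.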
Lukas Goldschmidt 1 tuần trước
mục cha
commit
b939a252fe
2 tập tin đã thay đổi với 18 bổ sung và 12 xóa
  1. 9 6
      news_mcp/dashboard/dashboard_store.py
  2. 9 6
      news_mcp/storage/sqlite_store.py

+ 9 - 6
news_mcp/dashboard/dashboard_store.py

@@ -86,8 +86,9 @@ class DashboardStore:
         offset: int = 0,
     ) -> list[dict[str, Any]]:
         cutoff = (datetime.now(timezone.utc) - timedelta(hours=hours)).isoformat()
-        query = "SELECT payload FROM clusters WHERE updated_at >= ?"
-        params: list = [cutoff]
+        now = datetime.now(timezone.utc).isoformat()
+        query = "SELECT payload FROM clusters WHERE updated_at >= ? AND updated_at <= ?"
+        params: list = [cutoff, now]
         if topic and topic != "all":
             query += " AND topic = ?"
             params.append(topic)
@@ -161,8 +162,9 @@ class DashboardStore:
         bucket_hours: float = 1,
     ) -> list[dict[str, Any]]:
         cutoff = (datetime.now(timezone.utc) - timedelta(hours=hours)).isoformat()
-        query = "SELECT payload FROM clusters WHERE updated_at >= ?"
-        params: list = [cutoff]
+        now = datetime.now(timezone.utc).isoformat()
+        query = "SELECT payload FROM clusters WHERE updated_at >= ? AND updated_at <= ?"
+        params: list = [cutoff, now]
         if topic and topic != "all":
             query += " AND topic = ?"
             params.append(topic)
@@ -222,11 +224,12 @@ class DashboardStore:
         limit: int = 30,
     ) -> list[dict[str, Any]]:
         cutoff = (datetime.now(timezone.utc) - timedelta(hours=hours)).isoformat()
+        now = datetime.now(timezone.utc).isoformat()
         with self._store._conn() as conn:
             cur = conn.execute(
-                "SELECT payload FROM clusters WHERE updated_at >= ? "
+                "SELECT payload FROM clusters WHERE updated_at >= ? AND updated_at <= ? "
                 "ORDER BY updated_at DESC LIMIT 500",
-                (cutoff,),
+                (cutoff, now),
             )
             rows = cur.fetchall()
 

+ 9 - 6
news_mcp/storage/sqlite_store.py

@@ -643,8 +643,9 @@ class SQLiteClusterStore:
     ) -> list[dict[str, Any]]:
         """Paginated cluster listing for the dashboard."""
         cutoff = (datetime.now(timezone.utc) - timedelta(hours=hours)).isoformat()
-        query = "SELECT payload FROM clusters WHERE updated_at >= ?"
-        params: list = [cutoff]
+        now = datetime.now(timezone.utc).isoformat()
+        query = "SELECT payload FROM clusters WHERE updated_at >= ? AND updated_at <= ?"
+        params: list = [cutoff, now]
         if topic and topic != "all":
             query += " AND topic = ?"
             params.append(topic)
@@ -679,8 +680,9 @@ class SQLiteClusterStore:
     ) -> list[dict[str, Any]]:
         """Sentiment score averaged per time bucket."""
         cutoff = (datetime.now(timezone.utc) - timedelta(hours=hours)).isoformat()
-        query = "SELECT payload FROM clusters WHERE updated_at >= ?"
-        params: list = [cutoff]
+        now = datetime.now(timezone.utc).isoformat()
+        query = "SELECT payload FROM clusters WHERE updated_at >= ? AND updated_at <= ?"
+        params: list = [cutoff, now]
         if topic and topic != "all":
             query += " AND topic = ?"
             params.append(topic)
@@ -737,10 +739,11 @@ class SQLiteClusterStore:
     ) -> list[dict[str, Any]]:
         """Top entities by mention count in recent clusters."""
         cutoff = (datetime.now(timezone.utc) - timedelta(hours=hours)).isoformat()
+        now = datetime.now(timezone.utc).isoformat()
         with self._conn() as conn:
             cur = conn.execute(
-                "SELECT payload FROM clusters WHERE updated_at >= ? ORDER BY updated_at DESC LIMIT 500",
-                (cutoff,),
+                "SELECT payload FROM clusters WHERE updated_at >= ? AND updated_at <= ? ORDER BY updated_at DESC LIMIT 500",
+                (cutoff, now),
             )
             rows = cur.fetchall()
         counter: dict[str, int] = {}