|
|
@@ -133,20 +133,27 @@ def sanitize_cluster_payload(cluster: dict[str, Any], *, include_resolutions: bo
|
|
|
|
|
|
raw_articles = out.get("articles", []) or []
|
|
|
articles = [a for a in raw_articles if isinstance(a, dict)]
|
|
|
- # Normalize article timestamps
|
|
|
+ # Normalize article timestamps, clamping future dates to now.
|
|
|
+ now_str = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S+00:00")
|
|
|
for a in articles:
|
|
|
if "timestamp" in a:
|
|
|
a["timestamp"] = _normalize_ts(a["timestamp"])
|
|
|
+ if a["timestamp"] > now_str:
|
|
|
+ a["timestamp"] = now_str
|
|
|
out["articles"] = _dedup_articles(articles)
|
|
|
|
|
|
raw_entities = out.get("entities", []) or []
|
|
|
entities = normalize_entities(raw_entities)
|
|
|
out["entities"] = entities
|
|
|
|
|
|
- # Normalize cluster-level timestamps
|
|
|
+ # Normalize cluster-level timestamps, clamping future dates to now.
|
|
|
+ now_str = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S+00:00")
|
|
|
for field in ("timestamp", "last_updated", "first_seen"):
|
|
|
if field in out and out[field]:
|
|
|
- out[field] = _normalize_ts(out[field])
|
|
|
+ ts = _normalize_ts(out[field])
|
|
|
+ if ts > now_str:
|
|
|
+ ts = now_str
|
|
|
+ out[field] = ts
|
|
|
# Ensure timestamp is always present for the generated column index.
|
|
|
# Prefer existing timestamp, then first_seen, then last_updated, then now.
|
|
|
for src in ("timestamp", "first_seen", "last_updated"):
|