před 6 dny · b22882c580
--- a/news_mcp/article_identity.py
+++ b/news_mcp/article_identity.py
@@ -0,0 +1,48 @@
 
				+"""Article identity and content hashing — single source of truth.
			
 
				+
			
 
				+Used by:
			
 
				+  - news_mcp.dedup.cluster  (clustering identity, orphan merge, stable cluster IDs)
			
 
				+  - news_mcp.storage.sqlite_store  (seen_articles, dedup, upsert)
			
 
				+  - scripts/backfill_seen_articles.py  (backfill)
			
 
				+"""
			
 
				+
			
 
				+from __future__ import annotations
			
 
				+
			
 
				+import hashlib
			
 
				+from typing import Any
			
 
				+from urllib.parse import urlparse
			
 
				+
			
 
				+
			
 
				+def article_key(article: dict[str, Any]) -> str:
			
 
				+    """Deterministic identity key derived from an article's URL.
			
 
				+
			
 
				+    If a URL exists, returns the last path segment (e.g. '/content/uuid' → 'uuid',
			
 
				+    '/Article/Slug/66427393' → '66427393').  Falls back to the full URL if no
			
 
				+    path segments, or to the title if no URL at all.
			
 
				+
			
 
				+    This is the primary dedup identity — two articles with the same key
			
 
				+    are considered the same article regardless of source.
			
 
				+    """
			
 
				+    url = str(article.get("url") or "").strip()
			
 
				+    if not url:
			
 
				+        return str(article.get("title") or "")
			
 
				+    try:
			
 
				+        parsed = urlparse(url)
			
 
				+        parts = [p for p in parsed.path.split("/") if p]
			
 
				+        if parts:
			
 
				+            return parts[-1]
			
 
				+    except Exception:
			
 
				+        pass
			
 
				+    return url
			
 
				+
			
 
				+
			
 
				+def article_content_hash(article: dict[str, Any]) -> str:
			
 
				+    """SHA-1 hash of title + summary for detecting content changes.
			
 
				+
			
 
				+    Used to detect in-place article updates (e.g. a stub that gets fleshed
			
 
				+    out) where the URL stays the same but the content changes.
			
 
				+    """
			
 
				+    title = str(article.get("title") or "").strip()
			
 
				+    summary = str(article.get("summary") or "").strip()
			
 
				+    material = f"{title}|{summary}"
			
 
				+    return hashlib.sha1(material.encode("utf-8")).hexdigest()
			
--- a/news_mcp/dedup/cluster.py
+++ b/news_mcp/dedup/cluster.py
@@ -6,13 +6,13 @@ import re
 
				 from datetime import datetime, timezone, timedelta
			
 
				 from difflib import SequenceMatcher
			
 
				 from typing import Any, Dict, List
			
 
				-from urllib.parse import urlparse
			
 
				 
			
 
				 from news_mcp.config import (
			
 
				     NEWS_EMBEDDINGS_ENABLED,
			
 
				     NEWS_EMBEDDING_SIMILARITY_THRESHOLD,
			
 
				     NEWS_CLUSTER_MAX_AGE_HOURS,
			
 
				 )
			
 
				+from news_mcp.article_identity import article_key, article_content_hash
			
 
				 from news_mcp.dedup.embedding_support import cosine_similarity, ollama_embed
			
 
				 from news_mcp.sources.news_feeds import normalize_topic_from_title
			
 
				 
			
@@ -33,18 +33,8 @@ def _title_similarity(a: str, b: str) -> float:
 
				     return SequenceMatcher(None, _normalize_title(a), _normalize_title(b)).ratio()
			
 
				 
			
 
				 
			
 
				-def _article_key(article: Dict[str, Any]) -> str:
			
 
				-    url = str(article.get("url") or "").strip()
			
 
				-    if not url:
			
 
				-        return str(article.get("title") or "")
			
 
				-    try:
			
 
				-        parsed = urlparse(url)
			
 
				-        parts = [p for p in parsed.path.split("/") if p]
			
 
				-        if parts:
			
 
				-            return parts[-1]
			
 
				-    except Exception:
			
 
				-        pass
			
 
				-    return url
			
 
				+# Backward-compatible private alias so existing _article_key call sites keep working; the canonical implementation is news_mcp.article_identity.article_key.
			
 
				+_article_key = article_key
			
 
				 
			
 
				 
			
 
				 def _cluster_text(a: Dict[str, Any]) -> str:
			
--- a/news_mcp/mcp_server_fastmcp.py
+++ b/news_mcp/mcp_server_fastmcp.py
@@ -349,6 +349,70 @@ async def toggle_feed(feed_url: str, enabled: bool) -> dict:
 
				     return {"ok": True, "feed_key": feed_url.strip(), "enabled": enabled, "details": updated}
			
 
				 
			
 
				 
			
 
				+@mcp.tool(description="Debug dedup: inspect whether an article URL was already processed, which cluster it belongs to, and what similarity signals it would produce against existing clusters.")
			
 
				+async def debug_dedup(url: str, title: str | None = None) -> dict:
			
 
				+    """Given an article URL (and optional title), report dedup status.
			
 
				+
			
 
				+    Returns:
			
 
				+      - seen: whether the article_key is in seen_articles
			
 
				+      - article_key: the identity key derived from the URL
			
 
				+      - cluster_id: which cluster it belongs to (if seen)
			
 
				+      - similarity_signals: if title is provided, compute signals against
			
 
				+        the top-N most similar existing clusters
			
 
				+    """
			
 
				+    from news_mcp.article_identity import article_key, article_content_hash
			
 
				+    from news_mcp.dedup.cluster import _title_similarity, _normalize_title, _signals, _is_match
			
 
				+    from news_mcp.config import NEWS_EMBEDDINGS_ENABLED
			
 
				+
			
 
				+    art = {"url": url, "title": title or ""}
			
 
				+    akey = article_key(art)
			
 
				+    result = {"url": url, "article_key": akey}
			
 
				+
			
 
				+    store = SQLiteClusterStore(DB_PATH)
			
 
				+    with store._conn() as conn:
			
 
				+        # Check seen_articles
			
 
				+        row = conn.execute(
			
 
				+            "SELECT cluster_id, first_seen, url FROM seen_articles WHERE article_key=?",
			
 
				+            (akey,),
			
 
				+        ).fetchone()
			
 
				+        if row:
			
 
				+            result["seen"] = True
			
 
				+            result["cluster_id"] = row[0]
			
 
				+            result["first_seen"] = row[1]
			
 
				+            result["stored_url"] = row[2]
			
 
				+        else:
			
 
				+            result["seen"] = False
			
 
				+
			
 
				+    # If title provided, compute similarity against top clusters
			
 
				+    if title:
			
 
				+        # Get recent clusters for comparison
			
 
				+        recent = store.get_latest_clusters_all_topics(ttl_hours=24, limit=20)
			
 
				+        signals_list = []
			
 
				+        for c in recent:
			
 
				+            c_title = c.get("headline", "")
			
 
				+            sigs = _signals(art, c)
			
 
				+            matched, signal_name, signal_value = _is_match(
			
 
				+                sigs, embeddings_enabled=NEWS_EMBEDDINGS_ENABLED,
			
 
				+            )
			
 
				+            signals_list.append({
			
 
				+                "cluster_id": c.get("cluster_id", "")[:12],
			
 
				+                "headline": c_title[:60],
			
 
				+                "title_sim": round(sigs["title"], 3),
			
 
				+                "jaccard": round(sigs["jaccard"], 3),
			
 
				+                "cosine": round(sigs["cosine"], 3) if sigs["cosine"] else None,
			
 
				+                "matched": matched,
			
 
				+                "match_signal": signal_name,
			
 
				+                "match_value": round(signal_value, 3) if signal_value else None,
			
 
				+            })
			
 
				+        # Sort by best title similarity
			
 
				+        signals_list.sort(key=lambda x: x["title_sim"], reverse=True)
			
 
				+        result["similarity_signals"] = signals_list[:10]
			
 
				+        result["title_threshold"] = 0.75  # DEFAULT_TITLE_THRESHOLD
			
 
				+        result["jaccard_threshold"] = 0.55  # DEFAULT_JACCARD_THRESHOLD
			
 
				+
			
 
				+    return result
			
 
				+
			
 
				+
			
 
				 @mcp.tool(description="Investigate a topic and return the newest deduplicated news clusters with entities and thematic keywords, sorted by recency.")
			
 
				 async def get_latest_events(topic: str | None = None, limit: int = 5, include_articles: bool = False):
			
 
				     limit = max(1, min(int(limit), 20))
			
@@ -1380,6 +1444,51 @@ async def api_feed_toggle(feed_url: str = Form(), enabled: bool = Form()):
 
				         return _api_err(e, f"toggle({feed_url})")
			
 
				 
			
 
				 
			
 
				+# ------------------------------------------------------------------ #
			
 
				+#  Site config (dashboard-tuneable parameters)
			
 
				+# ------------------------------------------------------------------ #
			
 
				+
			
 
				+@app.get("/api/v1/config")
			
 
				+def api_config():
			
 
				+    """All site config parameters (seeded from .env/defaults)."""
			
 
				+    try:
			
 
				+        from news_mcp.site_config import get_site_config
			
 
				+        with _shared_store._conn() as conn:
			
 
				+            rows = get_site_config(conn)
			
 
				+        return {"config": rows}
			
 
				+    except Exception as e:
			
 
				+        return _api_err(e, "config")
			
 
				+
			
 
				+
			
 
				+@app.post("/api/v1/config/update")
			
 
				+async def api_config_update(key: str = Form(), value: str = Form()):
			
 
				+    """Update a single config parameter at runtime."""
			
 
				+    try:
			
 
				+        from news_mcp.site_config import set_config_value
			
 
				+        with _shared_store._conn() as conn:
			
 
				+            ok = set_config_value(conn, key.strip(), value.strip())
			
 
				+            conn.commit()
			
 
				+        if not ok:
			
 
				+            return JSONResponse(status_code=404, content={"error": f"Config key not found: {key}"})
			
 
				+        return {"ok": True, "key": key.strip(), "value": value.strip()}
			
 
				+    except Exception as e:
			
 
				+        return _api_err(e, f"config/update({key})")
			
 
				+
			
 
				+
			
 
				+@app.post("/api/v1/config/reset")
			
 
				+async def api_config_reset():
			
 
				+    """Reset all config to .env/defaults (drops and re-seeds site_config)."""
			
 
				+    try:
			
 
				+        from news_mcp.site_config import seed_site_config
			
 
				+        with _shared_store._conn() as conn:
			
 
				+            conn.execute("DELETE FROM site_config")
			
 
				+            seeded = seed_site_config(conn)
			
 
				+            conn.commit()
			
 
				+        return {"ok": True, "seeded": seeded}
			
 
				+    except Exception as e:
			
 
				+        return _api_err(e, "config/reset")
			
 
				+
			
 
				+
			
 
				 @app.get("/health")
			
 
				 def health():
			
 
				     return {
			
--- a/news_mcp/site_config.py
+++ b/news_mcp/site_config.py
@@ -0,0 +1,167 @@
 
				+"""DB-backed site configuration.
			
 
				+
			
 
				+All tunable parameters live in the `site_config` SQLite table.
			
 
				+On startup, if the table is empty, it is seeded from .env overrides
			
 
				+or Python defaults. After that, values are read from the DB — allowing
			
 
				+runtime updates via the REST API without restart.
			
 
				+
			
 
				+Categories:
			
 
				+  clustering  — similarity thresholds and merge criteria
			
 
				+  enrichment  — LLM behavior, rate limits, embedding settings
			
 
				+  retention   — pruning, age windows, refresh intervals
			
 
				+"""
			
 
				+
			
 
				+from __future__ import annotations
			
 
				+
			
 
				+import os
			
 
				+from typing import Any
			
 
				+
			
 
				+# ---------------------------------------------------------------------------
			
 
				+#  Default registry — (key, default_value, type, category, description)
			
 
				+#  The type is one of: float, int, bool, str
			
 
				+# ---------------------------------------------------------------------------
			
 
				+
			
 
# Registry of every tunable parameter.  Each entry is a 5-tuple of strings:
#   (key, default_value, type, category, description)
# Values are kept as text; `type` names how the text should be interpreted
# (float, int, bool or str).  Note that several whole-number defaults
# ("6", "10", "12", "24") are deliberately declared as float.
CONFIG_DEFAULTS: list[tuple[str, str, str, str, str]] = [
    # Clustering thresholds
    ("title_threshold",             "0.75",  "float", "clustering", "Min title similarity to merge (SequenceMatcher)"),
    ("jaccard_threshold",           "0.55",  "float", "clustering", "Min Jaccard token overlap to merge"),
    ("dual_title_floor",            "0.55",  "float", "clustering", "Dual-signal: min title for title+jaccard merge"),
    ("dual_jaccard_floor",          "0.25",  "float", "clustering", "Dual-signal: min jaccard for title+jaccard merge"),
    ("early_exit_title",            "0.95",  "float", "clustering", "Early-exit title signal (both must be met)"),
    ("early_exit_jaccard",          "0.80",  "float", "clustering", "Early-exit jaccard signal"),
    ("consensus_cosine_floor",      "0.80",  "float", "clustering", "Consensus path: min cosine"),
    ("consensus_jaccard_floor",     "0.30",  "float", "clustering", "Consensus path: min jaccard (or title)"),
    ("consensus_title_floor",       "0.55",  "float", "clustering", "Consensus path: min title (or jaccard)"),
    ("crosstopic_title_threshold",  "0.90",  "float", "clustering", "Cross-topic merge: min title similarity"),
    ("embedding_similarity_threshold", "0.885", "float", "clustering", "Cosine threshold for embedding-only merge"),
    ("cluster_max_age_hours",       "6",     "float", "clustering", "Cross-cycle merge window (hours, 0=off)"),

    # Enrichment/LLM
    ("embeddings_enabled",          "true",  "bool",  "enrichment", "Enable Ollama embedding computation"),
    # NOTE(review): the default below is a private-network address — looks
    # deployment-specific; confirm it is intended as the shipped default.
    ("ollama_base_url",             "http://192.168.0.200:11434", "str", "enrichment", "Ollama API base URL"),
    ("ollama_embedding_model",      "nomic-embed-text", "str", "enrichment", "Ollama embedding model name"),
    ("extract_provider",            "groq",  "str",   "enrichment", "LLM provider for extraction"),
    ("extract_model",               "llama4-16e", "str", "enrichment", "LLM model for extraction"),
    ("summary_provider",            "groq",  "str",   "enrichment", "LLM provider for summarisation"),
    ("summary_model",               "llama4-16e", "str", "enrichment", "LLM model for summarisation"),
    ("enrichment_max_per_refresh",  "0",     "int",   "enrichment", "Max enrichments per cycle (0=unlimited)"),
    ("enrich_other_topics_only",    "false", "bool",  "enrichment", "Only enrich 'other' topic (legacy guard)"),

    # Retention/pruning
    ("pruning_enabled",             "true",  "bool",  "retention", "Enable periodic cluster pruning"),
    ("retention_days",              "10",    "float", "retention", "Delete clusters older than N days"),
    ("prune_interval_hours",        "12",    "float", "retention", "Run prune every N hours"),
    ("refresh_interval_seconds",    "300",   "int",   "retention", "Polling cycle interval (seconds)"),
    ("background_refresh_enabled",  "true",  "bool",  "retention", "Enable background polling"),
    ("default_lookback_hours",      "24",    "float", "retention", "Default lookback for read queries"),
]
			
 
				+
			
 
				+# ---------------------------------------------------------------------------
			
 
				+#  .env override map: CONFIG_KEY → ENV_VAR_NAME
			
 
				+#  When seeding, if the env var is set, its value overrides the default.
			
 
				+# ---------------------------------------------------------------------------
			
 
				+
			
 
# Maps a config key to the environment variable that may override its default
# when the table is seeded.  Keys from CONFIG_DEFAULTS that have no entry here
# (e.g. dual_title_floor, early_exit_title, consensus_*_floor,
# crosstopic_title_threshold) are always seeded from their Python default.
ENV_OVERRIDES: dict[str, str] = {
    "title_threshold":             "NEWS_TITLE_THRESHOLD",
    "jaccard_threshold":           "NEWS_JACCARD_THRESHOLD",
    "embedding_similarity_threshold": "NEWS_EMBEDDING_SIMILARITY_THRESHOLD",
    "cluster_max_age_hours":       "NEWS_CLUSTER_MAX_AGE_HOURS",
    "embeddings_enabled":          "NEWS_EMBEDDINGS_ENABLED",
    "ollama_base_url":             "OLLAMA_BASE_URL",
    "ollama_embedding_model":      "OLLAMA_EMBEDDING_MODEL",
    "extract_provider":            "NEWS_EXTRACT_PROVIDER",
    "extract_model":               "NEWS_EXTRACT_MODEL",
    "summary_provider":            "NEWS_SUMMARY_PROVIDER",
    "summary_model":               "NEWS_SUMMARY_MODEL",
    "enrichment_max_per_refresh":  "ENRICHMENT_MAX_PER_REFRESH",
    "enrich_other_topics_only":    "ENRICH_OTHER_TOPICS_ONLY",
    "pruning_enabled":             "NEWS_PRUNING_ENABLED",
    "retention_days":              "NEWS_RETENTION_DAYS",
    "prune_interval_hours":        "NEWS_PRUNE_INTERVAL_HOURS",
    "refresh_interval_seconds":    "NEWS_REFRESH_INTERVAL_SECONDS",
    "background_refresh_enabled":  "NEWS_BACKGROUND_REFRESH_ENABLED",
    "default_lookback_hours":      "NEWS_DEFAULT_LOOKBACK_HOURS",
}
			
 
				+
			
 
				+
			
 
				+def _coerce(value: str, typ: str) -> Any:
			
 
				+    """Convert a string value to its declared type."""
			
 
				+    if typ == "float":
			
 
				+        return float(value)
			
 
				+    if typ == "int":
			
 
				+        return int(value)
			
 
				+    if typ == "bool":
			
 
				+        return value.lower() in ("true", "1", "yes")
			
 
				+    return value  # str
			
 
				+
			
 
				+
			
 
				+def seed_site_config(conn) -> int:
			
 
				+    """Create the site_config table and seed it if empty.
			
 
				+
			
 
				+    Returns the number of rows inserted (0 if already seeded).
			
 
				+    """
			
 
				+    conn.execute("""
			
 
				+        CREATE TABLE IF NOT EXISTS site_config (
			
 
				+            key         TEXT PRIMARY KEY,
			
 
				+            value       TEXT NOT NULL,
			
 
				+            type        TEXT NOT NULL DEFAULT 'str',
			
 
				+            category    TEXT NOT NULL DEFAULT 'general',
			
 
				+            description TEXT NOT NULL DEFAULT '',
			
 
				+            source      TEXT NOT NULL DEFAULT 'default'
			
 
				+        )
			
 
				+    """)
			
 
				+
			
 
				+    count = conn.execute("SELECT count(*) FROM site_config").fetchone()[0]
			
 
				+    if count > 0:
			
 
				+        return 0
			
 
				+
			
 
				+    inserted = 0
			
 
				+    for key, default, typ, category, description in CONFIG_DEFAULTS:
			
 
				+        # Check .env override
			
 
				+        env_var = ENV_OVERRIDES.get(key)
			
 
				+        env_val = os.getenv(env_var) if env_var else None
			
 
				+        if env_val is not None:
			
 
				+            value = env_val
			
 
				+            source = "env"
			
 
				+        else:
			
 
				+            value = default
			
 
				+            source = "default"
			
 
				+        conn.execute(
			
 
				+            "INSERT INTO site_config(key, value, type, category, description, source) VALUES(?,?,?,?,?,?)",
			
 
				+            (key, value, typ, category, description, source),
			
 
				+        )
			
 
				+        inserted += 1
			
 
				+    return inserted
			
 
				+
			
 
				+
			
 
				+def get_site_config(conn) -> list[dict]:
			
 
				+    """Return all config rows as dicts."""
			
 
				+    conn.execute("""
			
 
				+        CREATE TABLE IF NOT EXISTS site_config (
			
 
				+            key         TEXT PRIMARY KEY,
			
 
				+            value       TEXT NOT NULL,
			
 
				+            type        TEXT NOT NULL DEFAULT 'str',
			
 
				+            category    TEXT NOT NULL DEFAULT 'general',
			
 
				+            description TEXT NOT NULL DEFAULT '',
			
 
				+            source      TEXT NOT NULL DEFAULT 'default'
			
 
				+        )
			
 
				+    """)
			
 
				+    rows = conn.execute(
			
 
				+        "SELECT key, value, type, category, description, source FROM site_config ORDER BY category, key"
			
 
				+    ).fetchall()
			
 
				+    return [
			
 
				+        {"key": r[0], "value": r[1], "type": r[2], "category": r[3], "description": r[4], "source": r[5]}
			
 
				+        for r in rows
			
 
				+    ]
			
 
				+
			
 
				+
			
 
				+def get_config_value(conn, key: str) -> str | None:
			
 
				+    """Get a single config value by key. Returns None if not found."""
			
 
				+    row = conn.execute("SELECT value FROM site_config WHERE key=?", (key,)).fetchone()
			
 
				+    return row[0] if row else None
			
 
				+
			
 
				+
			
 
				+def set_config_value(conn, key: str, value: str) -> bool:
			
 
				+    """Update a single config value. Returns True if the key existed."""
			
 
				+    cur = conn.execute("UPDATE site_config SET value=?, source='api' WHERE key=?", (value, key))
			
 
				+    return cur.rowcount > 0
			
--- a/news_mcp/storage/sqlite_store.py
+++ b/news_mcp/storage/sqlite_store.py
@@ -6,9 +6,9 @@ from dataclasses import dataclass
 
				 from datetime import datetime, timezone, timedelta
			
 
				 from pathlib import Path
			
 
				 from typing import Any
			
 
				-from urllib.parse import urlparse
			
 
				 from email.utils import parsedate_to_datetime
			
 
				 
			
 
				+from news_mcp.article_identity import article_key
			
 
				 from news_mcp.config import (
			
 
				     NEWS_PRUNE_INTERVAL_HOURS,
			
 
				     NEWS_PRUNING_ENABLED,
			
@@ -87,19 +87,8 @@ class ClusterRow:
 
				 
			
 
				 META_LAST_PRUNE_AT = "last_prune_at"
			
 
				 
			
 
				-
			
 
				-def _article_key(article: dict[str, Any]) -> str:
			
 
				-    url = str(article.get("url") or "").strip()
			
 
				-    if not url:
			
 
				-        return str(article.get("title") or "")
			
 
				-    try:
			
 
				-        parsed = urlparse(url)
			
 
				-        parts = [p for p in parsed.path.split("/") if p]
			
 
				-        if parts:
			
 
				-            return parts[-1]
			
 
				-    except Exception:
			
 
				-        pass
			
 
				-    return url
			
 
				+# Backward-compatible private alias so existing _article_key call sites keep working; the canonical implementation is news_mcp.article_identity.article_key.
			
 
				+_article_key = article_key
			
 
				 
			
 
				 
			
 
				 def _dedup_articles(articles: list[dict[str, Any]]) -> list[dict[str, Any]]:
			
@@ -329,6 +318,13 @@ class SQLiteClusterStore:
 
				                 """
			
 
				             )
			
 
				 
			
 
				+            # Seed site_config from .env / defaults (no-op if already populated)
			
 
				+            from news_mcp.site_config import seed_site_config
			
 
				+            seeded = seed_site_config(conn)
			
 
				+            if seeded:
			
 
				+                import logging
			
 
				+                logging.getLogger(__name__).info("site_config: seeded %d rows from env/defaults", seeded)
			
 
				+
			
 
				     def upsert_clusters(self, clusters: list[dict], topic: str) -> None:
			
 
				         now = datetime.now(timezone.utc)
			
 
				         with self._conn() as conn:
			
--- a/scripts/backfill_seen_articles.py
+++ b/scripts/backfill_seen_articles.py
@@ -11,21 +11,12 @@ import json
 
				 import sqlite3
			
 
				 import sys
			
 
				 from datetime import datetime, timezone
			
 
				-from urllib.parse import urlparse
			
 
				 
			
 
				+# Add parent dir so we can import news_mcp when run as a standalone script
			
 
				+from pathlib import Path
			
 
				+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
			
 
				 
			
 
				-def _article_key(article: dict) -> str:
			
 
				-    url = str(article.get("url") or "").strip()
			
 
				-    if not url:
			
 
				-        return str(article.get("title") or "")
			
 
				-    try:
			
 
				-        parsed = urlparse(url)
			
 
				-        parts = [p for p in parsed.path.split("/") if p]
			
 
				-        if parts:
			
 
				-            return parts[-1]
			
 
				-    except Exception:
			
 
				-        pass
			
 
				-    return url
			
 
				+from news_mcp.article_identity import article_key
			
 
				 
			
 
				 
			
 
				 def main(db_path: str = "./data/news.sqlite"):
			
@@ -53,7 +44,7 @@ def main(db_path: str = "./data/news.sqlite"):
 
				             skipped += 1
			
 
				             continue
			
 
				         for art in payload.get("articles", []):
			
 
				-            akey = _article_key(art)
			
 
				+            akey = article_key(art)
			
 
				             if not akey:
			
 
				                 continue
			
 
				             art_url = str(art.get("url") or "").strip()