6 дней назад · b22882c580
--- a/news_mcp/article_identity.py
+++ b/news_mcp/article_identity.py
@@ -0,0 +1,48 @@
 
															+"""Article identity and content hashing — single source of truth.
														
 
															+
														
 
															+Used by:
														
 
															+  - news_mcp.dedup.cluster  (clustering identity, orphan merge, stable cluster IDs)
														
 
															+  - news_mcp.storage.sqlite_store  (seen_articles, dedup, upsert)
														
 
															+  - scripts/backfill_seen_articles.py  (backfill)
														
 
															+"""
														
 
															+
														
 
															+from __future__ import annotations
														
 
															+
														
 
															+import hashlib
														
 
															+from typing import Any
														
 
															+from urllib.parse import urlparse
														
 
															+
														
 
															+
														
 
															+def article_key(article: dict[str, Any]) -> str:
														
 
															+    """Deterministic identity key derived from an article's URL.
														
 
															+
														
 
															+    If a URL exists, returns the last path segment (e.g. '/content/uuid' → 'uuid',
														
 
															+    '/Article/Slug/66427393' → '66427393').  Falls back to the full URL if no
														
 
															+    path segments, or to the title if no URL at all.
														
 
															+
														
 
															+    This is the primary dedup identity — two articles with the same key
														
 
															+    are considered the same article regardless of source.
														
 
															+    """
														
 
															+    url = str(article.get("url") or "").strip()
														
 
															+    if not url:
														
 
															+        return str(article.get("title") or "")
														
 
															+    try:
														
 
															+        parsed = urlparse(url)
														
 
															+        parts = [p for p in parsed.path.split("/") if p]
														
 
															+        if parts:
														
 
															+            return parts[-1]
														
 
															+    except Exception:
														
 
															+        pass
														
 
															+    return url
														
 
															+
														
 
															+
														
 
															+def article_content_hash(article: dict[str, Any]) -> str:
														
 
															+    """SHA-1 hash of title + summary for detecting content changes.
														
 
															+
														
 
															+    Used to detect in-place article updates (e.g. a stub that gets fleshed
														
 
															+    out) where the URL stays the same but the content changes.
														
 
															+    """
														
 
															+    title = str(article.get("title") or "").strip()
														
 
															+    summary = str(article.get("summary") or "").strip()
														
 
															+    material = f"{title}|{summary}"
														
 
															+    return hashlib.sha1(material.encode("utf-8")).hexdigest()
														
--- a/news_mcp/dedup/cluster.py
+++ b/news_mcp/dedup/cluster.py
@@ -6,13 +6,13 @@ import re
 
															 from datetime import datetime, timezone, timedelta
														
 
															 from difflib import SequenceMatcher
														
 
															 from typing import Any, Dict, List
														
 
															-from urllib.parse import urlparse
														
 
															 from news_mcp.config import (
														
 
															     NEWS_EMBEDDINGS_ENABLED,
														
 
															     NEWS_EMBEDDING_SIMILARITY_THRESHOLD,
														
 
															     NEWS_CLUSTER_MAX_AGE_HOURS,
														
 
															 )
														
 
															+from news_mcp.article_identity import article_key, article_content_hash
														
 
															 from news_mcp.dedup.embedding_support import cosine_similarity, ollama_embed
														
 
															 from news_mcp.sources.news_feeds import normalize_topic_from_title
														
@@ -33,18 +33,8 @@ def _title_similarity(a: str, b: str) -> float:
 
															     return SequenceMatcher(None, _normalize_title(a), _normalize_title(b)).ratio()
														
 
															-def _article_key(article: Dict[str, Any]) -> str:
														
 
															-    url = str(article.get("url") or "").strip()
														
 
															-    if not url:
														
 
															-        return str(article.get("title") or "")
														
 
															-    try:
														
 
															-        parsed = urlparse(url)
														
 
															-        parts = [p for p in parsed.path.split("/") if p]
														
 
															-        if parts:
														
 
															-            return parts[-1]
														
 
															-    except Exception:
														
 
															-        pass
														
 
															-    return url
														
 
															+# For internal use — canonical name is article_key(article) from article_identity
														
 
															+_article_key = article_key
														
 
															 def _cluster_text(a: Dict[str, Any]) -> str:
														
--- a/news_mcp/mcp_server_fastmcp.py
+++ b/news_mcp/mcp_server_fastmcp.py
@@ -349,6 +349,70 @@ async def toggle_feed(feed_url: str, enabled: bool) -> dict:
 
															     return {"ok": True, "feed_key": feed_url.strip(), "enabled": enabled, "details": updated}
														
 
															+@mcp.tool(description="Debug dedup: inspect whether an article URL was already processed, which cluster it belongs to, and what similarity signals it would produce against existing clusters.")
														
 
															+async def debug_dedup(url: str, title: str | None = None) -> dict:
														
 
															+    """Given an article URL (and optional title), report dedup status.
														
 
															+
														
 
															+    Returns:
														
 
															+      - seen: whether the article_key is in seen_articles
														
 
															+      - article_key: the identity key derived from the URL
														
 
															+      - cluster_id: which cluster it belongs to (if seen)
														
 
															+      - similarity_signals: if title is provided, compute signals against
														
 
															+        the top-N most similar existing clusters
														
 
															+    """
														
 
															+    from news_mcp.article_identity import article_key, article_content_hash
														
 
															+    from news_mcp.dedup.cluster import _title_similarity, _normalize_title, _signals, _is_match
														
 
															+    from news_mcp.config import NEWS_EMBEDDINGS_ENABLED
														
 
															+
														
 
															+    art = {"url": url, "title": title or ""}
														
 
															+    akey = article_key(art)
														
 
															+    result = {"url": url, "article_key": akey}
														
 
															+
														
 
															+    store = SQLiteClusterStore(DB_PATH)
														
 
															+    with store._conn() as conn:
														
 
															+        # Check seen_articles
														
 
															+        row = conn.execute(
														
 
															+            "SELECT cluster_id, first_seen, url FROM seen_articles WHERE article_key=?",
														
 
															+            (akey,),
														
 
															+        ).fetchone()
														
 
															+        if row:
														
 
															+            result["seen"] = True
														
 
															+            result["cluster_id"] = row[0]
														
 
															+            result["first_seen"] = row[1]
														
 
															+            result["stored_url"] = row[2]
														
 
															+        else:
														
 
															+            result["seen"] = False
														
 
															+
														
 
															+    # If title provided, compute similarity against top clusters
														
 
															+    if title:
														
 
															+        # Get recent clusters for comparison
														
 
															+        recent = store.get_latest_clusters_all_topics(ttl_hours=24, limit=20)
														
 
															+        signals_list = []
														
 
															+        for c in recent:
														
 
															+            c_title = c.get("headline", "")
														
 
															+            sigs = _signals(art, c)
														
 
															+            matched, signal_name, signal_value = _is_match(
														
 
															+                sigs, embeddings_enabled=NEWS_EMBEDDINGS_ENABLED,
														
 
															+            )
														
 
															+            signals_list.append({
														
 
															+                "cluster_id": c.get("cluster_id", "")[:12],
														
 
															+                "headline": c_title[:60],
														
 
															+                "title_sim": round(sigs["title"], 3),
														
 
															+                "jaccard": round(sigs["jaccard"], 3),
														
 
															+                "cosine": round(sigs["cosine"], 3) if sigs["cosine"] else None,
														
 
															+                "matched": matched,
														
 
															+                "match_signal": signal_name,
														
 
															+                "match_value": round(signal_value, 3) if signal_value else None,
														
 
															+            })
														
 
															+        # Sort by best title similarity
														
 
															+        signals_list.sort(key=lambda x: x["title_sim"], reverse=True)
														
 
															+        result["similarity_signals"] = signals_list[:10]
														
 
															+        result["title_threshold"] = 0.75  # DEFAULT_TITLE_THRESHOLD
														
 
															+        result["jaccard_threshold"] = 0.55  # DEFAULT_JACCARD_THRESHOLD
														
 
															+
														
 
															+    return result
														
 
															+
														
 
															+
														
 
															 @mcp.tool(description="Investigate a topic and return the newest deduplicated news clusters with entities and thematic keywords, sorted by recency.")
														
 
															 async def get_latest_events(topic: str | None = None, limit: int = 5, include_articles: bool = False):
														
 
															     limit = max(1, min(int(limit), 20))
														
@@ -1380,6 +1444,51 @@ async def api_feed_toggle(feed_url: str = Form(), enabled: bool = Form()):
 
															         return _api_err(e, f"toggle({feed_url})")
														
 
															+# ------------------------------------------------------------------ #
														
 
															+#  Site config (dashboard-tuneable parameters)
														
 
															+# ------------------------------------------------------------------ #
														
 
															+
														
 
															+@app.get("/api/v1/config")
														
 
															+def api_config():
														
 
															+    """All site config parameters (seeded from .env/defaults)."""
														
 
															+    try:
														
 
															+        from news_mcp.site_config import get_site_config
														
 
															+        with _shared_store._conn() as conn:
														
 
															+            rows = get_site_config(conn)
														
 
															+        return {"config": rows}
														
 
															+    except Exception as e:
														
 
															+        return _api_err(e, "config")
														
 
															+
														
 
															+
														
 
															+@app.post("/api/v1/config/update")
														
 
															+async def api_config_update(key: str = Form(), value: str = Form()):
														
 
															+    """Update a single config parameter at runtime."""
														
 
															+    try:
														
 
															+        from news_mcp.site_config import set_config_value
														
 
															+        with _shared_store._conn() as conn:
														
 
															+            ok = set_config_value(conn, key.strip(), value.strip())
														
 
															+            conn.commit()
														
 
															+        if not ok:
														
 
															+            return JSONResponse(status_code=404, content={"error": f"Config key not found: {key}"})
														
 
															+        return {"ok": True, "key": key.strip(), "value": value.strip()}
														
 
															+    except Exception as e:
														
 
															+        return _api_err(e, f"config/update({key})")
														
 
															+
														
 
															+
														
 
															+@app.post("/api/v1/config/reset")
														
 
															+async def api_config_reset():
														
 
															+    """Reset all config to .env/defaults (drops and re-seeds site_config)."""
														
 
															+    try:
														
 
															+        from news_mcp.site_config import seed_site_config
														
 
															+        with _shared_store._conn() as conn:
														
 
															+            conn.execute("DELETE FROM site_config")
														
 
															+            seeded = seed_site_config(conn)
														
 
															+            conn.commit()
														
 
															+        return {"ok": True, "seeded": seeded}
														
 
															+    except Exception as e:
														
 
															+        return _api_err(e, "config/reset")
														
 
															+
														
 
															+
														
 
															 @app.get("/health")
														
 
															 def health():
														
 
															     return {
														
--- a/news_mcp/site_config.py
+++ b/news_mcp/site_config.py
@@ -0,0 +1,167 @@
 
															+"""DB-backed site configuration.
														
 
															+
														
 
															+All tunable parameters live in the `site_config` SQLite table.
														
 
															+On startup, if the table is empty, it is seeded from .env overrides
														
 
															+or Python defaults. After that, values are read from the DB — allowing
														
 
															+runtime updates via the REST API without restart.
														
 
															+
														
 
															+Categories:
														
 
															+  clustering  — similarity thresholds and merge criteria
														
 
															+  enrichment  — LLM behavior, rate limits, embedding settings
														
 
															+  retention   — pruning, age windows, refresh intervals
														
 
															+"""
														
 
															+
														
 
															+from __future__ import annotations
														
 
															+
														
 
															+import os
														
 
															+from typing import Any
														
 
															+
														
 
															+# ---------------------------------------------------------------------------
														
 
															+#  Default registry — (key, default_value, type, category, description)
														
 
															+#  The type is one of: float, int, bool, str
														
 
															+# ---------------------------------------------------------------------------
														
 
															+
														
 
# Each entry is (key, default_value, type, category, description). All five
# fields are strings; seed_site_config() inserts one site_config row per entry,
# replacing default_value with the mapped environment variable when one is set.
CONFIG_DEFAULTS: list[tuple[str, str, str, str, str]] = [
    # Clustering: similarity thresholds and merge criteria
    ("title_threshold",             "0.75",  "float", "clustering", "Min title similarity to merge (SequenceMatcher)"),
    ("jaccard_threshold",           "0.55",  "float", "clustering", "Min Jaccard token overlap to merge"),
    ("dual_title_floor",            "0.55",  "float", "clustering", "Dual-signal: min title for title+jaccard merge"),
    ("dual_jaccard_floor",          "0.25",  "float", "clustering", "Dual-signal: min jaccard for title+jaccard merge"),
    ("early_exit_title",            "0.95",  "float", "clustering", "Early-exit title signal (both must be met)"),
    ("early_exit_jaccard",          "0.80",  "float", "clustering", "Early-exit jaccard signal"),
    ("consensus_cosine_floor",      "0.80",  "float", "clustering", "Consensus path: min cosine"),
    ("consensus_jaccard_floor",     "0.30",  "float", "clustering", "Consensus path: min jaccard (or title)"),
    ("consensus_title_floor",       "0.55",  "float", "clustering", "Consensus path: min title (or jaccard)"),
    ("crosstopic_title_threshold",  "0.90",  "float", "clustering", "Cross-topic merge: min title similarity"),
    ("embedding_similarity_threshold", "0.885", "float", "clustering", "Cosine threshold for embedding-only merge"),
    ("cluster_max_age_hours",       "6",     "float", "clustering", "Cross-cycle merge window (hours, 0=off)"),

    # Enrichment: LLM providers/models and embedding settings
    ("embeddings_enabled",          "true",  "bool",  "enrichment", "Enable Ollama embedding computation"),
    # NOTE(review): this default is a private-network address; other
    # deployments presumably need OLLAMA_BASE_URL set — confirm.
    ("ollama_base_url",             "http://192.168.0.200:11434", "str", "enrichment", "Ollama API base URL"),
    ("ollama_embedding_model",      "nomic-embed-text", "str", "enrichment", "Ollama embedding model name"),
    ("extract_provider",            "groq",  "str",   "enrichment", "LLM provider for extraction"),
    ("extract_model",               "llama4-16e", "str", "enrichment", "LLM model for extraction"),
    ("summary_provider",            "groq",  "str",   "enrichment", "LLM provider for summarisation"),
    ("summary_model",               "llama4-16e", "str", "enrichment", "LLM model for summarisation"),
    ("enrichment_max_per_refresh",  "0",     "int",   "enrichment", "Max enrichments per cycle (0=unlimited)"),
    ("enrich_other_topics_only",    "false", "bool",  "enrichment", "Only enrich 'other' topic (legacy guard)"),

    # Retention: pruning, age windows and refresh intervals
    ("pruning_enabled",             "true",  "bool",  "retention", "Enable periodic cluster pruning"),
    ("retention_days",              "10",    "float", "retention", "Delete clusters older than N days"),
    ("prune_interval_hours",        "12",    "float", "retention", "Run prune every N hours"),
    ("refresh_interval_seconds",    "300",   "int",   "retention", "Polling cycle interval (seconds)"),
    ("background_refresh_enabled",  "true",  "bool",  "retention", "Enable background polling"),
    ("default_lookback_hours",      "24",    "float", "retention", "Default lookback for read queries"),
]
														
 
															+
														
 
															+# ---------------------------------------------------------------------------
														
 
															+#  .env override map: CONFIG_KEY → ENV_VAR_NAME
														
 
															+#  When seeding, if the env var is set, its value overrides the default.
														
 
															+# ---------------------------------------------------------------------------
														
 
															+
														
 
# Maps a CONFIG_DEFAULTS key to the environment variable that may override its
# default at seed time. Keys absent from this map (e.g. the dual_*, early_exit_*,
# consensus_* and crosstopic_* thresholds) are always seeded from the Python
# default. The raw environment string is stored as-is, with source "env".
ENV_OVERRIDES: dict[str, str] = {
    # Clustering
    "title_threshold":             "NEWS_TITLE_THRESHOLD",
    "jaccard_threshold":           "NEWS_JACCARD_THRESHOLD",
    "embedding_similarity_threshold": "NEWS_EMBEDDING_SIMILARITY_THRESHOLD",
    "cluster_max_age_hours":       "NEWS_CLUSTER_MAX_AGE_HOURS",
    # Enrichment
    "embeddings_enabled":          "NEWS_EMBEDDINGS_ENABLED",
    "ollama_base_url":             "OLLAMA_BASE_URL",
    "ollama_embedding_model":      "OLLAMA_EMBEDDING_MODEL",
    "extract_provider":            "NEWS_EXTRACT_PROVIDER",
    "extract_model":               "NEWS_EXTRACT_MODEL",
    "summary_provider":            "NEWS_SUMMARY_PROVIDER",
    "summary_model":               "NEWS_SUMMARY_MODEL",
    "enrichment_max_per_refresh":  "ENRICHMENT_MAX_PER_REFRESH",
    "enrich_other_topics_only":    "ENRICH_OTHER_TOPICS_ONLY",
    # Retention
    "pruning_enabled":             "NEWS_PRUNING_ENABLED",
    "retention_days":              "NEWS_RETENTION_DAYS",
    "prune_interval_hours":        "NEWS_PRUNE_INTERVAL_HOURS",
    "refresh_interval_seconds":    "NEWS_REFRESH_INTERVAL_SECONDS",
    "background_refresh_enabled":  "NEWS_BACKGROUND_REFRESH_ENABLED",
    "default_lookback_hours":      "NEWS_DEFAULT_LOOKBACK_HOURS",
}
														
 
															+
														
 
															+
														
 
															+def _coerce(value: str, typ: str) -> Any:
														
 
															+    """Convert a string value to its declared type."""
														
 
															+    if typ == "float":
														
 
															+        return float(value)
														
 
															+    if typ == "int":
														
 
															+        return int(value)
														
 
															+    if typ == "bool":
														
 
															+        return value.lower() in ("true", "1", "yes")
														
 
															+    return value  # str
														
 
															+
														
 
															+
														
 
															+def seed_site_config(conn) -> int:
														
 
															+    """Create the site_config table and seed it if empty.
														
 
															+
														
 
															+    Returns the number of rows inserted (0 if already seeded).
														
 
															+    """
														
 
															+    conn.execute("""
														
 
															+        CREATE TABLE IF NOT EXISTS site_config (
														
 
															+            key         TEXT PRIMARY KEY,
														
 
															+            value       TEXT NOT NULL,
														
 
															+            type        TEXT NOT NULL DEFAULT 'str',
														
 
															+            category    TEXT NOT NULL DEFAULT 'general',
														
 
															+            description TEXT NOT NULL DEFAULT '',
														
 
															+            source      TEXT NOT NULL DEFAULT 'default'
														
 
															+        )
														
 
															+    """)
														
 
															+
														
 
															+    count = conn.execute("SELECT count(*) FROM site_config").fetchone()[0]
														
 
															+    if count > 0:
														
 
															+        return 0
														
 
															+
														
 
															+    inserted = 0
														
 
															+    for key, default, typ, category, description in CONFIG_DEFAULTS:
														
 
															+        # Check .env override
														
 
															+        env_var = ENV_OVERRIDES.get(key)
														
 
															+        env_val = os.getenv(env_var) if env_var else None
														
 
															+        if env_val is not None:
														
 
															+            value = env_val
														
 
															+            source = "env"
														
 
															+        else:
														
 
															+            value = default
														
 
															+            source = "default"
														
 
															+        conn.execute(
														
 
															+            "INSERT INTO site_config(key, value, type, category, description, source) VALUES(?,?,?,?,?,?)",
														
 
															+            (key, value, typ, category, description, source),
														
 
															+        )
														
 
															+        inserted += 1
														
 
															+    return inserted
														
 
															+
														
 
															+
														
 
															+def get_site_config(conn) -> list[dict]:
														
 
															+    """Return all config rows as dicts."""
														
 
															+    conn.execute("""
														
 
															+        CREATE TABLE IF NOT EXISTS site_config (
														
 
															+            key         TEXT PRIMARY KEY,
														
 
															+            value       TEXT NOT NULL,
														
 
															+            type        TEXT NOT NULL DEFAULT 'str',
														
 
															+            category    TEXT NOT NULL DEFAULT 'general',
														
 
															+            description TEXT NOT NULL DEFAULT '',
														
 
															+            source      TEXT NOT NULL DEFAULT 'default'
														
 
															+        )
														
 
															+    """)
														
 
															+    rows = conn.execute(
														
 
															+        "SELECT key, value, type, category, description, source FROM site_config ORDER BY category, key"
														
 
															+    ).fetchall()
														
 
															+    return [
														
 
															+        {"key": r[0], "value": r[1], "type": r[2], "category": r[3], "description": r[4], "source": r[5]}
														
 
															+        for r in rows
														
 
															+    ]
														
 
															+
														
 
															+
														
 
															+def get_config_value(conn, key: str) -> str | None:
														
 
															+    """Get a single config value by key. Returns None if not found."""
														
 
															+    row = conn.execute("SELECT value FROM site_config WHERE key=?", (key,)).fetchone()
														
 
															+    return row[0] if row else None
														
 
															+
														
 
															+
														
 
															+def set_config_value(conn, key: str, value: str) -> bool:
														
 
															+    """Update a single config value. Returns True if the key existed."""
														
 
															+    cur = conn.execute("UPDATE site_config SET value=?, source='api' WHERE key=?", (value, key))
														
 
															+    return cur.rowcount > 0
														
--- a/news_mcp/storage/sqlite_store.py
+++ b/news_mcp/storage/sqlite_store.py
@@ -6,9 +6,9 @@ from dataclasses import dataclass
 
															 from datetime import datetime, timezone, timedelta
														
 
															 from pathlib import Path
														
 
															 from typing import Any
														
 
															-from urllib.parse import urlparse
														
 
															 from email.utils import parsedate_to_datetime
														
 
															+from news_mcp.article_identity import article_key
														
 
															 from news_mcp.config import (
														
 
															     NEWS_PRUNE_INTERVAL_HOURS,
														
 
															     NEWS_PRUNING_ENABLED,
														
@@ -87,19 +87,8 @@ class ClusterRow:
 
															 META_LAST_PRUNE_AT = "last_prune_at"
														
 
															-
														
 
															-def _article_key(article: dict[str, Any]) -> str:
														
 
															-    url = str(article.get("url") or "").strip()
														
 
															-    if not url:
														
 
															-        return str(article.get("title") or "")
														
 
															-    try:
														
 
															-        parsed = urlparse(url)
														
 
															-        parts = [p for p in parsed.path.split("/") if p]
														
 
															-        if parts:
														
 
															-            return parts[-1]
														
 
															-    except Exception:
														
 
															-        pass
														
 
															-    return url
														
 
															+# For internal use — canonical name is article_key(article) from article_identity
														
 
															+_article_key = article_key
														
 
															 def _dedup_articles(articles: list[dict[str, Any]]) -> list[dict[str, Any]]:
														
@@ -329,6 +318,13 @@ class SQLiteClusterStore:
 
															                 """
														
 
															             )
														
 
															+            # Seed site_config from .env / defaults (no-op if already populated)
														
 
															+            from news_mcp.site_config import seed_site_config
														
 
															+            seeded = seed_site_config(conn)
														
 
															+            if seeded:
														
 
															+                import logging
														
 
															+                logging.getLogger(__name__).info("site_config: seeded %d rows from env/defaults", seeded)
														
 
															+
														
 
															     def upsert_clusters(self, clusters: list[dict], topic: str) -> None:
														
 
															         now = datetime.now(timezone.utc)
														
 
															         with self._conn() as conn:
														
--- a/scripts/backfill_seen_articles.py
+++ b/scripts/backfill_seen_articles.py
@@ -11,21 +11,12 @@ import json
 
															 import sqlite3
														
 
															 import sys
														
 
															 from datetime import datetime, timezone
														
 
															-from urllib.parse import urlparse
														
 
															+# Add parent dir so we can import news_mcp when run as a standalone script
														
 
															+from pathlib import Path
														
 
															+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
														
 
															-def _article_key(article: dict) -> str:
														
 
															-    url = str(article.get("url") or "").strip()
														
 
															-    if not url:
														
 
															-        return str(article.get("title") or "")
														
 
															-    try:
														
 
															-        parsed = urlparse(url)
														
 
															-        parts = [p for p in parsed.path.split("/") if p]
														
 
															-        if parts:
														
 
															-            return parts[-1]
														
 
															-    except Exception:
														
 
															-        pass
														
 
															-    return url
														
 
															+from news_mcp.article_identity import article_key
														
 
															 def main(db_path: str = "./data/news.sqlite"):
														
@@ -53,7 +44,7 @@ def main(db_path: str = "./data/news.sqlite"):
 
															             skipped += 1
														
 
															             continue
														
 
															         for art in payload.get("articles", []):
														
 
															-            akey = _article_key(art)
														
 
															+            akey = article_key(art)
														
 
															             if not akey:
														
 
															                 continue
														
 
															             art_url = str(art.get("url") or "").strip()