lucky
/
news-mcp


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
							from __future__ import annotations

import tempfile
from pathlib import Path

from news_mcp.dedup.cluster import dedup_and_cluster_articles
from news_mcp.storage.sqlite_store import SQLiteClusterStore
from news_mcp.enrichment.importance import compute_importance


def _article(title: str, url: str = "https://example.com/x", source: str = "Src", ts: str = "Mon, 30 Mar 2026 12:00:00 GMT"):
    return {
        "title": title,
        "url": url,
        "source": source,
        "timestamp": ts,
        "summary": "summary text",
    }


def test_dedup_merges_similar_titles():
    articles = [
        _article("Trump warns Iran war could spread"),
        _article("Trump warns Iran conflict could spread"),
        _article("Unrelated sports result"),
    ]
    clustered = dedup_and_cluster_articles(articles, similarity_threshold=0.75)
    # We expect the Trump/Iran items to be merged into one cluster in the same topic bucket.
    total_clusters = sum(len(v) for v in clustered.values())
    assert total_clusters == 2


def test_sqlite_feed_hash_roundtrip():
    with tempfile.TemporaryDirectory() as td:
        db = Path(td) / "news.sqlite"
        store = SQLiteClusterStore(db)
        assert store.get_feed_hash("breakingthenews") is None
        store.set_feed_hash("breakingthenews", "abc123")
        assert store.get_feed_hash("breakingthenews") == "abc123"


def test_sqlite_summary_cache_roundtrip():
    with tempfile.TemporaryDirectory() as td:
        db = Path(td) / "news.sqlite"
        store = SQLiteClusterStore(db)
        # Upsert a base cluster first.
        store.upsert_clusters([
            {
                "cluster_id": "cid1",
                "headline": "Headline",
                "summary": "Summary",
                "entities": ["Iran"],
                "sentiment": "negative",
                "importance": 0.5,
                "sources": ["BreakingTheNews"],
                "timestamp": "Mon, 30 Mar 2026 12:00:00 GMT",
                "articles": [],
                "first_seen": "Mon, 30 Mar 2026 12:00:00 GMT",
                "last_updated": "Mon, 30 Mar 2026 12:00:00 GMT",
            }
        ], topic="other")
        store.upsert_cluster_summary(
            "cid1",
            {
                "headline": "Headline",
                "mergedSummary": "Merged summary",
                "keyFacts": ["Fact 1"],
                "sources": ["BreakingTheNews"],
            },
        )
        cached = store.get_cluster_summary("cid1", ttl_hours=24)
        assert cached is not None
        assert cached["mergedSummary"] == "Merged summary"
        assert cached["keyFacts"] == ["Fact 1"]


def test_importance_prefers_llm_signal():
    # Two clusters with same coverage but different sentiment magnitude.
    base = {
        "sources": ["A", "B"],
        "articles": [{}, {}],
        "sentiment": "neutral",
        "sentimentScore": 0.0,
    }
    pos = dict(base, sentimentScore=0.9)
    neg = dict(base, sentimentScore=-0.8)

    imp_base = compute_importance(base)
    imp_pos = compute_importance(pos)
    imp_neg = compute_importance(neg)

    assert imp_pos >= imp_base
    assert imp_neg >= imp_base