# test_news_mcp.py — tests for news_mcp dedup/clustering, SQLite storage, and importance scoring.
  1. from __future__ import annotations
  2. import tempfile
  3. from pathlib import Path
  4. from news_mcp.dedup.cluster import dedup_and_cluster_articles
  5. from news_mcp.storage.sqlite_store import SQLiteClusterStore
  6. from news_mcp.enrichment.importance import compute_importance
  7. def _article(title: str, url: str = "https://example.com/x", source: str = "Src", ts: str = "Mon, 30 Mar 2026 12:00:00 GMT"):
  8. return {
  9. "title": title,
  10. "url": url,
  11. "source": source,
  12. "timestamp": ts,
  13. "summary": "summary text",
  14. }
  15. def test_dedup_merges_similar_titles():
  16. articles = [
  17. _article("Trump warns Iran war could spread"),
  18. _article("Trump warns Iran conflict could spread"),
  19. _article("Unrelated sports result"),
  20. ]
  21. clustered = dedup_and_cluster_articles(articles, similarity_threshold=0.75)
  22. # We expect the Trump/Iran items to be merged into one cluster in the same topic bucket.
  23. total_clusters = sum(len(v) for v in clustered.values())
  24. assert total_clusters == 2
  25. def test_sqlite_feed_hash_roundtrip():
  26. with tempfile.TemporaryDirectory() as td:
  27. db = Path(td) / "news.sqlite"
  28. store = SQLiteClusterStore(db)
  29. assert store.get_feed_hash("breakingthenews") is None
  30. store.set_feed_hash("breakingthenews", "abc123")
  31. assert store.get_feed_hash("breakingthenews") == "abc123"
  32. def test_sqlite_summary_cache_roundtrip():
  33. with tempfile.TemporaryDirectory() as td:
  34. db = Path(td) / "news.sqlite"
  35. store = SQLiteClusterStore(db)
  36. # Upsert a base cluster first.
  37. store.upsert_clusters([
  38. {
  39. "cluster_id": "cid1",
  40. "headline": "Headline",
  41. "summary": "Summary",
  42. "entities": ["Iran"],
  43. "sentiment": "negative",
  44. "importance": 0.5,
  45. "sources": ["BreakingTheNews"],
  46. "timestamp": "Mon, 30 Mar 2026 12:00:00 GMT",
  47. "articles": [],
  48. "first_seen": "Mon, 30 Mar 2026 12:00:00 GMT",
  49. "last_updated": "Mon, 30 Mar 2026 12:00:00 GMT",
  50. }
  51. ], topic="other")
  52. store.upsert_cluster_summary(
  53. "cid1",
  54. {
  55. "headline": "Headline",
  56. "mergedSummary": "Merged summary",
  57. "keyFacts": ["Fact 1"],
  58. "sources": ["BreakingTheNews"],
  59. },
  60. )
  61. cached = store.get_cluster_summary("cid1", ttl_hours=24)
  62. assert cached is not None
  63. assert cached["mergedSummary"] == "Merged summary"
  64. assert cached["keyFacts"] == ["Fact 1"]
  65. def test_importance_prefers_llm_signal():
  66. # Two clusters with same coverage but different sentiment magnitude.
  67. base = {
  68. "sources": ["A", "B"],
  69. "articles": [{}, {}],
  70. "sentiment": "neutral",
  71. "sentimentScore": 0.0,
  72. }
  73. pos = dict(base, sentimentScore=0.9)
  74. neg = dict(base, sentimentScore=-0.8)
  75. imp_base = compute_importance(base)
  76. imp_pos = compute_importance(pos)
  77. imp_neg = compute_importance(neg)
  78. assert imp_pos >= imp_base
  79. assert imp_neg >= imp_base