# test_news_mcp.py — tests for news_mcp dedup clustering and the SQLite cluster store.
  1. from __future__ import annotations
  2. import tempfile
  3. from pathlib import Path
  4. from news_mcp.dedup.cluster import dedup_and_cluster_articles
  5. from news_mcp.storage.sqlite_store import SQLiteClusterStore
  6. def _article(title: str, url: str = "https://example.com/x", source: str = "Src", ts: str = "Mon, 30 Mar 2026 12:00:00 GMT"):
  7. return {
  8. "title": title,
  9. "url": url,
  10. "source": source,
  11. "timestamp": ts,
  12. "summary": "summary text",
  13. }
  14. def test_dedup_merges_similar_titles():
  15. articles = [
  16. _article("Trump warns Iran war could spread"),
  17. _article("Trump warns Iran conflict could spread"),
  18. _article("Unrelated sports result"),
  19. ]
  20. clustered = dedup_and_cluster_articles(articles, similarity_threshold=0.75)
  21. # We expect the Trump/Iran items to be merged into one cluster in the same topic bucket.
  22. total_clusters = sum(len(v) for v in clustered.values())
  23. assert total_clusters == 2
  24. def test_sqlite_feed_hash_roundtrip():
  25. with tempfile.TemporaryDirectory() as td:
  26. db = Path(td) / "news.sqlite"
  27. store = SQLiteClusterStore(db)
  28. assert store.get_feed_hash("breakingthenews") is None
  29. store.set_feed_hash("breakingthenews", "abc123")
  30. assert store.get_feed_hash("breakingthenews") == "abc123"
  31. def test_sqlite_summary_cache_roundtrip():
  32. with tempfile.TemporaryDirectory() as td:
  33. db = Path(td) / "news.sqlite"
  34. store = SQLiteClusterStore(db)
  35. # Upsert a base cluster first.
  36. store.upsert_clusters([
  37. {
  38. "cluster_id": "cid1",
  39. "headline": "Headline",
  40. "summary": "Summary",
  41. "entities": ["Iran"],
  42. "sentiment": "negative",
  43. "importance": 0.5,
  44. "sources": ["BreakingTheNews"],
  45. "timestamp": "Mon, 30 Mar 2026 12:00:00 GMT",
  46. "articles": [],
  47. "first_seen": "Mon, 30 Mar 2026 12:00:00 GMT",
  48. "last_updated": "Mon, 30 Mar 2026 12:00:00 GMT",
  49. }
  50. ], topic="other")
  51. store.upsert_cluster_summary(
  52. "cid1",
  53. {
  54. "headline": "Headline",
  55. "mergedSummary": "Merged summary",
  56. "keyFacts": ["Fact 1"],
  57. "sources": ["BreakingTheNews"],
  58. },
  59. )
  60. cached = store.get_cluster_summary("cid1", ttl_hours=24)
  61. assert cached is not None
  62. assert cached["mergedSummary"] == "Merged summary"
  63. assert cached["keyFacts"] == ["Fact 1"]