#!/usr/bin/env bash
#
# live_tests.sh - live smoke test for the news_mcp entity-extraction call.
#
# Sends one fabricated news cluster to the configured LLM provider through an
# embedded Python program and checks which entities come back.
#
# Environment (may also be supplied by a .env file next to this script):
#   PYTHON_BIN             interpreter to run (default: <script dir>/.venv/bin/python)
#   NEWS_EXTRACT_PROVIDER  "openai" or "groq"; derived from the API keys when unset
#   NEWS_EXTRACT_MODEL     model name; a per-provider default is used when unset
#   OPENAI_API_KEY         required when the provider is "openai"
#   GROQ_API_KEY           required when the provider is "groq"
#   ENTITY_BLACKLIST       exported to the Python process (empty by default)
#
# Exit status: 1 = interpreter missing, 4 = API key missing,
#              2 / 3 = the extraction check itself failed.

set -euo pipefail

# Absolute directory that contains this script, independent of the caller's
# working directory. cd's output is discarded because it prints the target
# when CDPATH is in effect, which would corrupt the captured value.
ROOT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null && pwd)"

# Interpreter for the embedded Python test; the environment may override it.
PYTHON_BIN="${PYTHON_BIN:-$ROOT_DIR/.venv/bin/python}"

# Load optional local settings. While `set -a` is active every variable
# assigned by the sourced file is exported, so child processes inherit it.
if [[ -f "$ROOT_DIR/.env" ]]; then
  set -a
  # shellcheck disable=SC1090
  source "$ROOT_DIR/.env"
  set +a
fi
  11. if [[ ! -x "$PYTHON_BIN" ]]; then
  12. echo "ERROR: python not found at $PYTHON_BIN" >&2
  13. exit 1
  14. fi
  15. if [[ -z "${NEWS_EXTRACT_PROVIDER:-}" ]]; then
  16. if [[ -n "${OPENAI_API_KEY:-}" ]]; then
  17. export NEWS_EXTRACT_PROVIDER="openai"
  18. elif [[ -n "${GROQ_API_KEY:-}" ]]; then
  19. export NEWS_EXTRACT_PROVIDER="groq"
  20. else
  21. export NEWS_EXTRACT_PROVIDER="openai"
  22. fi
  23. fi
  24. case "${NEWS_EXTRACT_PROVIDER}" in
  25. openai)
  26. export NEWS_EXTRACT_MODEL="${NEWS_EXTRACT_MODEL:-gpt-5-nano-2025-08-07}"
  27. ;;
  28. groq)
  29. export NEWS_EXTRACT_MODEL="${NEWS_EXTRACT_MODEL:-llama4-16e}"
  30. ;;
  31. esac
  32. export ENTITY_BLACKLIST="${ENTITY_BLACKLIST:-}"
  33. case "${NEWS_EXTRACT_PROVIDER}" in
  34. openai)
  35. if [[ -z "${OPENAI_API_KEY:-}" ]]; then
  36. echo "ERROR: OPENAI_API_KEY is not set, so the live OpenAI extraction test cannot run." >&2
  37. exit 4
  38. fi
  39. ;;
  40. groq)
  41. if [[ -z "${GROQ_API_KEY:-}" ]]; then
  42. echo "ERROR: GROQ_API_KEY is not set, so the live Groq extraction test cannot run." >&2
  43. exit 4
  44. fi
  45. ;;
  46. esac
  47. "$PYTHON_BIN" - <<'PY'
  48. import asyncio
  49. import json
  50. import os
  51. import sys
  52. from news_mcp.llm import call_extraction
  53. cluster = {
  54. "headline": "Reuters says Bitcoin, Ethereum, the Fed, and the ECB reacted as Trump and the EU discussed Iran and Israel",
  55. "summary": (
  56. "In a fictional test report, Reuters described Bitcoin, Ethereum, "
  57. "the Federal Reserve, and the European Central Bank. Trump, the EU, "
  58. "Iran, and Israel were all mentioned in the same narrative."
  59. ),
  60. "articles": [
  61. {
  62. "title": "Reuters says Bitcoin, Ethereum, the Fed, and the ECB reacted as Trump and the EU discussed Iran and Israel",
  63. "url": "https://example.com/test",
  64. "source": "TestSource",
  65. "timestamp": "Tue, 31 Mar 2026 12:00:00 GMT",
  66. "summary": "A fabricated test story involving several named entities.",
  67. }
  68. ],
  69. }
  70. # Quantifiable acceptance set: the model may canonicalize some entities,
  71. # but it must recover the core set below.
  72. expected_any = {
  73. "Reuters",
  74. "Bitcoin",
  75. "Ethereum",
  76. "Fed",
  77. "ECB",
  78. "Trump",
  79. "EU",
  80. "Iran",
  81. "Israel",
  82. }
  83. canonical_map = {
  84. "federal reserve": "Fed",
  85. "federalreserve": "Fed",
  86. "european central bank": "ECB",
  87. "ecb": "ECB",
  88. "european union": "EU",
  89. "eu": "EU",
  90. "donald trump": "Trump",
  91. "trump": "Trump",
  92. }
  93. async def main() -> int:
  94. out = await call_extraction(cluster)
  95. entities = out.get("entities", [])
  96. normalized = set()
  97. for ent in entities:
  98. key = str(ent).strip().lower()
  99. normalized.add(canonical_map.get(key, str(ent).strip()))
  100. missing = sorted(expected_any - normalized)
  101. extra = sorted(normalized - expected_any)
  102. print(json.dumps({
  103. "provider": os.getenv("NEWS_EXTRACT_PROVIDER"),
  104. "model": os.getenv("NEWS_EXTRACT_MODEL"),
  105. "output": out,
  106. "normalized_entities": sorted(normalized),
  107. "missing": missing,
  108. "extra": extra,
  109. }, ensure_ascii=False, indent=2))
  110. if missing:
  111. print(f"FAIL: missing entities: {missing}", file=sys.stderr)
  112. return 2
  113. # Extra entities are tolerated only if they are generic / helpful.
  114. allowed_extras = {"Macro", "Crypto"}
  115. bad_extra = [e for e in extra if e not in allowed_extras]
  116. if bad_extra:
  117. print(f"FAIL: unexpected extra entities: {bad_extra}", file=sys.stderr)
  118. return 3
  119. print("PASS: live extraction smoke test matched expected core entities")
  120. return 0
  121. raise SystemExit(asyncio.run(main()))
  122. PY