# config.py
import logging
import os
from pathlib import Path

from dotenv import load_dotenv
  4. _HERE = Path(__file__).resolve().parent.parent
  5. load_dotenv(_HERE / ".env")
  6. DATA_DIR = Path(os.getenv("NEWS_MCP_DATA_DIR", Path(__file__).resolve().parent / "data"))
  7. DATA_DIR.mkdir(parents=True, exist_ok=True)
  8. DB_PATH = Path(os.getenv("NEWS_MCP_DB_PATH", str(DATA_DIR / "news.sqlite")))
  9. PROMPTS_DIR = Path(os.getenv("NEWS_PROMPTS_DIR", str(_HERE / "prompts")))
  10. ENTITY_ALIASES_FILE = Path(os.getenv("NEWS_ENTITY_ALIASES_FILE", str(_HERE / "config" / "entity_aliases.json")))
  11. NEWS_FEED_URL = os.getenv("NEWS_FEED_URL", os.getenv("NEWS_RSS_FEED_URL", "https://breakingthenews.net/news-feed.xml"))
  12. NEWS_FEED_URLS = os.getenv("NEWS_FEED_URLS", os.getenv("NEWS_RSS_FEED_URLS", "")).strip()
  13. RSS_FEED_URL = NEWS_FEED_URL
  14. RSS_FEED_URLS = NEWS_FEED_URLS
  15. NEWS_FEED_ITEMS_PER_POLL = int(os.getenv("NEWS_FEED_ITEMS_PER_POLL", "50"))
  16. DEFAULT_LOOKBACK_HOURS = float(os.getenv("NEWS_DEFAULT_LOOKBACK_HOURS", os.getenv("NEWS_CLUSTERS_TTL_HOURS", "24")))
  17. DEFAULT_TOPICS = ["crypto", "macro", "regulation", "ai", "other"]
  18. # LLM API keys (provider-specific keys only)
  19. GROQ_API_KEY = os.getenv("GROQ_API_KEY")
  20. OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
  21. OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
  22. # LLM provider/model selection (provider-agnostic)
  23. NEWS_EXTRACT_PROVIDER = os.getenv("NEWS_EXTRACT_PROVIDER", "groq")
  24. NEWS_EXTRACT_MODEL = os.getenv("NEWS_EXTRACT_MODEL", "llama4-16e")
  25. NEWS_SUMMARY_PROVIDER = os.getenv("NEWS_SUMMARY_PROVIDER", "groq")
  26. NEWS_SUMMARY_MODEL = os.getenv("NEWS_SUMMARY_MODEL", "llama4-16e")
  27. # LLM behavior
  28. LLM_DEBUG = os.getenv("LLM_DEBUG", "false").lower() == "true"
  29. NEWS_ENTITY_BLACKLIST = [x.strip().lower() for x in os.getenv("ENTITY_BLACKLIST", "").split(",") if x.strip()]
  30. # Enrichment: 0 = no limit (enrich every cluster); >0 caps per refresh cycle
  31. ENRICHMENT_MAX_PER_REFRESH = int(os.getenv("ENRICHMENT_MAX_PER_REFRESH", "0"))
  32. # When true, only the "other" topic gets LLM enrichment (legacy guard)
  33. ENRICH_OTHER_TOPICS_ONLY = os.getenv("ENRICH_OTHER_TOPICS_ONLY", "false").lower() == "true"
  34. # Optional embeddings path (Ollama first when enabled, fallback otherwise).
  35. NEWS_EMBEDDINGS_ENABLED = os.getenv("NEWS_EMBEDDINGS_ENABLED", "false").lower() == "true"
  36. OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", os.getenv("OLLAMA_URL", "http://127.0.0.1:11434"))
  37. OLLAMA_EMBEDDING_MODEL = os.getenv("OLLAMA_EMBEDDING_MODEL", "nomic-embed-text")
  38. NEWS_EMBEDDING_SIMILARITY_THRESHOLD = float(os.getenv("NEWS_EMBEDDING_SIMILARITY_THRESHOLD", "0.885"))
  39. NEWS_REFRESH_INTERVAL_SECONDS = int(os.getenv("NEWS_REFRESH_INTERVAL_SECONDS", "900"))
  40. NEWS_BACKGROUND_REFRESH_ENABLED = os.getenv("NEWS_BACKGROUND_REFRESH_ENABLED", "true").lower() == "true"
  41. NEWS_BACKGROUND_REFRESH_ON_START = os.getenv("NEWS_BACKGROUND_REFRESH_ON_START", "true").lower() == "true"
  42. NEWS_PRUNING_ENABLED = os.getenv("NEWS_PRUNING_ENABLED", "true").lower() == "true"
  43. NEWS_RETENTION_DAYS = float(os.getenv("NEWS_RETENTION_DAYS", "180"))
  44. NEWS_PRUNE_INTERVAL_HOURS = float(os.getenv("NEWS_PRUNE_INTERVAL_HOURS", "24"))
  45. # ---------------------------------------------------------------------------
  46. # Concurrency controls
  47. # ---------------------------------------------------------------------------
  48. # Maximum concurrent outbound LLM API calls per provider.
  49. # Defaults are conservative for free tiers; override via env if you have
  50. # higher rate limits or are on a paid plan.
  51. _NEEDLE_DEFAULT_CONCURRENCY = {
  52. "openrouter": 2,
  53. "openai": 5,
  54. "groq": 8,
  55. }
  56. _NEEDLE_RSS_MAX_CONCURRENCY = int(os.getenv("NEWS_RSS_MAX_CONCURRENCY", "10"))
  57. _NEEDLE_OLLAMA_MAX_CONCURRENCY = int(os.getenv("NEWS_OLLAMA_MAX_CONCURRENCY", "4"))
  58. def llm_concurrency(provider: str) -> int:
  59. """Return the max concurrent LLM calls for *provider*.
  60. Reads from ``NEWS_LLM_CONCURRENCY_<PROVIDER>`` env var first (e.g.
  61. ``NEWS_LLM_CONCURRENCY_OPENROUTER``), then falls back to the built-in
  62. default map.
  63. """
  64. provider = provider.strip().lower()
  65. env_key = f"NEWS_LLM_CONCURRENCY_{provider.upper()}"
  66. env_val = os.getenv(env_key)
  67. if env_val is not None:
  68. try:
  69. return max(1, int(env_val))
  70. except ValueError:
  71. pass
  72. return _NEEDLE_DEFAULT_CONCURRENCY.get(provider, 3)