- """
- config.py — loads .env and exposes typed settings for the entire project.
- No LLM calls. No side effects. Just config.
- """
- import os
- from dataclasses import dataclass
- from dotenv import load_dotenv
- load_dotenv()
@dataclass(frozen=True)
class Config:
    """Immutable, typed bundle of every project setting.

    Built exactly once by load_config(); frozen so no code can mutate
    settings after startup. Import the module-level `cfg` instead of
    constructing this directly.
    """

    # mem0 server
    mem0_base_url: str  # base URL of the mem0 API; trailing "/" stripped by load_config
    mem0_agent_id: str  # agent namespace used for stored memories
    # Groq
    groq_api_key: str   # required — load_config raises if missing
    groq_model: str     # model identifier passed to the Groq API
    # Folder paths (plain strings, not Path objects)
    books_inbox: str       # new files land here
    books_processing: str  # files currently being ingested
    books_done: str        # successfully ingested files
    books_manifests: str   # per-book ingestion manifests
    # Chunking
    chunk_size_tokens: int  # target size of each ingested chunk
    # Safety cap — docs with more sections than this are treated as flat
    # Prevents token burn on crappy OCR'd PDFs with hundreds of fake chapters
    max_sections: int
    # Throttling — delay between POSTs to spare the embedder/GPU
    ingest_delay: float  # seconds
    # Logging
    log_level: str  # passed to the logging setup, e.g. "INFO", "DEBUG"
def load_config() -> Config:
    """Read environment variables (already loaded from .env) and build a Config.

    Returns:
        A frozen Config with every setting resolved (defaults applied where
        the variable is unset).

    Raises:
        EnvironmentError: if a required variable is missing or empty.
        ValueError: if a numeric variable cannot be parsed, with the variable
            name and offending value in the message.
    """

    def require(key: str) -> str:
        # Empty string deliberately counts as missing: a blank value in .env
        # is almost always an authoring mistake, not an intentional setting.
        val = os.getenv(key)
        if not val:
            raise EnvironmentError(f"Missing required env var: {key}")
        return val

    def numeric(key: str, default: str, cast):
        # Wrap int()/float() so a typo like CHUNK_SIZE_TOKENS=35O fails with
        # the variable name instead of a bare "invalid literal" traceback.
        raw = os.getenv(key, default)
        try:
            return cast(raw)
        except ValueError as err:
            raise ValueError(
                f"Env var {key} must be a number, got {raw!r}"
            ) from err

    return Config(
        # Strip the trailing slash so URL joins elsewhere can't double it.
        mem0_base_url=require("MEM0_BASE_URL").rstrip("/"),
        mem0_agent_id=os.getenv("MEM0_AGENT_ID", "knowledge_base"),
        groq_api_key=require("GROQ_API_KEY"),
        groq_model=os.getenv("GROQ_MODEL", "meta-llama/llama-4-scout-17b-16e-instruct"),
        books_inbox=os.getenv("BOOKS_INBOX", "./books/inbox"),
        books_processing=os.getenv("BOOKS_PROCESSING", "./books/processing"),
        books_done=os.getenv("BOOKS_DONE", "./books/done"),
        books_manifests=os.getenv("BOOKS_MANIFESTS", "./books/manifests"),
        chunk_size_tokens=numeric("CHUNK_SIZE_TOKENS", "350", int),
        max_sections=numeric("MAX_SECTIONS", "60", int),
        ingest_delay=numeric("INGEST_DELAY", "0.5", float),
        log_level=os.getenv("LOG_LEVEL", "INFO"),
    )
# Singleton — import this everywhere. Built once at module import time, so a
# missing required env var fails fast on startup instead of deep in a pipeline.
cfg = load_config()