- """
- config.py — loads .env and exposes typed settings for the entire project.
- No LLM calls. No side effects. Just config.
- """
- import os
- from dataclasses import dataclass
- from dotenv import load_dotenv
- load_dotenv()
@dataclass(frozen=True)
class Config:
    """Immutable, typed bundle of every project setting.

    Built exactly once by load_config(); frozen so no code can mutate
    settings after startup. Import the module-level `cfg` instead of
    constructing this directly.
    """

    # mem0 server
    mem0_base_url: str  # base URL of the mem0 API; trailing "/" stripped by load_config
    mem0_agent_id: str  # agent namespace used for stored memories
    # Groq
    groq_api_key: str   # required — load_config raises if missing
    groq_model: str     # model identifier passed to the Groq API
    # Folder paths (plain strings, not Path objects)
    books_inbox: str       # new files land here
    books_processing: str  # files currently being ingested
    books_done: str        # successfully ingested files
    books_manifests: str   # per-book ingestion manifests
    # Chunking
    chunk_size_tokens: int  # target size of each ingested chunk
    # Safety cap — docs with more sections than this are treated as flat
    # Prevents token burn on crappy OCR'd PDFs with hundreds of fake chapters
    max_sections: int
    # Throttling — delay between POSTs to spare the embedder/GPU
    ingest_delay: float  # seconds
    # Logging
    log_level: str  # passed to the logging setup, e.g. "INFO", "DEBUG"
def load_config() -> Config:
    """Read environment variables (already loaded from .env) and build a Config.

    Returns:
        A frozen Config with every setting resolved (defaults applied where
        the variable is unset).

    Raises:
        EnvironmentError: if a required variable is missing or empty.
        ValueError: if a numeric variable cannot be parsed, with the variable
            name and offending value in the message.
    """

    def require(key: str) -> str:
        # Empty string deliberately counts as missing: a blank value in .env
        # is almost always an authoring mistake, not an intentional setting.
        val = os.getenv(key)
        if not val:
            raise EnvironmentError(f"Missing required env var: {key}")
        return val

    def numeric(key: str, default: str, cast):
        # Wrap int()/float() so a typo like CHUNK_SIZE_TOKENS=35O fails with
        # the variable name instead of a bare "invalid literal" traceback.
        raw = os.getenv(key, default)
        try:
            return cast(raw)
        except ValueError as err:
            raise ValueError(
                f"Env var {key} must be a number, got {raw!r}"
            ) from err

    return Config(
        # Strip the trailing slash so URL joins elsewhere can't double it.
        mem0_base_url=require("MEM0_BASE_URL").rstrip("/"),
        mem0_agent_id=os.getenv("MEM0_AGENT_ID", "knowledge_base"),
        groq_api_key=require("GROQ_API_KEY"),
        groq_model=os.getenv("GROQ_MODEL", "meta-llama/llama-4-scout-17b-16e-instruct"),
        books_inbox=os.getenv("BOOKS_INBOX", "./books/inbox"),
        books_processing=os.getenv("BOOKS_PROCESSING", "./books/processing"),
        books_done=os.getenv("BOOKS_DONE", "./books/done"),
        books_manifests=os.getenv("BOOKS_MANIFESTS", "./books/manifests"),
        chunk_size_tokens=numeric("CHUNK_SIZE_TOKENS", "350", int),
        max_sections=numeric("MAX_SECTIONS", "60", int),
        ingest_delay=numeric("INGEST_DELAY", "0.5", float),
        log_level=os.getenv("LOG_LEVEL", "INFO"),
    )
# Singleton — import this everywhere. Built once at module import time, so a
# missing required env var fails fast on startup instead of deep in a pipeline.
cfg = load_config()