Explorar el Código

request throttle

Lukas Goldschmidt hace 2 días
padre
commit
8220079693
Se han modificado 3 ficheros con 19 adiciones y 3 borrados
  1. 5 0
      .env.example
  2. 5 1
      book_ingestor/config.py
  3. 9 2
      book_ingestor/mem0_writer.py

+ 5 - 0
.env.example

@@ -15,5 +15,10 @@ BOOKS_MANIFESTS=./books/manifests
 # Chunking
 CHUNK_SIZE_TOKENS=350
 
+INGEST_DELAY=0.5   # default — at most 2 chunks/sec (delay adds to request time), gentle on the GPU
+#INGEST_DELAY=1.0   # at most 1 chunk/sec — if the GPU still runs hot
+#INGEST_DELAY=0.2   # faster — if the GPU handles the load fine
+#INGEST_DELAY=0.0   # no throttle — full speed, GPU fends for itself
+
 # Logging: DEBUG | INFO | WARNING
 LOG_LEVEL=INFO

+ 5 - 1
book_ingestor/config.py

@@ -29,6 +29,9 @@ class Config:
     # Chunking
     chunk_size_tokens: int
 
+    # Throttling — delay between POSTs to spare the embedder/GPU
+    ingest_delay: float
+
     # Logging
     log_level: str
 
@@ -50,9 +53,10 @@ def load_config() -> Config:
         books_done=os.getenv("BOOKS_DONE", "./books/done"),
         books_manifests=os.getenv("BOOKS_MANIFESTS", "./books/manifests"),
         chunk_size_tokens=int(os.getenv("CHUNK_SIZE_TOKENS", "350")),
+        ingest_delay=float(os.getenv("INGEST_DELAY", "0.5")),
         log_level=os.getenv("LOG_LEVEL", "INFO"),
     )
 
 
 # Singleton — import this everywhere
-cfg = load_config()
+cfg = load_config()

+ 9 - 2
book_ingestor/mem0_writer.py

@@ -10,6 +10,7 @@ Server expects: { text, user_id, metadata, infer }
 from __future__ import annotations
 
 import logging
+import time
 from datetime import datetime, timezone
 
 import requests
@@ -80,12 +81,18 @@ def write_content_chunk(chunk: Chunk, doc_title: str) -> str | None:
 
 
 def write_content_chunks_batch(chunks: list[Chunk], doc_title: str) -> list[str]:
-    """POST multiple content chunks. Returns list of successful memory IDs."""
+    """
+    POST multiple content chunks. Returns list of successful memory IDs.
+    Throttled by INGEST_DELAY to avoid hammering the Ollama embedder.
+    """
     ids = []
-    for chunk in chunks:
+    for i, chunk in enumerate(chunks):
         mem_id = write_content_chunk(chunk, doc_title)
         if mem_id:
             ids.append(mem_id)
+        # Throttle after every chunk except the last — give the GPU breathing room
+        if cfg.ingest_delay > 0 and i < len(chunks) - 1:
+            time.sleep(cfg.ingest_delay)
     return ids