|
|
@@ -10,6 +10,7 @@ Server expects: { text, user_id, metadata, infer }
|
|
|
from __future__ import annotations
|
|
|
|
|
|
import logging
|
|
|
+import time
|
|
|
from datetime import datetime, timezone
|
|
|
|
|
|
import requests
|
|
|
@@ -80,12 +81,18 @@ def write_content_chunk(chunk: Chunk, doc_title: str) -> str | None:
|
|
|
|
|
|
|
|
|
def write_content_chunks_batch(chunks: list[Chunk], doc_title: str) -> list[str]:
|
|
|
- """POST multiple content chunks. Returns list of successful memory IDs."""
|
|
|
+ """
|
|
|
+ POST multiple content chunks. Returns list of successful memory IDs.
|
|
|
+ Throttled by the configured ingest delay (``cfg.ingest_delay``) to avoid hammering the Ollama embedder.
|
|
|
+ """
|
|
|
ids = []
|
|
|
- for chunk in chunks:
|
|
|
+ for i, chunk in enumerate(chunks):
|
|
|
mem_id = write_content_chunk(chunk, doc_title)
|
|
|
if mem_id:
|
|
|
ids.append(mem_id)
|
|
|
+ # Throttle after every chunk except the last — give the GPU breathing room
|
|
|
+ if cfg.ingest_delay > 0 and i < len(chunks) - 1:
|
|
|
+ time.sleep(cfg.ingest_delay)
|
|
|
return ids
|
|
|
|
|
|
|