
Route compactor summaries to /memories/raw

Lukas Goldschmidt, 1 month ago
commit 19402cf907
3 changed files with 206 additions and 107 deletions
  1. PROJECT.md (+4 -4)
  2. README.md (+14 -5)
  3. compactor.py (+188 -98)

PROJECT.md (+4 -4)

@@ -6,10 +6,10 @@ Compact noisy conversational memory stretches (especially debug sessions) into d
 ## Current implementation
 - HTTP client for custom mem0 server endpoints:
   - `POST /memories/all`
-  - `POST /memories`
+  - `POST /memories/raw`
   - `DELETE /memory/{id}`
 - temporal clustering (`gap-minutes`)
-- debug-arc heuristic and compact summary writer
+- LLM segment extraction + summary writer
 - dry-run reporting
 
 ## Operating mode
@@ -18,9 +18,9 @@ Compact noisy conversational memory stretches (especially debug sessions) into d
 - bounded deletions with `--max-deletes`
 
 ## Design notes
-- keep newest item in each compacted cluster as guardrail
 - write compacted summary before deleting old entries
-- annotate summaries with provenance metadata (`compacted_from_ids`)
+- annotate summaries with provenance metadata (`segment_source_ids`, `extraction`)
+- set `metadata.created_at` to the segment end timestamp so summary ordering stays intact
 
 ## Next steps
 1. Add rollback snapshot file before apply
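The `metadata.created_at` design note above is what keeps summaries in timeline order. A minimal sketch of the intended consumer-side ordering, assuming items carry the metadata `compactor.py` writes further down in this commit:

```python
# raw items sort by their own created_at; summaries sort by the backdated
# metadata.created_at that the compactor sets to the segment end timestamp
memories = [
    {"created_at": "2025-02-01T11:00:00+00:00", "metadata": {}},
    {
        "created_at": "2025-02-10T08:00:00+00:00",  # written later...
        "metadata": {
            "kind": "segment_summary",
            "created_at": "2025-01-05T09:00:00+00:00",  # ...but backdated
        },
    },
]
memories.sort(key=lambda m: (m.get("metadata") or {}).get("created_at") or m["created_at"])
```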

README.md (+14 -5)

@@ -5,15 +5,15 @@ Temporal conversational memory compactor for your custom mem0-python-server.
 ## What it does
 - pulls conversational memories for one `user_id`
 - clusters by time windows
-- detects likely debug-session arcs
-- creates a compact summary memory for resolved arcs
-- preserves extracted facts (names/phones/emails) in summary + metadata
-- deletes older intermediate entries (safe budget + dry-run first)
+- extracts a compact summary + structured facts per segment (Groq)
+- writes summaries **verbatim** to `POST /memories/raw`
+- preserves extracted facts in metadata alongside the summary
+- injects `metadata.created_at` so summaries sort correctly without date prefixes
+- optionally deletes older intermediate entries (safe budget + dry-run first)
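The verbatim write uses the same request shape as `Mem0Client.write_memory` in `compactor.py`; a sketch with placeholder text and metadata values:

```python
import requests

requests.post(
    "http://192.168.0.200:8420/memories/raw",
    json={
        "text": "Compact summary of the segment.",  # stored verbatim
        "userId": "main",
        "metadata": {
            "kind": "segment_summary",
            "created_at": "2025-01-05T09:00:00+00:00",
        },
    },
    timeout=20,
).raise_for_status()
```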
 
 ## Safety defaults
 - **dry-run by default** (no writes/deletes)
 - keeps recent memories (`--min-age-days`)
-- keeps the latest entry in each compacted cluster
 - delete cap per run (`--max-deletes`)
 
 ## Run
@@ -27,11 +27,20 @@ Apply changes:
 python3 compactor.py --user-id main --apply
 ```
 
+## Environment
+The compactor auto-loads `.env` from this directory (useful for cron). At minimum set:
+```env
+GROQ_API_KEY=your_key_here
+```
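Because the key is auto-loaded from `.env`, a scheduled dry-run needs no extra environment setup. A hypothetical crontab entry (install path and schedule are placeholders; without `--apply` this only prints the report):

```
15 3 * * * cd /opt/memory-compactor && python3 compactor.py --user-id main >> compactor.log 2>&1
```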
+
 ## Useful options
 - `--base-url http://192.168.0.200:8420`
 - `--gap-minutes 45`
 - `--min-age-days 2`
 - `--max-deletes 50`
+- `--segment-max-items 15`
+- `--max-summaries 1` (limit created summaries for testing)
+- `--skip-ephemeral` (skip obvious weather-like chatter)
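For example, a cautious first apply run might combine the caps above:

```
python3 compactor.py --user-id main --apply --max-summaries 1 --skip-ephemeral
```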
 
 ## Next improvements
 - semantic clustering (embeddings) beyond time windows

compactor.py (+188 -98)

@@ -4,7 +4,7 @@ Conversational memory compactor for custom mem0-python-server.
 
 Why:
 - keep long-term conversational memory useful
-- collapse noisy debug sessions into resolved summaries
+- compact clusters into LLM summaries stored verbatim
 - preserve safety with dry-run-first workflow
 """
 
@@ -14,19 +14,38 @@ import argparse
 import dataclasses
 import datetime as dt
 import json
+import os
 import re
-from typing import Any, Dict, List, Tuple
+from typing import Any, Dict, List
 
 import requests
 
-DEBUG_HINTS = {
-    "error", "bug", "traceback", "exception", "fix", "retry", "failed",
-    "works", "resolved", "done", "restart", "timeout", "stack", "issue"
+
+def load_env_file(path: str) -> None:
+    """Load simple KEY=VALUE pairs from a .env file into os.environ.
+
+    This keeps cron usage predictable without adding a dependency on python-dotenv.
+    """
+    if not os.path.exists(path):
+        return
+    with open(path, "r", encoding="utf-8") as handle:
+        for line in handle:
+            line = line.strip()
+            if not line or line.startswith("#") or "=" not in line:
+                continue
+            key, value = line.split("=", 1)
+            key = key.strip()
+            value = value.strip().strip('"').strip("'")
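+            # never override a variable that is already set in the environment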
+            if key and key not in os.environ:
+                os.environ[key] = value
+
+EPHEMERAL_HINTS = {
+    "weather", "forecast", "temperature", "rain", "raining", "expected to stop",
+    "wind", "humidity", "uv index", "clouds", "sunrise", "sunset",
 }
 
-PHONE_RE = re.compile(r"\b\+?[0-9][0-9\-\s]{4,}[0-9]\b")
-EMAIL_RE = re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")
-NAME_RE = re.compile(r"\b(?:name is|his name is|her name is)\s+([A-Z][a-z]+)\b", re.IGNORECASE)
+DEFAULT_ENV_PATH = os.path.join(os.path.dirname(__file__), ".env")
 
 
 @dataclasses.dataclass
@@ -73,7 +92,7 @@ class Mem0Client:
 
     def write_memory(self, user_id: str, text: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
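+        # summaries go to the raw endpoint so the text is stored verbatim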
         r = requests.post(
-            f"{self.base_url}/memories",
+            f"{self.base_url}/memories/raw",
             json={"text": text, "userId": user_id, "metadata": metadata},
             timeout=self.timeout,
         )
@@ -95,10 +114,9 @@ def normalize(text: str) -> str:
     text = re.sub(r"\s+", " ", text)
     return text
 
-
-def is_debug_like(text: str) -> bool:
-    t = normalize(text)
-    return any(k in t for k in DEBUG_HINTS)
+
+def is_ephemeral_cluster(texts: List[str]) -> bool:
+    joined = normalize("\n".join(texts))
+    return any(hint in joined for hint in EPHEMERAL_HINTS)
 
 
 def cluster_by_time(memories: List[MemoryItem], gap_minutes: int) -> List[List[MemoryItem]]:
@@ -117,74 +135,86 @@ def cluster_by_time(memories: List[MemoryItem], gap_minutes: int) -> List[List[MemoryItem]]:
     return clusters
 
 
-def extract_facts(texts: List[str]) -> Dict[str, Any]:
-    phones, emails, names = set(), set(), set()
-    for t in texts:
-        for m in PHONE_RE.findall(t):
-            phones.add(re.sub(r"\s+", "", m))
-        for m in EMAIL_RE.findall(t):
-            emails.add(m)
-        for m in NAME_RE.findall(t):
-            names.add(m.strip().title())
-    return {
-        "phones": sorted(phones),
-        "emails": sorted(emails),
-        "names": sorted(names),
-    }
-
-
-def cluster_has_fact_signals(texts: List[str]) -> bool:
-    joined = "\n".join(texts)
-    return bool(PHONE_RE.search(joined) or EMAIL_RE.search(joined) or NAME_RE.search(joined))
-
-
-def summarize_cluster(cluster: List[MemoryItem]) -> Tuple[str, bool, Dict[str, Any]]:
-    texts = [c.text.strip() for c in cluster if c.text.strip()]
-    if not texts:
-        return "", False, {}
-
-    debug_ratio = sum(1 for t in texts if is_debug_like(t)) / max(len(texts), 1)
-    has_resolution = any(any(w in normalize(t) for w in ["resolved", "fixed", "works", "done"]) for t in texts)
-
-    if len(texts) < 4 or debug_ratio < 0.35 or not has_resolution:
-        return "", False, {}
+def format_segment(cluster: List[MemoryItem]) -> str:
+    lines = []
+    for item in cluster:
+        ts = item.created_at or "unknown"
+        text = item.text.strip().replace("\n", " ")
+        if text:
+            lines.append(f"[{ts}] {text}")
+    return "\n".join(lines)
+
+
+def split_cluster(cluster: List[MemoryItem], max_items: int) -> List[List[MemoryItem]]:
+    if max_items <= 0 or len(cluster) <= max_items:
+        return [cluster]
+    chunks = []
+    for i in range(0, len(cluster), max_items):
+        chunks.append(cluster[i:i + max_items])
+    return chunks
+
+
+def call_groq_extract(segment_text: str, model: str, timeout: int, base_url: str) -> Dict[str, Any]:
+    api_key = os.getenv("GROQ_API_KEY")
+    if not api_key:
+        raise RuntimeError("GROQ_API_KEY is not set in the environment.")
+
+    prompt = (
+        "You extract structured facts and a concise summary from a chat segment. "
+        "Return ONLY raw JSON (no code fences, no markdown) with keys: "
+        "facts, summary, segment_kind, resolution. "
+        "facts must include: people (list of {name, phone, email}), "
+        "projects (list of {name, url}), urls, paths, phones, emails, names. "
+        "Only include facts explicitly present in the segment. Do NOT infer or invent. "
+        "Never include generic 'user' as a person. Use null for unknown phone/email. "
+        "If no facts exist, return empty lists. "
+        "summary should be one or two sentences. "
+        "segment_kind: implementation|debug_arc|planning|deployment|misc. "
+        "resolution: resolved|open|unknown."
+    )
 
-    facts = extract_facts(texts)
-    requires_fact_preservation = cluster_has_fact_signals(texts)
-    has_extracted_facts = any(len(v) > 0 for v in facts.values())
+    payload = {
+        "model": model,
+        "messages": [
+            {"role": "system", "content": prompt},
+            {"role": "user", "content": segment_text},
+        ],
+        "temperature": 0.2,
+        "max_tokens": 600,
+    }
 
-    # guardrail: if cluster appears to contain facts but we couldn't preserve them, skip compaction
-    if requires_fact_preservation and not has_extracted_facts:
-        return "", False, {}
+    url = f"{base_url.rstrip('/')}/chat/completions"
+    r = requests.post(
+        url,
+        headers={"Authorization": f"Bearer {api_key}"},
+        json=payload,
+        timeout=timeout,
+    )
+    if r.status_code >= 400:
+        raise RuntimeError(f"Groq API error {r.status_code}: {r.text}")
 
-    head = texts[0][:280]
-    tail = texts[-1][:280]
+    data = r.json()
+    content = data["choices"][0]["message"]["content"].strip()
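+    # some models wrap JSON in markdown fences despite the prompt; strip them before parsing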
+    if content.startswith("```"):
+        content = re.sub(r"^```[a-zA-Z]*\n", "", content)
+        content = re.sub(r"```$", "", content).strip()
+    try:
+        return json.loads(content)
+    except json.JSONDecodeError:
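+        # return the raw output instead of raising; the run loop reports it as a parse_error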
+        return {"parse_error": True, "raw": content}
 
-    facts_line = ""
-    if has_extracted_facts:
-        facts_line = (
-            " Preserved facts: "
-            f"names={facts['names'] or []}, "
-            f"phones={facts['phones'] or []}, "
-            f"emails={facts['emails'] or []}."
-        )
 
-    summary = (
-        "[COMPACTED DEBUG ARC] "
-        f"Started with: {head} | Final state: {tail}. "
-        "Intermediate trial/error messages were compacted."
-        + facts_line
-    )
-    return summary, True, facts
+def is_compacted_memory(item: MemoryItem) -> bool:
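+    """True for summaries written by a previous run, so they are never re-compacted."""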
+    kind = (item.metadata or {}).get("kind")
+    return kind in {"segment_summary", "debug_arc_summary"}
 
 
 def run(args: argparse.Namespace) -> None:
+    load_env_file(DEFAULT_ENV_PATH)
     client = Mem0Client(args.base_url, timeout=args.timeout)
     memories = client.all_memories(args.user_id)
 
     # keep very recent entries untouched
     cutoff = dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=args.min_age_days)
-    candidates = [m for m in memories if m.created_dt < cutoff]
+    candidates = [m for m in memories if m.created_dt < cutoff and not is_compacted_memory(m)]
 
     clusters = cluster_by_time(candidates, args.gap_minutes)
 
@@ -193,59 +223,119 @@ def run(args: argparse.Namespace) -> None:
         "total_memories": len(memories),
         "candidates": len(candidates),
         "clusters": len(clusters),
+        "max_summaries": args.max_summaries,
         "actions": [],
     }
 
     delete_budget = args.max_deletes
+    created_count = 0
 
     for cluster in clusters:
-        summary, should_compact, facts = summarize_cluster(cluster)
-        if not should_compact:
+        texts = [c.text.strip() for c in cluster if c.text.strip()]
+        if not texts:
             continue
 
-        ids = [m.id for m in cluster if m.id]
-        if len(ids) < 2:
+        if len(texts) < args.segment_min_items:
             continue
-
-        to_delete = ids[:-1]  # keep latest raw entry as an extra guardrail
-        if len(to_delete) > delete_budget:
+        if args.skip_ephemeral and is_ephemeral_cluster(texts):
             continue
 
-        action = {
-            "type": "compact_debug_arc",
-            "cluster_size": len(cluster),
-            "delete_ids": to_delete,
-            "keep_id": ids[-1],
-            "summary_preview": summary[:240],
-            "preserved_facts": facts,
-        }
-        report["actions"].append(action)
-
-        if args.apply:
-            metadata = {
-                "compacted_at": dt.datetime.now(dt.timezone.utc).isoformat(),
-                "compactor_version": "0.2",
-                "compacted_from_ids": ids,
-                "kind": "debug_arc_summary",
-                "preserved_facts": facts,
+        for subcluster in split_cluster(cluster, args.segment_max_items):
+            if args.max_summaries and created_count >= args.max_summaries:
+                break
+
+            segment_text = format_segment(subcluster)
+            extraction = call_groq_extract(segment_text, args.model, args.timeout, args.groq_base_url)
+            facts = extraction.get("facts") if isinstance(extraction, dict) else None
+            summary = extraction.get("summary") if isinstance(extraction, dict) else ""
+            parse_error = bool(extraction.get("parse_error")) if isinstance(extraction, dict) else True
+
+            has_facts = bool(facts) and any(
+                facts.get(k) for k in ["people", "projects", "urls", "paths", "phones", "emails", "names"]
+            )
+            if not args.llm_report_all and not parse_error and not summary and not has_facts:
+                continue
+
+            ids = [m.id for m in subcluster if m.id]
+            segment_start = subcluster[0].created_at if subcluster else None
+            segment_end = subcluster[-1].created_at if subcluster else None
+            action = {
+                "type": "segment_extract",
+                "cluster_size": len(subcluster),
+                "segment_preview": segment_text[:240],
+                "extraction": extraction,
+                "source_ids": ids,
+                "segment_start": segment_start,
+                "segment_end": segment_end,
             }
-            client.write_memory(args.user_id, summary, metadata)
-            for mid in to_delete:
-                client.delete_memory(mid)
-            delete_budget -= len(to_delete)
+            report["actions"].append(action)
+
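+            # count summaries that would be written; in apply mode, respect the delete budget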
+            can_create = bool(summary)
+            if args.apply and not args.dry_run and summary and args.purge_source and len(ids) > delete_budget:
+                can_create = False
+            if can_create:
+                created_count += 1
+
+            if args.apply and not args.dry_run and summary:
+                if args.purge_source and len(ids) > delete_budget:
+                    continue
+
+                metadata = {
+                    "compacted_at": dt.datetime.now(dt.timezone.utc).isoformat(),
+                    "compactor_version": "0.4",
+                    "kind": "segment_summary",
+                    "segment_source_ids": ids,
+                    "segment_start": segment_start,
+                    "segment_end": segment_end,
+                    "created_at": segment_end or segment_start,
+                    "extraction": extraction,
+                    "model": args.model,
+                }
+                client.write_memory(args.user_id, summary, metadata)
+
+                if args.purge_source:
+                    for mid in ids:
+                        client.delete_memory(mid)
+                    delete_budget -= len(ids)
+
+        if args.max_summaries and created_count >= args.max_summaries:
+            break
 
     print(json.dumps(report, indent=2, ensure_ascii=False))
 
 
 def parse_args() -> argparse.Namespace:
-    p = argparse.ArgumentParser(description="Compacts conversational memories with temporal clustering.")
+    examples = """
+Examples:
+  python3 compactor.py --user-id main
+  python3 compactor.py --user-id main --apply
+  python3 compactor.py --user-id main --apply --max-summaries 1
+  python3 compactor.py --user-id main --segment-max-items 15 --skip-ephemeral
+"""
+    class HelpFormatter(argparse.RawDescriptionHelpFormatter, argparse.ArgumentDefaultsHelpFormatter):
+        pass
+
+    p = argparse.ArgumentParser(
+        description="Compacts conversational memories with temporal clustering.",
+        formatter_class=HelpFormatter,
+        epilog=examples.strip(),
+    )
     p.add_argument("--base-url", default="http://192.168.0.200:8420")
     p.add_argument("--user-id", required=True)
     p.add_argument("--apply", action="store_true", help="Apply changes. Default is dry-run.")
+    p.add_argument("--dry-run", action="store_true", help="Force dry-run even with --apply.")
     p.add_argument("--gap-minutes", type=int, default=45)
-    p.add_argument("--min-age-days", type=int, default=2)
+    p.add_argument("--min-age-days", type=int, default=7)
     p.add_argument("--max-deletes", type=int, default=50)
+    p.add_argument("--max-summaries", type=int, default=0, help="Limit the number of summaries created (0 = no limit).")
     p.add_argument("--timeout", type=int, default=20)
+    p.add_argument("--model", default="meta-llama/llama-4-scout-17b-16e-instruct")
+    p.add_argument("--segment-min-items", type=int, default=4)
+    p.add_argument("--segment-max-items", type=int, default=15, help="Split large clusters to reduce topic drift.")
+    p.add_argument("--skip-ephemeral", action="store_true", help="Skip obvious ephemeral weather-like segments.")
+    p.add_argument("--llm-report-all", action="store_true", help="Report all LLM extractions even if empty.")
+    p.add_argument("--purge-source", action="store_true", help="Delete source memories after writing a summary.")
+    p.add_argument("--groq-base-url", default="https://api.groq.com/openai/v1")
     return p.parse_args()