Prechádzať zdrojové kódy

Embed facts inline and tighten summaries

Lukas Goldschmidt 1 mesiac pred
rodič
commit
eec04b2e39
Zmenil 3 súbory, kde vykonal 144 pridaní a 24 odobraní
  1. 1 0
      PROJECT.md
  2. 2 1
      README.md
  3. 141 23
      compactor.py

+ 1 - 0
PROJECT.md

@@ -19,6 +19,7 @@ Compact noisy conversational memory stretches (especially debug sessions) into d
 
 ## Design notes
 - write compacted summary before deleting old entries
+- append extracted facts into the summary text (non-empty only)
 - annotate summaries with provenance metadata (`segment_source_ids`, extraction)
 - set `metadata.created_at` to the segment end timestamp so summary ordering stays intact
 

+ 2 - 1
README.md

@@ -7,7 +7,8 @@ Temporal conversational memory compactor for your custom mem0-python-server.
 - clusters by time windows
 - extracts a compact summary + structured facts per segment (Groq)
 - writes summaries **verbatim** to `POST /memories/raw`
-- preserves extracted facts in metadata alongside the summary
+- appends extracted facts into the summary text (non-empty only)
+- preserves full extracted facts in metadata alongside the summary
 - injects `metadata.created_at` so summaries sort correctly without date prefixes
 - optionally deletes older intermediate entries (safe budget + dry-run first)
 

+ 141 - 23
compactor.py

@@ -158,15 +158,18 @@ def call_groq_extract(segment_text: str, model: str, timeout: int, base_url: str
         raise RuntimeError("GROQ_API_KEY is not set in the environment.")
 
     prompt = (
-        "You extract structured facts and a concise summary from a chat segment. "
+        "You extract structured facts and a detailed summary from a chat segment. "
         "Return ONLY raw JSON (no code fences, no markdown) with keys: "
         "facts, summary, segment_kind, resolution. "
-        "facts must include: people (list of {name, phone, email}), "
-        "projects (list of {name, url}), urls, paths, phones, emails, names. "
+        "facts must include: "
+        "people (list of {name, phone, email}), projects (list of {name, url}), "
+        "urls, paths, commands, packages, services, env_vars, ips, ports, hosts, "
+        "phones, emails, names. "
         "Only include facts explicitly present in the segment. Do NOT infer or invent. "
         "Never include generic 'user' as a person. Use null for unknown phone/email. "
         "If no facts exist, return empty lists. "
-        "summary should be one or two sentences. "
+        "summary should be concise (1–3 sentences max) and include all concrete details (IP addresses, ports, commands, files, URLs). "
+        "Use 1 sentence for short segments, 2 for medium, 3 for long. Avoid redundancy. "
         "segment_kind: implementation|debug_arc|planning|deployment|misc. "
         "resolution: resolved|open|unknown."
     )
@@ -207,6 +210,96 @@ def is_compacted_memory(item: MemoryItem) -> bool:
     return kind in {"segment_summary", "debug_arc_summary"}
 
 
+def _limit_items(items: List[Any], max_items: int = 5) -> List[Any]:
+    if len(items) <= max_items:
+        return items
+    return items[:max_items]
+
+
+def _format_people(people: List[Dict[str, Any]]) -> List[str]:
+    out = []
+    for person in people:
+        name = (person or {}).get("name")
+        phone = (person or {}).get("phone")
+        email = (person or {}).get("email")
+        bits = [b for b in [name, phone, email] if b]
+        if bits:
+            out.append("/".join(bits))
+    return out
+
+
+def _format_projects(projects: List[Dict[str, Any]]) -> List[str]:
+    out = []
+    for proj in projects:
+        name = (proj or {}).get("name")
+        url = (proj or {}).get("url")
+        bits = [b for b in [name, url] if b]
+        if bits:
+            out.append("/".join(bits))
+    return out
+
+
def format_facts_inline(facts: Dict[str, Any]) -> str:
    """Render a facts dict as a single ``Facts: ...`` line, or ``""`` when empty.

    People and projects are flattened via their dedicated formatters; every
    other category is used as-is. Non-empty categories are capped with
    ``_limit_items`` and rendered as ``key=<list repr>`` in a fixed order.
    Returns an empty string for non-dict input or when no category has data.
    """
    if not isinstance(facts, dict):
        return ""

    plain_keys = (
        "urls",
        "paths",
        "commands",
        "packages",
        "services",
        "env_vars",
        "ips",
        "ports",
        "hosts",
        "phones",
        "emails",
        "names",
    )
    ordered = [
        ("people", _format_people(facts.get("people") or [])),
        ("projects", _format_projects(facts.get("projects") or [])),
    ]
    ordered.extend((key, facts.get(key) or []) for key in plain_keys)

    rendered = [
        f"{key}={_limit_items(values)}" for key, values in ordered if values
    ]
    return "Facts: " + "; ".join(rendered) if rendered else ""
+
+
+def build_summary_metadata(
+    *,
+    segment_ids: List[str],
+    segment_start: str | None,
+    segment_end: str | None,
+    extraction: Dict[str, Any],
+    model: str,
+) -> Dict[str, Any]:
+    # Keep summaries sortable without embedding timestamps in the text itself.
+    created_at = segment_end or segment_start
+    return {
+        "compacted_at": dt.datetime.now(dt.timezone.utc).isoformat(),
+        "compactor_version": "0.6",
+        "kind": "segment_summary",
+        "segment_source_ids": segment_ids,
+        "segment_start": segment_start,
+        "segment_end": segment_end,
+        "created_at": created_at,
+        "extraction": extraction,
+        "model": model,
+        "source": "memory-compactor",
+        "scope": "compacted",
+    }
+
+
 def run(args: argparse.Namespace) -> None:
     load_env_file(DEFAULT_ENV_PATH)
     client = Mem0Client(args.base_url, timeout=args.timeout)
@@ -246,16 +339,37 @@ def run(args: argparse.Namespace) -> None:
 
             segment_text = format_segment(subcluster)
             extraction = call_groq_extract(segment_text, args.model, args.timeout, args.groq_base_url)
-            facts = extraction.get("facts") if isinstance(extraction, dict) else None
-            summary = extraction.get("summary") if isinstance(extraction, dict) else ""
+            facts = extraction.get("facts") if isinstance(extraction, dict) else {}
+            summary_raw = extraction.get("summary") if isinstance(extraction, dict) else ""
             parse_error = bool(extraction.get("parse_error")) if isinstance(extraction, dict) else True
 
             has_facts = bool(facts) and any(
-                facts.get(k) for k in ["people", "projects", "urls", "paths", "phones", "emails", "names"]
+                facts.get(k)
+                for k in [
+                    "people",
+                    "projects",
+                    "urls",
+                    "paths",
+                    "commands",
+                    "packages",
+                    "services",
+                    "env_vars",
+                    "ips",
+                    "ports",
+                    "hosts",
+                    "phones",
+                    "emails",
+                    "names",
+                ]
             )
-            if not args.llm_report_all and not parse_error and not summary and not has_facts:
+            if not args.llm_report_all and not parse_error and not summary_raw and not has_facts:
                 continue
 
+            facts_inline = format_facts_inline(facts)
+            summary_text = summary_raw
+            if facts_inline:
+                summary_text = f"{summary_raw} {facts_inline}".strip() if summary_raw else facts_inline
+
             ids = [m.id for m in subcluster if m.id]
             segment_start = subcluster[0].created_at if subcluster else None
             segment_end = subcluster[-1].created_at if subcluster else None
@@ -268,30 +382,34 @@ def run(args: argparse.Namespace) -> None:
                 "segment_start": segment_start,
                 "segment_end": segment_end,
             }
+
+            metadata = None
+            if summary_text:
+                metadata = build_summary_metadata(
+                    segment_ids=ids,
+                    segment_start=segment_start,
+                    segment_end=segment_end,
+                    extraction=extraction,
+                    model=args.model,
+                )
+                action["summary_raw"] = summary_raw
+                action["summary_text"] = summary_text
+                action["summary_metadata"] = metadata
+                action["write_payload"] = {"text": summary_text, "metadata": metadata}
+
             report["actions"].append(action)
 
-            can_create = bool(summary)
-            if args.apply and not args.dry_run and summary and args.purge_source and len(ids) > delete_budget:
+            can_create = bool(summary_text)
+            if args.apply and not args.dry_run and summary_text and args.purge_source and len(ids) > delete_budget:
                 can_create = False
             if can_create:
                 created_count += 1
 
-            if args.apply and not args.dry_run and summary:
+            if args.apply and not args.dry_run and summary_text:
                 if args.purge_source and len(ids) > delete_budget:
                     continue
 
-                metadata = {
-                    "compacted_at": dt.datetime.now(dt.timezone.utc).isoformat(),
-                    "compactor_version": "0.4",
-                    "kind": "segment_summary",
-                    "segment_source_ids": ids,
-                    "segment_start": segment_start,
-                    "segment_end": segment_end,
-                    "created_at": segment_end or segment_start,
-                    "extraction": extraction,
-                    "model": args.model,
-                }
-                client.write_memory(args.user_id, summary, metadata)
+                client.write_memory(args.user_id, summary_text, metadata or {})
 
                 if args.purge_source:
                     for mid in ids: