|
@@ -158,15 +158,18 @@ def call_groq_extract(segment_text: str, model: str, timeout: int, base_url: str
|
|
|
raise RuntimeError("GROQ_API_KEY is not set in the environment.")
|
|
raise RuntimeError("GROQ_API_KEY is not set in the environment.")
|
|
|
|
|
|
|
|
prompt = (
|
|
prompt = (
|
|
|
- "You extract structured facts and a concise summary from a chat segment. "
|
|
|
|
|
|
|
+ "You extract structured facts and a detailed summary from a chat segment. "
|
|
|
"Return ONLY raw JSON (no code fences, no markdown) with keys: "
|
|
"Return ONLY raw JSON (no code fences, no markdown) with keys: "
|
|
|
"facts, summary, segment_kind, resolution. "
|
|
"facts, summary, segment_kind, resolution. "
|
|
|
- "facts must include: people (list of {name, phone, email}), "
|
|
|
|
|
- "projects (list of {name, url}), urls, paths, phones, emails, names. "
|
|
|
|
|
|
|
+ "facts must include: "
|
|
|
|
|
+ "people (list of {name, phone, email}), projects (list of {name, url}), "
|
|
|
|
|
+ "urls, paths, commands, packages, services, env_vars, ips, ports, hosts, "
|
|
|
|
|
+ "phones, emails, names. "
|
|
|
"Only include facts explicitly present in the segment. Do NOT infer or invent. "
|
|
"Only include facts explicitly present in the segment. Do NOT infer or invent. "
|
|
|
"Never include generic 'user' as a person. Use null for unknown phone/email. "
|
|
"Never include generic 'user' as a person. Use null for unknown phone/email. "
|
|
|
"If no facts exist, return empty lists. "
|
|
"If no facts exist, return empty lists. "
|
|
|
- "summary should be one or two sentences. "
|
|
|
|
|
|
|
+ "summary should be concise (1–3 sentences max) and include all concrete details (IP addresses, ports, commands, files, URLs). "
|
|
|
|
|
+ "Use 1 sentence for short segments, 2 for medium, 3 for long. Avoid redundancy. "
|
|
|
"segment_kind: implementation|debug_arc|planning|deployment|misc. "
|
|
"segment_kind: implementation|debug_arc|planning|deployment|misc. "
|
|
|
"resolution: resolved|open|unknown."
|
|
"resolution: resolved|open|unknown."
|
|
|
)
|
|
)
|
|
@@ -207,6 +210,96 @@ def is_compacted_memory(item: MemoryItem) -> bool:
|
|
|
return kind in {"segment_summary", "debug_arc_summary"}
|
|
return kind in {"segment_summary", "debug_arc_summary"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
+def _limit_items(items: List[Any], max_items: int = 5) -> List[Any]:
|
|
|
|
|
+ if len(items) <= max_items:
|
|
|
|
|
+ return items
|
|
|
|
|
+ return items[:max_items]
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+def _format_people(people: List[Dict[str, Any]]) -> List[str]:
|
|
|
|
|
+ out = []
|
|
|
|
|
+ for person in people:
|
|
|
|
|
+ name = (person or {}).get("name")
|
|
|
|
|
+ phone = (person or {}).get("phone")
|
|
|
|
|
+ email = (person or {}).get("email")
|
|
|
|
|
+ bits = [b for b in [name, phone, email] if b]
|
|
|
|
|
+ if bits:
|
|
|
|
|
+ out.append("/".join(bits))
|
|
|
|
|
+ return out
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+def _format_projects(projects: List[Dict[str, Any]]) -> List[str]:
|
|
|
|
|
+ out = []
|
|
|
|
|
+ for proj in projects:
|
|
|
|
|
+ name = (proj or {}).get("name")
|
|
|
|
|
+ url = (proj or {}).get("url")
|
|
|
|
|
+ bits = [b for b in [name, url] if b]
|
|
|
|
|
+ if bits:
|
|
|
|
|
+ out.append("/".join(bits))
|
|
|
|
|
+ return out
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def format_facts_inline(facts: Dict[str, Any]) -> str:
    """Flatten extracted facts into a single ``Facts: ...`` line.

    Every non-empty field becomes ``key=<list>``, where the list is capped by
    ``_limit_items``. The fields are joined with ``; `` in a fixed order:
    people, projects, urls, paths, commands, packages, services, env_vars,
    ips, ports, hosts, phones, emails, names.

    Args:
        facts: Mapping of fact category to values. The values are
            normalized before formatting: falsy values count as empty and a
            bare scalar is wrapped in a one-element list.

    Returns:
        The formatted line, or ``""`` when ``facts`` is not a dict or no
        field has any value.
    """
    if not isinstance(facts, dict):
        return ""

    def _as_list(value: Any) -> List[Any]:
        # Falsy means "no facts". A bare scalar becomes a one-item list so a
        # string is not sliced per character and a number does not break
        # len() further down.
        if not value:
            return []
        if isinstance(value, (list, tuple)):
            return list(value)
        return [value]

    fields = [
        ("people", _format_people(_as_list(facts.get("people")))),
        ("projects", _format_projects(_as_list(facts.get("projects")))),
    ]
    fields.extend(
        (key, _as_list(facts.get(key)))
        for key in (
            "urls",
            "paths",
            "commands",
            "packages",
            "services",
            "env_vars",
            "ips",
            "ports",
            "hosts",
            "phones",
            "emails",
            "names",
        )
    )

    # Empty categories are omitted entirely rather than rendered as "key=[]".
    parts = [f"{key}={_limit_items(value)}" for key, value in fields if value]
    if not parts:
        return ""
    return "Facts: " + "; ".join(parts)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def build_summary_metadata(
    *,
    segment_ids: List[str],
    segment_start: str | None,
    segment_end: str | None,
    extraction: Dict[str, Any],
    model: str,
) -> Dict[str, Any]:
    """Assemble the metadata dict stored alongside a segment summary.

    Args:
        segment_ids: Stored under ``segment_source_ids``.
        segment_start: Stored under ``segment_start``.
        segment_end: Stored under ``segment_end``.
        extraction: Stored unchanged under ``extraction``.
        model: Stored under ``model``.

    Returns:
        A dict holding the arguments above plus fixed fields:
        ``compacted_at`` (the current UTC time in ISO format),
        ``compactor_version``, ``kind``, ``source`` and ``scope``, and
        ``created_at``, which is ``segment_end`` when that is truthy and
        ``segment_start`` otherwise.
    """
    stamped_at = dt.datetime.now(dt.timezone.utc)

    # The sort key lives in metadata so summaries stay sortable without
    # embedding timestamps in the summary text itself.
    if segment_end:
        sort_key = segment_end
    else:
        sort_key = segment_start

    metadata: Dict[str, Any] = dict(
        compacted_at=stamped_at.isoformat(),
        compactor_version="0.6",
        kind="segment_summary",
        segment_source_ids=segment_ids,
        segment_start=segment_start,
        segment_end=segment_end,
        created_at=sort_key,
        extraction=extraction,
        model=model,
        source="memory-compactor",
        scope="compacted",
    )
    return metadata
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
def run(args: argparse.Namespace) -> None:
|
|
def run(args: argparse.Namespace) -> None:
|
|
|
load_env_file(DEFAULT_ENV_PATH)
|
|
load_env_file(DEFAULT_ENV_PATH)
|
|
|
client = Mem0Client(args.base_url, timeout=args.timeout)
|
|
client = Mem0Client(args.base_url, timeout=args.timeout)
|
|
@@ -246,16 +339,37 @@ def run(args: argparse.Namespace) -> None:
|
|
|
|
|
|
|
|
segment_text = format_segment(subcluster)
|
|
segment_text = format_segment(subcluster)
|
|
|
extraction = call_groq_extract(segment_text, args.model, args.timeout, args.groq_base_url)
|
|
extraction = call_groq_extract(segment_text, args.model, args.timeout, args.groq_base_url)
|
|
|
- facts = extraction.get("facts") if isinstance(extraction, dict) else None
|
|
|
|
|
- summary = extraction.get("summary") if isinstance(extraction, dict) else ""
|
|
|
|
|
|
|
+ facts = extraction.get("facts") if isinstance(extraction, dict) else {}
|
|
|
|
|
+ summary_raw = extraction.get("summary") if isinstance(extraction, dict) else ""
|
|
|
parse_error = bool(extraction.get("parse_error")) if isinstance(extraction, dict) else True
|
|
parse_error = bool(extraction.get("parse_error")) if isinstance(extraction, dict) else True
|
|
|
|
|
|
|
|
has_facts = bool(facts) and any(
|
|
has_facts = bool(facts) and any(
|
|
|
- facts.get(k) for k in ["people", "projects", "urls", "paths", "phones", "emails", "names"]
|
|
|
|
|
|
|
+ facts.get(k)
|
|
|
|
|
+ for k in [
|
|
|
|
|
+ "people",
|
|
|
|
|
+ "projects",
|
|
|
|
|
+ "urls",
|
|
|
|
|
+ "paths",
|
|
|
|
|
+ "commands",
|
|
|
|
|
+ "packages",
|
|
|
|
|
+ "services",
|
|
|
|
|
+ "env_vars",
|
|
|
|
|
+ "ips",
|
|
|
|
|
+ "ports",
|
|
|
|
|
+ "hosts",
|
|
|
|
|
+ "phones",
|
|
|
|
|
+ "emails",
|
|
|
|
|
+ "names",
|
|
|
|
|
+ ]
|
|
|
)
|
|
)
|
|
|
- if not args.llm_report_all and not parse_error and not summary and not has_facts:
|
|
|
|
|
|
|
+ if not args.llm_report_all and not parse_error and not summary_raw and not has_facts:
|
|
|
continue
|
|
continue
|
|
|
|
|
|
|
|
|
|
+ facts_inline = format_facts_inline(facts)
|
|
|
|
|
+ summary_text = summary_raw
|
|
|
|
|
+ if facts_inline:
|
|
|
|
|
+ summary_text = f"{summary_raw} {facts_inline}".strip() if summary_raw else facts_inline
|
|
|
|
|
+
|
|
|
ids = [m.id for m in subcluster if m.id]
|
|
ids = [m.id for m in subcluster if m.id]
|
|
|
segment_start = subcluster[0].created_at if subcluster else None
|
|
segment_start = subcluster[0].created_at if subcluster else None
|
|
|
segment_end = subcluster[-1].created_at if subcluster else None
|
|
segment_end = subcluster[-1].created_at if subcluster else None
|
|
@@ -268,30 +382,34 @@ def run(args: argparse.Namespace) -> None:
|
|
|
"segment_start": segment_start,
|
|
"segment_start": segment_start,
|
|
|
"segment_end": segment_end,
|
|
"segment_end": segment_end,
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
|
|
+ metadata = None
|
|
|
|
|
+ if summary_text:
|
|
|
|
|
+ metadata = build_summary_metadata(
|
|
|
|
|
+ segment_ids=ids,
|
|
|
|
|
+ segment_start=segment_start,
|
|
|
|
|
+ segment_end=segment_end,
|
|
|
|
|
+ extraction=extraction,
|
|
|
|
|
+ model=args.model,
|
|
|
|
|
+ )
|
|
|
|
|
+ action["summary_raw"] = summary_raw
|
|
|
|
|
+ action["summary_text"] = summary_text
|
|
|
|
|
+ action["summary_metadata"] = metadata
|
|
|
|
|
+ action["write_payload"] = {"text": summary_text, "metadata": metadata}
|
|
|
|
|
+
|
|
|
report["actions"].append(action)
|
|
report["actions"].append(action)
|
|
|
|
|
|
|
|
- can_create = bool(summary)
|
|
|
|
|
- if args.apply and not args.dry_run and summary and args.purge_source and len(ids) > delete_budget:
|
|
|
|
|
|
|
+ can_create = bool(summary_text)
|
|
|
|
|
+ if args.apply and not args.dry_run and summary_text and args.purge_source and len(ids) > delete_budget:
|
|
|
can_create = False
|
|
can_create = False
|
|
|
if can_create:
|
|
if can_create:
|
|
|
created_count += 1
|
|
created_count += 1
|
|
|
|
|
|
|
|
- if args.apply and not args.dry_run and summary:
|
|
|
|
|
|
|
+ if args.apply and not args.dry_run and summary_text:
|
|
|
if args.purge_source and len(ids) > delete_budget:
|
|
if args.purge_source and len(ids) > delete_budget:
|
|
|
continue
|
|
continue
|
|
|
|
|
|
|
|
- metadata = {
|
|
|
|
|
- "compacted_at": dt.datetime.now(dt.timezone.utc).isoformat(),
|
|
|
|
|
- "compactor_version": "0.4",
|
|
|
|
|
- "kind": "segment_summary",
|
|
|
|
|
- "segment_source_ids": ids,
|
|
|
|
|
- "segment_start": segment_start,
|
|
|
|
|
- "segment_end": segment_end,
|
|
|
|
|
- "created_at": segment_end or segment_start,
|
|
|
|
|
- "extraction": extraction,
|
|
|
|
|
- "model": args.model,
|
|
|
|
|
- }
|
|
|
|
|
- client.write_memory(args.user_id, summary, metadata)
|
|
|
|
|
|
|
+ client.write_memory(args.user_id, summary_text, metadata or {})
|
|
|
|
|
|
|
|
if args.purge_source:
|
|
if args.purge_source:
|
|
|
for mid in ids:
|
|
for mid in ids:
|