replay_decisions.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. #!/usr/bin/env python3
  2. from __future__ import annotations
  3. import argparse
  4. import json
  5. from pathlib import Path
  6. import sys
  7. ROOT = Path(__file__).resolve().parents[1]
  8. SRC = ROOT / "src"
  9. if str(SRC) not in sys.path:
  10. sys.path.insert(0, str(SRC))
  11. from hermes_mcp.replay import compare_to_baseline # noqa: E402
  12. from hermes_mcp.store import init_db # noqa: E402
  13. import sqlite3 # noqa: E402
  14. def _load_rows(limit: int, concern_id: str | None) -> list[dict]:
  15. init_db()
  16. db_path = ROOT / "data" / "hermes_mcp.sqlite3"
  17. with sqlite3.connect(db_path) as conn:
  18. conn.row_factory = sqlite3.Row
  19. if concern_id:
  20. rows = conn.execute(
  21. "select * from decisions where concern_id = ? order by created_at desc limit ?",
  22. (concern_id, limit),
  23. ).fetchall()
  24. else:
  25. rows = conn.execute(
  26. "select * from decisions order by created_at desc limit ?",
  27. (limit,),
  28. ).fetchall()
  29. return [dict(r) for r in rows]
  30. def main() -> int:
  31. parser = argparse.ArgumentParser(description="Replay stored Hermes decisions against the current decision engine.")
  32. parser.add_argument("--limit", type=int, default=20, help="How many stored decisions to replay")
  33. parser.add_argument("--concern-id", help="Optional concern id filter")
  34. parser.add_argument("--only-changed", action="store_true", help="Print only changed decisions")
  35. parser.add_argument("--json", action="store_true", help="Emit JSON lines instead of plain text")
  36. args = parser.parse_args()
  37. rows = _load_rows(limit=max(1, args.limit), concern_id=args.concern_id)
  38. checked = 0
  39. changed = 0
  40. skipped = 0
  41. for row in rows:
  42. payload = json.loads(row.get("target_policy_json") or "{}")
  43. replay_input = payload.get("replay_input") if isinstance(payload.get("replay_input"), dict) else None
  44. if not replay_input:
  45. skipped += 1
  46. continue
  47. result = compare_to_baseline(
  48. replay_input=replay_input,
  49. baseline={
  50. "mode": row.get("mode"),
  51. "action": row.get("action"),
  52. "target_strategy": row.get("target_strategy"),
  53. },
  54. )
  55. checked += 1
  56. changed += 1 if result["changed"] else 0
  57. if args.only_changed and not result["changed"]:
  58. continue
  59. output = {
  60. "decision_id": row.get("id"),
  61. "concern_id": row.get("concern_id"),
  62. "created_at": row.get("created_at"),
  63. **result,
  64. }
  65. if args.json:
  66. print(json.dumps(output, ensure_ascii=False))
  67. else:
  68. marker = "CHANGED" if result["changed"] else "same"
  69. print(f"[{marker}] {row.get('created_at')} concern={row.get('concern_id')} baseline={result['baseline']} replayed={result['replayed']}")
  70. summary = {
  71. "rows_loaded": len(rows),
  72. "checked": checked,
  73. "changed": changed,
  74. "skipped_without_replay_input": skipped,
  75. }
  76. if args.json:
  77. print(json.dumps({"summary": summary}, ensure_ascii=False))
  78. else:
  79. print(f"summary: {summary}")
  80. return 0
  81. if __name__ == "__main__":
  82. raise SystemExit(main())