# prepare_candles.py
  1. from __future__ import annotations
  2. from pathlib import Path
  3. import argparse
  4. import json
  5. import sys
  6. ROOT = Path(__file__).resolve().parents[1]
  7. SRC = ROOT / "src"
  8. SIM_SRC = ROOT / "simulation" / "src"
  9. for path in (str(SRC), str(SIM_SRC)):
  10. if path not in sys.path:
  11. sys.path.insert(0, path)
  12. from hermes_sim.candles import Candle # noqa: E402
  13. from hermes_sim.preprocess import build_manifest, discover_raw_archives, load_raw_archives, write_manifest, write_prepared_csv # noqa: E402
  14. def main() -> int:
  15. parser = argparse.ArgumentParser(description="Preprocess raw Binance archives into a canonical Hermes input CSV.")
  16. parser.add_argument("--symbol", default="XRPUSDT")
  17. parser.add_argument("--interval", default="1m")
  18. parser.add_argument("--raw-dir", default=str(ROOT / "simulation" / "data" / "raw"))
  19. parser.add_argument("--out", default=None, help="Output CSV path. Defaults to simulation/data/prepared/<symbol>/<interval>/dataset.csv")
  20. parser.add_argument("--manifest-out", default=None, help="Output manifest JSON path. Defaults beside the CSV.")
  21. args = parser.parse_args()
  22. raw_dir = Path(args.raw_dir)
  23. archives = discover_raw_archives(raw_dir, args.symbol, args.interval)
  24. if not archives:
  25. raise SystemExit(f"No raw archives found under {raw_dir}")
  26. candles = load_raw_archives(archives)
  27. out_path = Path(args.out) if args.out else ROOT / "simulation" / "data" / "prepared" / args.symbol.upper() / args.interval / f"{args.symbol.upper()}-{args.interval}.csv"
  28. write_prepared_csv(out_path, candles)
  29. dataset = build_manifest(
  30. symbol=args.symbol,
  31. interval=args.interval,
  32. source_kind="bulk",
  33. source_files=archives,
  34. candles=candles,
  35. output_csv=out_path,
  36. )
  37. manifest_path = Path(args.manifest_out) if args.manifest_out else out_path.with_suffix(".manifest.json")
  38. write_manifest(manifest_path, dataset)
  39. print(json.dumps({"candles": len(candles), "csv": str(out_path), "manifest": str(manifest_path)}, indent=2))
  40. return 0
  41. if __name__ == "__main__":
  42. raise SystemExit(main())