| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152 |
- from __future__ import annotations
- from pathlib import Path
- import argparse
- import json
- import sys
# ROOT is the parent of the directory that contains this script.
ROOT = Path(__file__).resolve().parents[1]
SRC = ROOT / "src"
SIM_SRC = ROOT / "simulation" / "src"

# Put both source trees at the front of sys.path so the imports below resolve
# when the script is run directly. Each entry is inserted at index 0 in turn,
# so SIM_SRC ends up ahead of SRC when both are newly added.
for path in map(str, (SRC, SIM_SRC)):
    if path in sys.path:
        continue
    sys.path.insert(0, path)
- from hermes_sim.candles import Candle # noqa: E402
- from hermes_sim.preprocess import build_manifest, discover_raw_archives, load_raw_archives, write_manifest, write_prepared_csv # noqa: E402
def main(argv: list[str] | None = None) -> int:
    """Preprocess raw Binance archives into a prepared CSV plus a manifest.

    Discovers the raw archives for the requested symbol and interval, loads
    them into candles, writes the prepared CSV, builds and writes a manifest
    for the dataset, and prints a JSON summary to stdout.

    Args:
        argv: Command-line arguments to parse. ``None`` (the default) lets
            argparse read ``sys.argv[1:]``, which is the original behaviour.

    Returns:
        ``0`` on success.

    Raises:
        SystemExit: If no raw archives are found under ``--raw-dir``, or if
            argparse rejects the arguments.
    """
    parser = argparse.ArgumentParser(
        description="Preprocess raw Binance archives into a canonical Hermes input CSV."
    )
    parser.add_argument("--symbol", default="XRPUSDT")
    parser.add_argument("--interval", default="1m")
    parser.add_argument("--raw-dir", default=str(ROOT / "simulation" / "data" / "raw"))
    parser.add_argument(
        "--out",
        default=None,
        # The help text must describe the path that is actually built below.
        help=(
            "Output CSV path. Defaults to "
            "simulation/data/prepared/<SYMBOL>/<interval>/<SYMBOL>-<interval>.csv"
        ),
    )
    parser.add_argument(
        "--manifest-out",
        default=None,
        help="Output manifest JSON path. Defaults beside the CSV.",
    )
    args = parser.parse_args(argv)

    raw_dir = Path(args.raw_dir)
    archives = discover_raw_archives(raw_dir, args.symbol, args.interval)
    if not archives:
        raise SystemExit(f"No raw archives found under {raw_dir}")
    candles = load_raw_archives(archives)

    if args.out:
        out_path = Path(args.out)
    else:
        # Only the on-disk location is upper-cased; the symbol handed to the
        # discovery and manifest helpers stays exactly as the user typed it.
        symbol_dir = args.symbol.upper()
        out_path = (
            ROOT
            / "simulation"
            / "data"
            / "prepared"
            / symbol_dir
            / args.interval
            / f"{symbol_dir}-{args.interval}.csv"
        )
    write_prepared_csv(out_path, candles)

    dataset = build_manifest(
        symbol=args.symbol,
        interval=args.interval,
        source_kind="bulk",
        source_files=archives,
        candles=candles,
        output_csv=out_path,
    )
    if args.manifest_out:
        manifest_path = Path(args.manifest_out)
    else:
        # Replaces the CSV's final suffix, e.g. X-1m.csv -> X-1m.manifest.json.
        manifest_path = out_path.with_suffix(".manifest.json")
    write_manifest(manifest_path, dataset)

    summary = {
        "candles": len(candles),
        "csv": str(out_path),
        "manifest": str(manifest_path),
    }
    print(json.dumps(summary, indent=2))
    return 0
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|