"""Download raw Binance candle archives for the Hermes simulation.

Run as a script. For the requested symbol, interval and time range it asks
``hermes_sim.binance_bulk`` for monthly archives, falls back to daily archives
for months whose monthly archive is not returned, and writes a JSON manifest
listing every archive path that was obtained.
"""
from __future__ import annotations

from datetime import date, datetime, timezone
from pathlib import Path
import argparse
import json
import sys

ROOT = Path(__file__).resolve().parents[1]
SRC = ROOT / "src"
SIM_SRC = ROOT / "simulation" / "src"
# Put the in-repo source directories on sys.path so the project import below
# resolves when this file is executed directly.
for path in (str(SRC), str(SIM_SRC)):
    if path not in sys.path:
        sys.path.insert(0, path)

from hermes_sim.binance_bulk import _daily_iter, _month_iter, download_day_archive, download_month_archive  # noqa: E402


def _parse_dt(value: str) -> datetime:
    """Parse an ISO-8601 timestamp into a timezone-aware UTC ``datetime``.

    Surrounding whitespace is ignored and a trailing ``Z`` is accepted as UTC.
    A value without an offset is taken to be UTC; a value with an offset is
    converted to UTC.

    Raises:
        ValueError: If ``datetime.fromisoformat`` cannot parse the text.
    """
    text = value.strip()
    if text.endswith("Z"):
        # ``fromisoformat`` only understands a "Z" suffix from Python 3.11 on,
        # so spell the offset out explicitly.
        text = text[:-1] + "+00:00"
    dt = datetime.fromisoformat(text)
    if dt.tzinfo is None:
        return dt.replace(tzinfo=timezone.utc)
    return dt.astimezone(timezone.utc)


def _download_bulk_archives(*, symbol: str, interval: str, start: datetime, end: datetime, raw_dir: Path) -> list[str]:
    """Download the archives covering ``start``..``end``, monthly files first.

    Only the calendar dates of ``start`` and ``end`` are used. For each
    ``(year, month)`` yielded by ``_month_iter`` the monthly archive is
    requested (for the whole month, even if the range touches only part of
    it). When ``download_month_archive`` returns ``None``, daily archives are
    requested instead for the days of that month that lie inside the range.

    Args:
        symbol: Trading pair passed through to the download helpers.
        interval: Candle interval passed through to the download helpers.
        start: First instant of the requested range.
        end: Last instant of the requested range.
        raw_dir: Directory handed to the helpers as ``cache_dir``.

    Returns:
        The paths returned by the helpers, as strings, in request order.
        Requests for which a helper returned ``None`` are omitted.
    """
    downloaded: list[str] = []
    start_day = start.date()
    end_day = end.date()
    for year, month in _month_iter(start_day, end_day):
        month_path = download_month_archive(symbol, interval, year, month, cache_dir=raw_dir)
        if month_path is not None:
            downloaded.append(str(month_path))
            continue
        # No monthly archive: fall back to per-day archives, clamped to the
        # part of this month that overlaps the requested range.
        month_start = date(year, month, 1)
        next_month = date(year + 1, 1, 1) if month == 12 else date(year, month + 1, 1)
        month_end = date.fromordinal(next_month.toordinal() - 1)
        for day in _daily_iter(max(start_day, month_start), min(end_day, month_end)):
            day_path = download_day_archive(symbol, interval, day, cache_dir=raw_dir)
            if day_path is not None:
                downloaded.append(str(day_path))
    return downloaded


def main() -> int:
    """Parse the command line, download archives and write the manifest.

    The symbol is upper-cased once and that same value is used both for the
    downloads and in the manifest, so the manifest always describes what was
    actually requested. A range whose start lies after its end is rejected
    with a usage error instead of silently producing an empty manifest.

    Returns:
        ``0`` on success. Invalid arguments terminate the process through
        ``argparse`` (exit status 2).
    """
    parser = argparse.ArgumentParser(description="Download raw Binance candle archives for Hermes simulation.")
    parser.add_argument("--symbol", default="XRPUSDT")
    parser.add_argument("--interval", default="1m")
    parser.add_argument("--start", required=True, help="ISO timestamp, e.g. 2024-01-01T00:00:00Z")
    parser.add_argument("--end", required=True, help="ISO timestamp, e.g. 2024-02-01T00:00:00Z")
    parser.add_argument("--raw-dir", default=str(ROOT / "simulation" / "data" / "raw"))
    parser.add_argument(
        "--manifest-out",
        default=str(ROOT / "simulation" / "data" / "manifests" / "raw_download_manifest.json"),
    )
    args = parser.parse_args()

    start = _parse_dt(args.start)
    end = _parse_dt(args.end)
    if start > end:
        parser.error(f"--start ({start.isoformat()}) must not be after --end ({end.isoformat()})")

    # Normalise once so the downloads and the manifest agree on the symbol.
    symbol = args.symbol.upper()
    raw_dir = Path(args.raw_dir)
    downloaded = _download_bulk_archives(
        symbol=symbol,
        interval=args.interval,
        start=start,
        end=end,
        raw_dir=raw_dir,
    )

    manifest = {
        "symbol": symbol,
        "interval": args.interval,
        "start": start.isoformat(),
        "end": end.isoformat(),
        "source_kind": "bulk",
        "raw_dir": str(raw_dir),
        "archives": downloaded,
        "archive_count": len(downloaded),
    }
    manifest_path = Path(args.manifest_out)
    manifest_path.parent.mkdir(parents=True, exist_ok=True)
    manifest_path.write_text(json.dumps(manifest, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
    print(json.dumps({"downloaded": len(downloaded), "manifest": str(manifest_path)}, indent=2))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())