| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081 |
- from __future__ import annotations
- from datetime import date, datetime, timezone
- from pathlib import Path
- import argparse
- import json
- import sys
# Repository layout, resolved relative to this script: ROOT is the directory
# one level above the one containing this file.
ROOT = Path(__file__).resolve().parents[1]
SRC = ROOT / "src"
SIM_SRC = ROOT / "simulation" / "src"

# Put both source trees at the front of sys.path (skipping any entry that is
# already present) so the project import that follows this block can resolve
# when the script is run directly.
for path in map(str, (SRC, SIM_SRC)):
    if path in sys.path:
        continue
    sys.path.insert(0, path)
- from hermes_sim.binance_bulk import _daily_iter, _month_iter, download_day_archive, download_month_archive # noqa: E402
- def _parse_dt(value: str) -> datetime:
- text = value.strip()
- if text.endswith("Z"):
- text = text[:-1] + "+00:00"
- dt = datetime.fromisoformat(text)
- if dt.tzinfo is None:
- return dt.replace(tzinfo=timezone.utc)
- return dt.astimezone(timezone.utc)
def _download_bulk_archives(*, symbol: str, interval: str, start: datetime, end: datetime, raw_dir: Path) -> list[str]:
    """Request the archives spanning ``start``..``end`` and collect their paths.

    For every ``(year, month)`` pair produced by ``_month_iter`` over the
    calendar dates of ``start`` and ``end``, ``download_month_archive`` is
    called first. When it returns ``None``, ``download_day_archive`` is called
    for each day that ``_daily_iter`` yields between the part of that month
    lying inside the requested date range; days for which it returns ``None``
    are skipped.

    Args:
        symbol: Symbol forwarded unchanged to the download helpers.
        interval: Candle interval forwarded unchanged to the download helpers.
        start: Start of the range; only its date part is used.
        end: End of the range; only its date part is used.
        raw_dir: Directory passed to the helpers as ``cache_dir``.

    Returns:
        ``str()`` of every non-``None`` path the helpers returned, in call
        order.
    """
    first_day, last_day = start.date(), end.date()
    archives: list[str] = []

    for year, month in _month_iter(first_day, last_day):
        monthly = download_month_archive(symbol, interval, year, month, cache_dir=raw_dir)
        if monthly is not None:
            archives.append(str(monthly))
            continue

        # No monthly archive was returned: fall back to per-day requests,
        # clamped to the overlap of this month and the requested range.
        if month == 12:
            following = date(year + 1, 1, 1)
        else:
            following = date(year, month + 1, 1)
        window_start = max(first_day, date(year, month, 1))
        # Last day of the month = the day before the first of the next month.
        window_end = min(last_day, date.fromordinal(following.toordinal() - 1))

        for day in _daily_iter(window_start, window_end):
            daily = download_day_archive(symbol, interval, day, cache_dir=raw_dir)
            if daily is None:
                continue
            archives.append(str(daily))

    return archives
def main(argv: list[str] | None = None) -> int:
    """Download raw Binance candle archives and write a JSON manifest.

    Parses the command line, requests the archives for the given symbol,
    interval and time range, writes a manifest describing the run to
    ``--manifest-out`` and prints a short JSON summary to stdout.

    Args:
        argv: Argument list to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, so existing ``main()`` callers are
            unaffected.

    Returns:
        ``0`` on success.

    Raises:
        SystemExit: With status 2 (via ``parser.error``) when a timestamp
            cannot be parsed or ``--end`` is earlier than ``--start``.
    """
    parser = argparse.ArgumentParser(description="Download raw Binance candle archives for Hermes simulation.")
    parser.add_argument("--symbol", default="XRPUSDT")
    parser.add_argument("--interval", default="1m")
    parser.add_argument("--start", required=True, help="ISO timestamp, e.g. 2024-01-01T00:00:00Z")
    parser.add_argument("--end", required=True, help="ISO timestamp, e.g. 2024-02-01T00:00:00Z")
    parser.add_argument("--raw-dir", default=str(ROOT / "simulation" / "data" / "raw"))
    parser.add_argument("--manifest-out", default=str(ROOT / "simulation" / "data" / "manifests" / "raw_download_manifest.json"))
    args = parser.parse_args(argv)

    # Report bad timestamps as a usage error instead of a raw traceback.
    try:
        start = _parse_dt(args.start)
        end = _parse_dt(args.end)
    except ValueError as exc:
        parser.error(f"invalid ISO timestamp: {exc}")
    # An inverted range would otherwise produce a manifest and exit 0 as if
    # the run had succeeded.
    if end < start:
        parser.error("--end must not be earlier than --start")

    # Normalise once so the manifest records exactly the symbol that was
    # handed to the downloader (previously only the manifest was upper-cased).
    symbol = args.symbol.upper()
    raw_dir = Path(args.raw_dir)
    downloaded = _download_bulk_archives(symbol=symbol, interval=args.interval, start=start, end=end, raw_dir=raw_dir)

    manifest = {
        "symbol": symbol,
        "interval": args.interval,
        "start": start.isoformat(),
        "end": end.isoformat(),
        "source_kind": "bulk",
        "raw_dir": str(raw_dir),
        "archives": downloaded,
        "archive_count": len(downloaded),
    }
    manifest_path = Path(args.manifest_out)
    manifest_path.parent.mkdir(parents=True, exist_ok=True)
    manifest_path.write_text(json.dumps(manifest, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")

    print(json.dumps({"downloaded": len(downloaded), "manifest": str(manifest_path)}, indent=2))
    return 0
if __name__ == "__main__":
    # Run the CLI when executed as a script; main()'s return value becomes
    # the process exit status.
    sys.exit(main())
|