# download_candles.py (3.2 KB)
  1. from __future__ import annotations
  2. from datetime import date, datetime, timezone
  3. from pathlib import Path
  4. import argparse
  5. import json
  6. import sys
  7. ROOT = Path(__file__).resolve().parents[1]
  8. SRC = ROOT / "src"
  9. SIM_SRC = ROOT / "simulation" / "src"
  10. for path in (str(SRC), str(SIM_SRC)):
  11. if path not in sys.path:
  12. sys.path.insert(0, path)
  13. from hermes_sim.binance_bulk import _daily_iter, _month_iter, download_day_archive, download_month_archive # noqa: E402
  14. def _parse_dt(value: str) -> datetime:
  15. text = value.strip()
  16. if text.endswith("Z"):
  17. text = text[:-1] + "+00:00"
  18. dt = datetime.fromisoformat(text)
  19. if dt.tzinfo is None:
  20. return dt.replace(tzinfo=timezone.utc)
  21. return dt.astimezone(timezone.utc)
  22. def _download_bulk_archives(*, symbol: str, interval: str, start: datetime, end: datetime, raw_dir: Path) -> list[str]:
  23. downloaded: list[str] = []
  24. start_day = start.date()
  25. end_day = end.date()
  26. for year, month in _month_iter(start_day, end_day):
  27. month_path = download_month_archive(symbol, interval, year, month, cache_dir=raw_dir)
  28. if month_path is not None:
  29. downloaded.append(str(month_path))
  30. continue
  31. month_start = date(year, month, 1)
  32. next_month = date(year + 1, 1, 1) if month == 12 else date(year, month + 1, 1)
  33. month_end = next_month.fromordinal(next_month.toordinal() - 1)
  34. for day in _daily_iter(max(start_day, month_start), min(end_day, month_end)):
  35. day_path = download_day_archive(symbol, interval, day, cache_dir=raw_dir)
  36. if day_path is not None:
  37. downloaded.append(str(day_path))
  38. return downloaded
  39. def main() -> int:
  40. parser = argparse.ArgumentParser(description="Download raw Binance candle archives for Hermes simulation.")
  41. parser.add_argument("--symbol", default="XRPUSDT")
  42. parser.add_argument("--interval", default="1m")
  43. parser.add_argument("--start", required=True, help="ISO timestamp, e.g. 2024-01-01T00:00:00Z")
  44. parser.add_argument("--end", required=True, help="ISO timestamp, e.g. 2024-02-01T00:00:00Z")
  45. parser.add_argument("--raw-dir", default=str(ROOT / "simulation" / "data" / "raw"))
  46. parser.add_argument("--manifest-out", default=str(ROOT / "simulation" / "data" / "manifests" / "raw_download_manifest.json"))
  47. args = parser.parse_args()
  48. start = _parse_dt(args.start)
  49. end = _parse_dt(args.end)
  50. raw_dir = Path(args.raw_dir)
  51. downloaded = _download_bulk_archives(symbol=args.symbol, interval=args.interval, start=start, end=end, raw_dir=raw_dir)
  52. manifest = {
  53. "symbol": args.symbol.upper(),
  54. "interval": args.interval,
  55. "start": start.isoformat(),
  56. "end": end.isoformat(),
  57. "source_kind": "bulk",
  58. "raw_dir": str(raw_dir),
  59. "archives": downloaded,
  60. "archive_count": len(downloaded),
  61. }
  62. manifest_path = Path(args.manifest_out)
  63. manifest_path.parent.mkdir(parents=True, exist_ok=True)
  64. manifest_path.write_text(json.dumps(manifest, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
  65. print(json.dumps({"downloaded": len(downloaded), "manifest": str(manifest_path)}, indent=2))
  66. return 0
  67. if __name__ == "__main__":
  68. raise SystemExit(main())