app.py 3.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. """FastAPI replacement for the legacy PHP proxy + static hosting.
  2. Why this exists:
  3. - Replace ba-simple-proxy.php (CORS proxy) with a safer, modern service.
  4. - Serve the static Wikipedia3D frontend without Apache/PHP.
  5. """
from __future__ import annotations

import os
from pathlib import Path
from typing import Optional
from urllib.parse import urljoin, urlparse

import httpx
from fastapi import FastAPI, HTTPException, Query, Request
from fastapi.responses import FileResponse, JSONResponse, Response
from fastapi.staticfiles import StaticFiles
  15. PROJECT_ROOT = Path(__file__).resolve().parents[1]
  16. APP_DIR = PROJECT_ROOT / "wikipedia3d"
  17. # Allowlist keeps the proxy safe; extend as needed.
  18. ALLOWED_HOST_SUFFIXES = {
  19. "wikipedia.org",
  20. "wikimedia.org",
  21. "dbpedia.org",
  22. "geonames.org",
  23. "query.wikidata.org",
  24. }
  25. def _is_allowed_url(raw_url: str) -> bool:
  26. try:
  27. parsed = urlparse(raw_url)
  28. except Exception:
  29. return False
  30. if parsed.scheme not in {"http", "https"}:
  31. return False
  32. if not parsed.hostname:
  33. return False
  34. host = parsed.hostname.lower()
  35. return any(host == suffix or host.endswith("." + suffix) for suffix in ALLOWED_HOST_SUFFIXES)
  36. app = FastAPI(title="Wikipedia3D Proxy")
  37. # Serve the static site (including index.html we generate).
  38. app.mount("/static", StaticFiles(directory=PROJECT_ROOT), name="static")
  39. @app.get("/")
  40. def root() -> FileResponse:
  41. return FileResponse(PROJECT_ROOT / "index.html")
  42. @app.get("/wikipedia3d/")
  43. def app_root() -> FileResponse:
  44. return FileResponse(APP_DIR / "index.html")
  45. @app.get("/proxy")
  46. async def proxy(
  47. request: Request,
  48. url: str = Query(..., description="Absolute URL to fetch"),
  49. mode: Optional[str] = Query(None, description="Use 'native' to stream raw content"),
  50. ) -> Response:
  51. if not _is_allowed_url(url):
  52. raise HTTPException(status_code=400, detail="URL not allowed")
  53. # Forward user agent to upstream where helpful.
  54. headers = {"User-Agent": request.headers.get("user-agent", "Wikipedia3D-Proxy")}
  55. async with httpx.AsyncClient(timeout=20.0, follow_redirects=True) as client:
  56. upstream = await client.get(url, headers=headers)
  57. if mode == "native":
  58. # Return upstream content + content-type only (mimics PHP native mode).
  59. content_type = upstream.headers.get("content-type", "application/octet-stream")
  60. return Response(content=upstream.content, media_type=content_type)
  61. # JSON mode (mimics ba-simple-proxy.php default).
  62. payload = {
  63. "contents": upstream.json() if "application/json" in upstream.headers.get("content-type", "") else upstream.text,
  64. "status": {"http_code": upstream.status_code},
  65. }
  66. return JSONResponse(payload)
  67. # Convenience: serve assets directly under root (matching old paths)
  68. # Example: /wikipedia3d/images/..., /css/... etc.
  69. app.mount("/wikipedia3d", StaticFiles(directory=APP_DIR), name="wikipedia3d")
  70. app.mount("/", StaticFiles(directory=PROJECT_ROOT), name="root-static")