"""FastAPI replacement for the legacy PHP proxy + static hosting. Why this exists: - Replace ba-simple-proxy.php (CORS proxy) with a safer, modern service. - Serve the static Wikipedia3D frontend without Apache/PHP. """ from __future__ import annotations import os from pathlib import Path from typing import Optional from urllib.parse import urlparse import httpx from fastapi import FastAPI, HTTPException, Query, Request from fastapi.responses import FileResponse, JSONResponse, Response from fastapi.staticfiles import StaticFiles PROJECT_ROOT = Path(__file__).resolve().parents[1] APP_DIR = PROJECT_ROOT / "wikipedia3d" # Allowlist keeps the proxy safe; extend as needed. ALLOWED_HOST_SUFFIXES = { "wikipedia.org", "wikimedia.org", "dbpedia.org", "geonames.org", "query.wikidata.org", } def _is_allowed_url(raw_url: str) -> bool: try: parsed = urlparse(raw_url) except Exception: return False if parsed.scheme not in {"http", "https"}: return False if not parsed.hostname: return False host = parsed.hostname.lower() return any(host == suffix or host.endswith("." + suffix) for suffix in ALLOWED_HOST_SUFFIXES) app = FastAPI(title="Wikipedia3D Proxy") # Serve the static site (including index.html we generate). app.mount("/static", StaticFiles(directory=PROJECT_ROOT), name="static") @app.get("/") def root() -> FileResponse: return FileResponse(PROJECT_ROOT / "index.html") @app.get("/wikipedia3d/") def app_root() -> FileResponse: return FileResponse(APP_DIR / "index.html") @app.get("/proxy") async def proxy( request: Request, url: str = Query(..., description="Absolute URL to fetch"), mode: Optional[str] = Query(None, description="Use 'native' to stream raw content"), ) -> Response: if not _is_allowed_url(url): raise HTTPException(status_code=400, detail="URL not allowed") # Forward user agent to upstream where helpful. headers = {"User-Agent": request.headers.get("user-agent", "Wikipedia3D-Proxy")} async with httpx.AsyncClient(timeout=20.0, follow_redirects=True) as client: upstream = await client.get(url, headers=headers) if mode == "native": # Return upstream content + content-type only (mimics PHP native mode). content_type = upstream.headers.get("content-type", "application/octet-stream") return Response(content=upstream.content, media_type=content_type) # JSON mode (mimics ba-simple-proxy.php default). payload = { "contents": upstream.json() if "application/json" in upstream.headers.get("content-type", "") else upstream.text, "status": {"http_code": upstream.status_code}, } return JSONResponse(payload) # Convenience: serve assets directly under root (matching old paths) # Example: /wikipedia3d/images/..., /css/... etc. app.mount("/wikipedia3d", StaticFiles(directory=APP_DIR), name="wikipedia3d") app.mount("/", StaticFiles(directory=PROJECT_ROOT), name="root-static")