Lukas Goldschmidt 1 сар өмнө
parent
commit
45e98ebd9c

+ 25 - 41
app/atlas_store.py

@@ -19,12 +19,10 @@ import re
 import asyncio
 from typing import Any, Dict, List, Optional
 
-from SPARQLWrapper import JSON, POST, SPARQLWrapper
-
 from mcp import ClientSession
 from mcp.client.sse import sse_client
 
-from atlas_model import Claim, CurateFlag, Entity, Identifier, Provenance
+from .atlas_model import Claim, CurateFlag, Entity, Identifier, Provenance
 
 # ---------------------------------------------------------------------------
 # Namespace constants — keep in sync with atlas.ttl
@@ -82,52 +80,38 @@ def _escape(s: str) -> str:
     return s.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")
 
 
async def _sparql_update(endpoint: str, query: str) -> None:
    """Execute a SPARQL UPDATE via the Virtuoso MCP/SSE `sparql_update` tool.

    Args:
        endpoint: MCP SSE URL; must contain "/mcp/sse".
        query: SPARQL UPDATE text to run remotely.

    Raises:
        RuntimeError: if the endpoint is not an MCP/SSE URL, or the remote
            tool reports an error.
    """
    if "/mcp/sse" not in endpoint:
        raise RuntimeError("atlas_store only supports Virtuoso MCP/SSE endpoints in this scaffold")

    # We are already in an async context: no need to define a nested
    # coroutine and immediately await it — open the transport directly.
    async with sse_client(endpoint, timeout=10, sse_read_timeout=300) as (read_stream, write_stream):
        async with ClientSession(read_stream, write_stream) as session:
            await session.initialize()
            result = await session.call_tool("sparql_update", {"input": {"query": query}})
            if result.isError:
                # The result object has no .error attribute; repr the whole thing.
                raise RuntimeError(f"sparql_update failed: {result}")
+
async def _sparql_select(endpoint: str, query: str) -> List[Dict[str, Any]]:
    """Execute a SPARQL SELECT via the Virtuoso MCP/SSE `sparql_query` tool.

    Args:
        endpoint: MCP SSE URL; must contain "/mcp/sse".
        query: SPARQL SELECT text to run remotely.

    Returns:
        The SPARQL-JSON result bindings (possibly empty) — an empty list
        when the payload is not a dict.

    Raises:
        RuntimeError: if the endpoint is not an MCP/SSE URL, or the remote
            tool reports an error.
    """
    if "/mcp/sse" not in endpoint:
        raise RuntimeError("atlas_store only supports Virtuoso MCP/SSE endpoints in this scaffold")

    # We are already in an async context: no need to define a nested
    # coroutine and immediately await it — open the transport directly.
    async with sse_client(endpoint, timeout=10, sse_read_timeout=300) as (read_stream, write_stream):
        async with ClientSession(read_stream, write_stream) as session:
            await session.initialize()
            result = await session.call_tool("sparql_query", {"input": {"query": query}})
            if result.isError:
                raise RuntimeError(f"sparql_query failed: {result}")

            # Prefer structured content; fall back to raw content.
            data = result.structuredContent if result.structuredContent is not None else result.content
            if isinstance(data, dict):
                return data.get("results", {}).get("bindings", []) or []
            return []
 
 # ---------------------------------------------------------------------------
 # Save

+ 1 - 2
app/remote_sparql_client.py

@@ -37,11 +37,10 @@ class RemoteSparqlClient:
                 await session.initialize()
                 result = await session.call_tool(tool_name, {"input": payload})
                 if result.isError:
-                    raise RuntimeError(f"MCP tool {tool_name} failed: {result.error}")
+                    raise RuntimeError(f"MCP tool {tool_name} failed: {result}")
 
                 data = result.structuredContent if result.structuredContent is not None else result.content
                 # We expect dict-like data for sparql_*.
                 if isinstance(data, dict):
                     return data
                 return {"raw": data}
-

+ 171 - 59
app/resolve.py

@@ -1,86 +1,198 @@
 from __future__ import annotations
 
+import hashlib
 import os
-import time
-import uuid
 from dataclasses import dataclass
-from typing import Any, Awaitable, Callable
+from typing import Any
 
+from .atlas_model import CurateFlag, Entity, Identifier
+from .atlas_store import _sparql_select, _sparql_update
+from .wikidata import WikidataSearch
 
-CallToolFn = Callable[[str, dict[str, Any]], Awaitable[dict[str, Any]]]
 
+ATLAS = "http://world.eu.org/atlas_ontology#"
+ATLAS_D = "http://world.eu.org/atlas_data#"
+DEFAULT_ENDPOINT = os.getenv("ATLAS_VIRTUOSO_MCP_SSE_URL", "http://192.168.0.249:8501/mcp/sse")
+DEFAULT_UPDATE_ENDPOINT = os.getenv("ATLAS_VIRTUOSO_MCP_SSE_URL", DEFAULT_ENDPOINT)
 
-def _extract_bindings(result_payload: Any) -> list[dict[str, Any]]:
-    """Best-effort extraction for Virtuoso/MCP-style SPARQL results."""
-    if isinstance(result_payload, dict):
-        return result_payload.get("results", {}).get("bindings", []) or []
-    return []
 
+def _hash_id(subject: str) -> str:
+    return hashlib.sha1(subject.strip().lower().encode("utf-8")).hexdigest()[:16]
 
-def _to_float(value: Any) -> float:
-    try:
-        return float(value)
-    except Exception:
-        return 0.0
 
def _entity_iri(atlas_id: str) -> str:
    """Angle-bracketed IRI for an entity node in the atlas data namespace."""
    return "<" + ATLAS_D + "entity_" + atlas_id + ">"
 
-def _now_iso() -> str:
-    # Avoid datetime imports; keep it lightweight.
-    import datetime
 
-    return datetime.datetime.now(datetime.timezone.utc).isoformat()
+def _escape_literal(value: str) -> str:
+    return value.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")
 
 
-def _build_candidates_query(*, subject: str, max_candidates: int, graph_iri: str) -> str:
-    # Scaffolding query: adjust the predicate/shape once the remote schema is fixed.
-    # Keep it deterministic and parameterized by the provided subject string.
-    safe = subject.replace("\\", "\\\\").replace('"', '\\"')
def _label_lookup_query(subject: str) -> str:
    """Build a SPARQL SELECT matching one entity by canonical label or alias.

    The match is case-insensitive (LCASE on both sides). Two UNION arms:
    the first matches atlas:canonicalLabel, the second atlas:aliasLabel;
    both optionally pull the type and the wikidata-qid identifier.
    LIMIT 1 — at most one row comes back.
    """
    # Escape the needle so it is safe inside a quoted SPARQL literal.
    safe = _escape_literal(subject)
    return f"""
PREFIX atlas: <{ATLAS}>
SELECT ?atlasId ?label ?type ?qid ?alias WHERE {{
  VALUES ?needle {{ \"{safe}\" }}
  {{
    ?entity a atlas:Entity ;
            atlas:atlasId ?atlasId ;
            atlas:canonicalLabel ?label .
    OPTIONAL {{ ?entity atlas:hasCanonicalType ?type . }}
    OPTIONAL {{ ?entity atlas:hasIdentifier ?ident . ?ident atlas:scheme \"wikidata-qid\" ; atlas:value ?qid . }}
    FILTER(LCASE(STR(?label)) = LCASE(?needle))
  }}
  UNION
  {{
    ?entity a atlas:Entity ;
            atlas:atlasId ?atlasId ;
            atlas:aliasLabel ?alias .
    OPTIONAL {{ ?entity atlas:canonicalLabel ?label . }}
    OPTIONAL {{ ?entity atlas:hasCanonicalType ?type . }}
    OPTIONAL {{ ?entity atlas:hasIdentifier ?ident . ?ident atlas:scheme \"wikidata-qid\" ; atlas:value ?qid . }}
    FILTER(LCASE(STR(?alias)) = LCASE(?needle))
  }}
}}
LIMIT 1
""".strip()
+
 
-    BIND(STR(?entity) AS ?uri)
-    BIND(STRAFTER(STR(?entity), '#') AS ?id)
-    BIND(0.9 AS ?confidence)
-    BIND("sparql" AS ?source)
async def _wikidata_lookup(subject: str) -> dict[str, Any] | None:
    """Return the top Wikidata search hit for *subject*, or None on no hit."""
    response = await WikidataSearch({"search": subject, "limit": 1}).search()
    hits = response.get("results", [])
    if not hits:
        return None
    return hits[0]
+
+
def _entity_from_wikidata(subject: str, wd: dict[str, Any]) -> Entity:
    """Build an Atlas Entity from a trimmed Wikidata search hit dict."""
    label = wd.get("label") or subject
    qid = wd.get("id")
    raw_type = wd.get("type") or "Thing"
    # Ensure the type is expressed as an atlas: CURIE.
    typed = raw_type if raw_type.startswith("atlas:") else f"atlas:{raw_type}"
    # Keep the original subject as an alias only when it differs from the label.
    keep_alias = subject.lower() != label.lower()

    return Entity(
        id=_hash_id(subject),
        label=label,
        description=wd.get("description"),
        type=typed,
        aliases=[subject] if keep_alias else [],
        identifiers=[Identifier(scheme="wikidata-qid", value=qid)] if qid else [],
        needs_curation=False,
    )
+
+
def _entity_to_turtle(entity: Entity) -> str:
    """Serialize *entity* and its Identifier nodes as Turtle triples.

    Assumes the surrounding SPARQL request declares the `atlas:` and `xsd:`
    prefixes (see _persist_entity).
    """

    def _ident_iri(ident: Identifier) -> str:
        # One place to build identifier IRIs; the old code built
        # f"<...>.".rstrip(".") in one branch and the plain form in the other.
        return f"<{ATLAS_D}ident_{ident.scheme}_{_hash_id(ident.value)}>"

    lines = [
        _entity_iri(entity.id),
        "  a atlas:Entity ;",
        f'  atlas:atlasId "{_escape_literal(entity.id)}" ;',
        f'  atlas:canonicalLabel "{_escape_literal(entity.label)}"@en ;',
    ]
    if entity.description:
        lines.append(f'  atlas:canonicalDescription "{_escape_literal(entity.description)}"@en ;')
    if entity.type:
        lines.append(f"  atlas:hasCanonicalType {entity.type} ;")
    for alias in entity.aliases:
        lines.append(f'  atlas:aliasLabel "{_escape_literal(alias)}"@en ;')
    for ident in entity.identifiers:
        lines.append(f"  atlas:hasIdentifier {_ident_iri(ident)} ;")
    lines.append(f'  atlas:needsCuration "{str(entity.needs_curation).lower()}"^^xsd:boolean .')
    lines.append("")

    # Emit the Identifier nodes themselves.
    for ident in entity.identifiers:
        lines.append(_ident_iri(ident))
        lines.append("  a atlas:Identifier ;")
        lines.append(f'  atlas:scheme "{_escape_literal(ident.scheme)}" ;')
        lines.append(f'  atlas:value "{_escape_literal(ident.value)}" .')
        lines.append("")
    return "\n".join(lines)
+
+
+def _flatten_exception_details(exc: BaseException) -> list[str]:
+    parts = [f"{type(exc).__name__}: {exc}"]
+    nested = getattr(exc, "exceptions", None)
+    if nested:
+        for sub in nested:
+            parts.extend(_flatten_exception_details(sub))
+    return parts
+
+
async def _persist_entity(entity: Entity) -> None:
    """Insert *entity*'s Turtle serialization into the atlas data graph.

    Raises:
        RuntimeError: propagated from _sparql_update on endpoint/tool errors.
    """
    ttl = _entity_to_turtle(entity)
    # INSERT DATA requires ground triples; _entity_to_turtle produces them.
    query = f"""
PREFIX atlas: <{ATLAS}>
PREFIX atlas_data: <{ATLAS_D}>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
INSERT DATA {{
  GRAPH <{ATLAS_D}> {{
{ttl}
  }}
}}
""".strip()
    await _sparql_update(DEFAULT_UPDATE_ENDPOINT, query)
+
+
async def _load_entity(subject: str) -> dict[str, Any] | None:
    """Look up *subject* in the triple store by canonical label or alias.

    Returns a flat dict of the first binding row, or None when nothing
    matched.
    """
    bindings = await _sparql_select(DEFAULT_ENDPOINT, _label_lookup_query(subject))
    if not bindings:
        return None
    top = bindings[0]

    def value_of(var: str) -> Any:
        # SPARQL-JSON cells look like {"value": ..., "type": ...}.
        return top.get(var, {}).get("value")

    return {
        "atlas_id": value_of("atlasId"),
        "label": value_of("label"),
        "type": value_of("type"),
        "wikidata_id": value_of("qid"),
        "alias": value_of("alias"),
    }
 
 
 @dataclass
 class ResolveService:
-    call_tool: CallToolFn | None = None
-
-    async def _call_tool(self, tool_name: str, payload: dict[str, Any]) -> dict[str, Any]:
-        if self.call_tool is None:
-            # Important: default behavior is a stub. This scaffolding should run
-            # safely without requiring a live remote MCP/Sparql backend.
-            raise RuntimeError(
-                "REMOTE_SPASQL_MCP_NOT_CONFIGURED (stub). "
-                "Inject call_tool in tests or wire a real RemoteSparqlClient explicitly."
-            )
-        return await self.call_tool(tool_name, payload)
-
-    async def resolve(
-        self,
-        *,
-        subject: str,
-        context: dict[str, Any] | None,
-        constraints: dict[str, Any] | None,
-        hints: dict[str, Any] | None,
-        debug: dict[str, Any] | None,
-    ) -> dict[str, Any]:
-        # Stubbed implementation for “works first, decide logic later”.
-        # We intentionally ignore inputs until you confirm the app structure.
-        return {"status": "ok"}
+    load_entity_fn: Any = _load_entity
+    wikidata_lookup_fn: Any = _wikidata_lookup
+    persist_entity_fn: Any = _persist_entity
+
+    async def resolve(self, *, subject: str, context: dict[str, Any] | None = None,
+                      constraints: dict[str, Any] | None = None,
+                      hints: dict[str, Any] | None = None,
+                      debug: dict[str, Any] | None = None) -> dict[str, Any]:
+        try:
+            subject = (subject or "").strip()
+            if not subject:
+                return {"status": "not_found"}
+
+            stored = await self.load_entity_fn(subject)
+            if stored:
+                return {
+                    "status": "resolved",
+                    "atlas_id": stored.get("atlas_id"),
+                    "label": stored.get("label"),
+                    "type": stored.get("type"),
+                    "wikidata_id": stored.get("wikidata_id"),
+                    "alias": stored.get("alias") or subject,
+                }
+
+            wd = await self.wikidata_lookup_fn(subject)
+            if not wd:
+                return {"status": "not_found"}
+
+            entity = _entity_from_wikidata(subject, wd)
+            await self.persist_entity_fn(entity)
+
+            return {
+                "status": "resolved",
+                "atlas_id": entity.id,
+                "label": entity.label,
+                "type": entity.type,
+                "wikidata_id": wd.get("id"),
+                "alias": subject,
+            }
+        except Exception as exc:
+            detail = " | ".join(_flatten_exception_details(exc))
+            return {
+                "status": "error",
+                "error": {"code": "RESOLVE_FAILED", "message": detail},
+            }

+ 239 - 0
app/wikidata.py

@@ -0,0 +1,239 @@
+from __future__ import annotations
+
+import asyncio
+import os
+from dataclasses import dataclass, field
+from typing import Any, Callable
+from urllib.parse import urlencode
+
+import httpx
+
+
+PROPERTY_CACHE: dict[str, str] = {}
+WIKIDATA_USER_AGENT = os.getenv(
+    "ATLAS_WIKIDATA_USER_AGENT",
+    "Atlas/1.0 (contact: lukas.goldschmidt+atlas@googlemail.com)",
+)
+
+
@dataclass
class WikidataOptions:
    """Request options for the Wikidata action API (wbsearchentities / wbgetentities)."""

    search: str = ""  # search term; validateOptions() rejects empty
    language: str = "en"  # language code for labels/descriptions
    strictlanguage: bool = True  # NOTE(review): not currently sent with any request
    type: str = "item"  # entity type passed to wbsearchentities
    limit: int = 7  # result cap; validateOptions() enforces 1..50
    searchAction: str = "wbsearchentities"  # API action used by search()
    getAction: str = "wbgetentities"  # API action used by get_entities()
    apiHost: str = "www.wikidata.org"  # API host
    apiPath: str = "/w/api.php"  # API path
+
+
+def _is_null(value: Any) -> bool:
+    return value is None
+
+
+def _build_url(opts: WikidataOptions, params: dict[str, Any]) -> str:
+    query = urlencode(params)
+    return f"https://{opts.apiHost}{opts.apiPath}?{query}"
+
+
class WikidataSearch:
    """Thin async client for the Wikidata action API.

    Ported from a JS helper, hence the camelCase option and method names,
    which are kept for interface compatibility. An httpx.AsyncClient can be
    injected (e.g. for tests); otherwise one is created per call.
    """

    def __init__(self, options: dict[str, Any] | None = None, *, client: httpx.AsyncClient | None = None):
        self.defaultOptions = WikidataOptions()
        # Keep only keys that are actual WikidataOptions fields.
        self.options = WikidataOptions(**{k: v for k, v in (options or {}).items() if hasattr(self.defaultOptions, k)})
        self._client = client

    def _new_client(self) -> tuple[httpx.AsyncClient, bool]:
        """Return (client, should_close).

        The injected client is reused and not closed by us; otherwise a
        fresh client is created with the standard headers/timeout and the
        caller must close it. (This was previously duplicated verbatim in
        search(), get_entities() and get_entity_data().)
        """
        if self._client is not None:
            return self._client, False
        client = httpx.AsyncClient(
            timeout=20,
            headers={"Accept": "application/json", "User-Agent": WIKIDATA_USER_AGENT},
        )
        return client, True

    def set(self, key: str, value: Any) -> None:
        """Set a known option; unknown keys are silently ignored."""
        if hasattr(self.options, key):
            setattr(self.options, key, value)

    def validateOptions(self) -> bool:
        """Require a non-empty search term and a limit within 1..50."""
        if len(self.options.search) == 0:
            return False
        if self.options.limit > 50 or self.options.limit < 1:
            return False
        return True

    def clearPropertyCache(self) -> None:
        """Drop all cached property/item labels (module-global cache)."""
        PROPERTY_CACHE.clear()

    async def search(self) -> dict[str, Any]:
        """Run wbsearchentities for the configured search term.

        Returns:
            {"results": [...]} where each hit is trimmed to
            url/id/label/description; hits missing url, id or label are
            dropped. On invalid options: {"results": [], "error": "Bad options"}.
        """
        if not self.validateOptions():
            return {"results": [], "error": "Bad options"}

        params = {
            "action": self.options.searchAction,
            "language": self.options.language,
            "search": self.options.search,
            "type": self.options.type,
            "limit": self.options.limit,
            "format": "json",
        }
        url = _build_url(self.options, params)

        client, close_client = self._new_client()
        try:
            resp = await client.get(url)
            resp.raise_for_status()
            data = resp.json()
            results = []
            for item in data.get("search", []):
                trimmed = {}
                for key in ("url", "id", "label", "description"):
                    if item.get(key):
                        trimmed[key] = item[key]
                # Keep only hits carrying the minimum identifying fields.
                if {"url", "id", "label"}.issubset(trimmed):
                    results.append(trimmed)
            return {"results": results}
        finally:
            if close_client:
                await client.aclose()

    def search_sync(self) -> dict[str, Any]:
        """Blocking wrapper around search() for non-async callers."""
        return asyncio.run(self.search())

    async def get_entities(self, entities: list[str], resolve_properties: bool = True) -> dict[str, Any]:
        """Fetch full entities (claims, descriptions, labels) by QID.

        Args:
            entities: list of QIDs; silently truncated to the API max of 50.
            resolve_properties: when True, attempt to replace P-ids/Q-ids in
                claims with cached labels.
        """
        if not isinstance(entities, list):
            return {"error": "Bad |entities| parameter. Must be an array of strings"}
        if len(entities) == 0:
            return {"entities": []}
        if len(entities) > 50:
            entities = entities[:50]

        params = {
            "action": self.options.getAction,
            "languages": self.options.language,
            "redirects": "yes",
            "props": "claims|descriptions|labels",
            "normalize": "true",
            "ids": "|".join(entities),
            "format": "json",
        }
        url = _build_url(self.options, params)

        client, close_client = self._new_client()
        try:
            resp = await client.get(url)
            resp.raise_for_status()
            data = resp.json()
            return self._parse_entities(data, resolve_properties)
        finally:
            if close_client:
                await client.aclose()

    def get_entities_sync(self, entities: list[str], resolve_properties: bool = True) -> dict[str, Any]:
        """Blocking wrapper around get_entities() for non-async callers."""
        return asyncio.run(self.get_entities(entities, resolve_properties))

    def _parse_entities(self, data: dict[str, Any], resolve_properties: bool) -> dict[str, Any]:
        """Flatten a wbgetentities payload to label/description/claims dicts.

        Entities missing a label, a description, or any usable claim are
        dropped. Claim values are rendered per datatype; unsupported
        datatypes are skipped.
        """
        out_entities = []
        combined_property_list: set[str] = set()

        for entity in data.get("entities", {}).values():
            description = entity.get("descriptions", {}).get(self.options.language, {}).get("value", "")
            label = entity.get("labels", {}).get(self.options.language, {}).get("value", "")
            claims = []

            for claim_group in entity.get("claims", {}).values():
                for claim in claim_group:
                    snak = claim.get("mainsnak", {})
                    if snak.get("snaktype") != "value":
                        continue

                    prop = snak.get("property", "")
                    prop_type = snak.get("datatype", "")
                    val = ""
                    dv = snak.get("datavalue", {}).get("value")
                    if not dv:
                        continue
                    # Render the value according to its Wikibase datatype.
                    if prop_type == "wikibase-item":
                        val = f"Q{dv.get('numeric-id')}"
                    elif prop_type in {"string", "url", "external-id"}:
                        val = dv
                    elif prop_type == "time":
                        val = dv.get("time", "")
                    elif prop_type == "globe-coordinate":
                        val = f"{dv.get('longitude')},{dv.get('latitude')}"
                    elif prop_type == "quantity":
                        val = dv.get("amount", "")
                        if dv.get("unit") and dv.get("unit") != "1":
                            val = f"{val}{dv.get('unit')}"
                    else:
                        continue

                    if prop and val and prop_type:
                        if resolve_properties:
                            # Substitute cached labels now; remember misses so a
                            # batch lookup can fill them in afterwards.
                            prop_cached = prop in PROPERTY_CACHE
                            if prop_cached:
                                prop = PROPERTY_CACHE[prop]
                            else:
                                combined_property_list.add(prop)

                            value_cached = True
                            if prop_type == "wikibase-item":
                                value_cached = val in PROPERTY_CACHE
                                if value_cached:
                                    val = PROPERTY_CACHE[val]
                                else:
                                    combined_property_list.add(val)

                            claims.append({"property": prop, "value": val, "type": prop_type, "propertyCached": prop_cached, "valueCached": value_cached})
                        else:
                            claims.append({"property": prop, "value": val, "type": prop_type})

            if description and label and claims:
                out_entities.append({"label": label, "description": description, "claims": claims})

        if not resolve_properties:
            return {"entities": out_entities}

        if combined_property_list:
            self._resolve_properties(list(combined_property_list))

        # Second pass: swap in labels that _resolve_properties just cached.
        for ent in out_entities:
            for claim in ent["claims"]:
                prop_cached = claim.pop("propertyCached", False)
                val_cached = claim.pop("valueCached", False)
                if not prop_cached and claim["property"] in PROPERTY_CACHE:
                    claim["property"] = PROPERTY_CACHE[claim["property"]]
                if not val_cached and claim["value"] in PROPERTY_CACHE:
                    claim["value"] = PROPERTY_CACHE[claim["value"]]
                    claim["type"] = "string"

        return {"entities": out_entities}

    def _resolve_properties(self, property_list: list[str]) -> None:
        # Placeholder for batch property label resolution, kept synchronous for now.
        # Use wbgetentities in batches of 50. Currently maps each id to itself.
        for prop_id in property_list:
            PROPERTY_CACHE.setdefault(prop_id, prop_id)

    async def get_entity_data(self, qid: str) -> dict[str, Any]:
        """Fetch the raw Special:EntityData JSON dump for one QID."""
        client, close_client = self._new_client()
        try:
            resp = await client.get(
                f"https://www.wikidata.org/wiki/Special:EntityData/{qid}.json",
                params={"flavor": "dump"},
            )
            resp.raise_for_status()
            return resp.json()
        finally:
            if close_client:
                await client.aclose()

+ 15 - 9
tests/test_resolve_tool.py

@@ -5,14 +5,20 @@ from app.resolve import ResolveService
 
 @pytest.mark.anyio
 async def test_resolve_tool_is_stubbed_and_returns_ok():
-    svc = ResolveService(call_tool=None)
-    result = await svc.resolve(
-        subject="anything",
-        context=None,
-        constraints=None,
-        hints=None,
-        debug=None,
-    )
+    async def no_hit(_s):
+        return None
+
+    async def no_persist(_e):
+        return None
 
-    assert result == {"status": "ok"}
+    async def no_wikidata(_s):
+        return None
+
+    svc = ResolveService(
+        load_entity_fn=no_hit,
+        wikidata_lookup_fn=no_wikidata,
+        persist_entity_fn=no_persist,
+    )
+    result = await svc.resolve(subject="anything")
 
+    assert result == {"status": "not_found"}

+ 19 - 0
tests/test_wikidata.py

@@ -0,0 +1,19 @@
+from app.wikidata import WikidataSearch, WikidataOptions
+
+
def test_validate_options():
    """validateOptions accepts a non-empty search with the default limit and
    rejects empty terms or out-of-range limits. (Renamed: the old name
    promised URL-encoding coverage that was never asserted.)"""
    assert WikidataSearch({"search": "Joe Biden"}).validateOptions() is True
    assert WikidataSearch().validateOptions() is False
    assert WikidataSearch({"search": "x", "limit": 0}).validateOptions() is False
    assert WikidataSearch({"search": "x", "limit": 51}).validateOptions() is False
+
+
def test_parse_entities_requires_label_description_and_claims():
    """_parse_entities keeps only entities with a label, a description and at
    least one usable claim. (The old test built a fixture it never used and
    only asserted on empty input.)"""
    ws = WikidataSearch({"search": "Joe Biden"})

    # Empty payload yields no entities.
    assert ws._parse_entities({"entities": {}}, resolve_properties=False) == {"entities": []}

    data = {
        "entities": {
            "Q6279": {
                "labels": {"en": {"value": "Joe Biden"}},
                "descriptions": {"en": {"value": "46th President"}},
                "claims": {
                    "P31": [
                        {
                            "mainsnak": {
                                "snaktype": "value",
                                "property": "P31",
                                "datatype": "wikibase-item",
                                "datavalue": {"value": {"numeric-id": 5}},
                            }
                        }
                    ]
                },
            },
            # Missing label/description/claims -> filtered out.
            "Q1": {"labels": {}, "descriptions": {}, "claims": {}},
        }
    }
    out = ws._parse_entities(data, resolve_properties=False)
    assert out == {
        "entities": [
            {
                "label": "Joe Biden",
                "description": "46th President",
                "claims": [{"property": "P31", "value": "Q5", "type": "wikibase-item"}],
            }
        ]
    }
+