ソースを参照

feat: maintenance MID/type fixes (feature incomplete)

Lukas Goldschmidt 1 ヶ月 前
コミット
ba8f9b8601
1 ファイル変更、5 行追加、4 行削除
  1. 5 4
      scripts/maintain_entities.py

+ 5 - 4
scripts/maintain_entities.py

@@ -539,6 +539,7 @@ async def maintain_subject(item: dict[str, Any], dry_run: bool, refresh_payloads
             wiki_description = (full.get("description") or "").strip()
             current_label = subject.strip()
             current_mid = item.get("mid")
+            qid = full.get("qid")
             wikidata_mid = None
             claims = entity_block.get("claims", {}) if isinstance(entity_block, dict) else {}
             mid_claims = claims.get("P2671", []) if isinstance(claims, dict) else []
@@ -597,9 +598,10 @@ async def maintain_subject(item: dict[str, Any], dry_run: bool, refresh_payloads
                     }
                 )
 
-            # If Wikidata does not provide a MID, try the Trends candidates as a
-            # fallback. Wikidata wins whenever it has a MID.
-            if not wikidata_mid:
+            # If Wikidata does not provide a MID, Trends can be used only as a
+            # last resort. If we already have a Wikidata QID hit, do NOT inject
+            # a Trends MID (prevents Graza-style cross-entity contamination).
+            if not wikidata_mid and not qid:
                 trends_candidates = []
                 raw_trends = item.get("raw_trends_json")
                 if raw_trends:
@@ -628,7 +630,6 @@ async def maintain_subject(item: dict[str, Any], dry_run: bool, refresh_payloads
                     )
 
             # QID is always a known cross-reference and acts as a stable anchor.
-            qid = full.get("qid")
             existing_qid = None
             if qid and qid != existing_qid:
                 claim = AtlasClaim(