|
|
@@ -255,6 +255,8 @@ async def detect_emerging_topics(limit: int = 10):
|
|
|
|
|
|
entity_counts = Counter()
|
|
|
entity_importance_sum = Counter()
|
|
|
+ # co-occurrence: ent -> other_ent -> count
|
|
|
+ entity_cooccur = {}
|
|
|
phrase_counts = Counter()
|
|
|
topic_counts = Counter()
|
|
|
|
|
|
@@ -273,17 +275,31 @@ async def detect_emerging_topics(limit: int = 10):
|
|
|
|
|
|
for c in clusters:
|
|
|
topic_counts[c.get("topic", "other")] += 1
|
|
|
- for ent in c.get("entities", []) or []:
|
|
|
+ ents_in_cluster = [e for e in (c.get("entities", []) or []) if not _is_generic_entity(e)]
|
|
|
+ ents_in_cluster_norm = [str(e).strip().lower() for e in ents_in_cluster if str(e).strip()]
|
|
|
+ for ent in ents_in_cluster_norm:
|
|
|
if _is_generic_entity(ent):
|
|
|
continue
|
|
|
- key = str(ent).strip().lower()
|
|
|
- if key:
|
|
|
- entity_counts[key] += 1
|
|
|
- try:
|
|
|
- entity_importance_sum[key] += float(c.get("importance", 0.0) or 0.0)
|
|
|
- except Exception:
|
|
|
+ entity_counts[ent] += 1
|
|
|
+ try:
|
|
|
+ entity_importance_sum[ent] += float(c.get("importance", 0.0) or 0.0)
|
|
|
+ except Exception:
|
|
|
pass
|
|
|
|
|
|
+ # update co-occurrence counts
|
|
|
+ for i in range(len(ents_in_cluster_norm)):
|
|
|
+ a = ents_in_cluster_norm[i]
|
|
|
+ if not a:
|
|
|
+ continue
|
|
|
+ entity_cooccur.setdefault(a, Counter())
|
|
|
+ for j in range(len(ents_in_cluster_norm)):
|
|
|
+ if i == j:
|
|
|
+ continue
|
|
|
+ b = ents_in_cluster_norm[j]
|
|
|
+ if not b:
|
|
|
+ continue
|
|
|
+ entity_cooccur[a][b] += 1
|
|
|
+
|
|
|
text = f"{c.get('headline','')} {c.get('summary','')}"
|
|
|
words = [w for w in re.findall(r"[A-Za-z][A-Za-z0-9\-]{2,}", text.lower())]
|
|
|
for i in range(len(words) - 1):
|
|
|
@@ -297,10 +313,16 @@ async def detect_emerging_topics(limit: int = 10):
|
|
|
avg_imp = entity_importance_sum[ent] / max(1, count)
|
|
|
# avg_imp is typically 0..~1; keep score bounded.
|
|
|
trend_score = 0.25 + 0.40 * min(1.0, avg_imp) + 0.08 * min(6.0, float(count))
|
|
|
+ related = []
|
|
|
+ for other, _cnt in (entity_cooccur.get(ent) or Counter()).most_common(3):
|
|
|
+ # skip the entity itself: duplicate names within one cluster create self-pairs in entity_cooccur
|
|
|
+ if other != ent:
|
|
|
+ related.append(other)
|
|
|
+
|
|
|
emerging.append({
|
|
|
"topic": ent,
|
|
|
"trend_score": min(0.99, round(trend_score, 2)),
|
|
|
- "related_entities": [ent],
|
|
|
+ "related_entities": related if related else [ent],
|
|
|
"signal_type": "entity",
|
|
|
"count": count,
|
|
|
"avg_importance": round(avg_imp, 3),
|