|
|
@@ -872,8 +872,20 @@ async def detect_emerging_topics(limit: int = 10, timeframe: str = "24h", topic:
|
|
|
|
|
|
related_kws = []
|
|
|
if ent in entity_kw_cooccur:
|
|
|
- for kw, _cnt in entity_kw_cooccur[ent].most_common(5):
|
|
|
+ # Build a set of related entity names (lowercased) to deduplicate
|
|
|
+ # keywords that are already represented in related_entities
|
|
|
+ related_ent_names = {e.strip().lower() for e in related}
|
|
|
+ # Also include the entity itself so keywords that merely repeat the topic are skipped
|
|
|
+ related_ent_names.add(ent.strip().lower())
|
|
|
+ for kw, _cnt in entity_kw_cooccur[ent].most_common(10):
|
|
|
+ kw_lower = kw.strip().lower()
|
|
|
+ # Skip keywords that are just a related entity name (substring match)
|
|
|
+ if any(kw_lower in ent_name or ent_name in kw_lower
|
|
|
+ for ent_name in related_ent_names):
|
|
|
+ continue
|
|
|
related_kws.append(kw)
|
|
|
+ if len(related_kws) >= 5:
|
|
|
+ break
|
|
|
|
|
|
scored.append({
|
|
|
"topic": ent,
|
|
|
@@ -920,15 +932,23 @@ async def detect_emerging_topics(limit: int = 10, timeframe: str = "24h", topic:
|
|
|
)
|
|
|
|
|
|
kw_related_kws = []
|
|
|
+ kw_related_ents = []
|
|
|
if kw in kw_cooccur:
|
|
|
- for other, _cnt in kw_cooccur[kw].most_common(5):
|
|
|
- if other != kw:
|
|
|
+ for other, _cnt in kw_cooccur[kw].most_common(10):
|
|
|
+ if other == kw:
|
|
|
+ continue
|
|
|
+ # If this co-occurring term is a known entity, route to related_entities
|
|
|
+ if other in all_entities:
|
|
|
+ kw_related_ents.append(other)
|
|
|
+ else:
|
|
|
kw_related_kws.append(other)
|
|
|
+ if len(kw_related_kws) >= 5 and len(kw_related_ents) >= 3:
|
|
|
+ break
|
|
|
|
|
|
kw_scored.append({
|
|
|
"topic": kw,
|
|
|
"trend_score": min(0.99, round(composed_score, 3)),
|
|
|
- "related_entities": [],
|
|
|
+ "related_entities": kw_related_ents[:5],
|
|
|
"related_keywords": kw_related_kws[:5],
|
|
|
"velocity": round(velocity, 2),
|
|
|
"recent_count": recent_n,
|