|
|
@@ -711,6 +711,7 @@ async def detect_emerging_topics(limit: int = 10, timeframe: str = "24h", topic:
|
|
|
kw_sources: dict[str, set] = {}
|
|
|
kw_buckets: dict[str, set] = {}
|
|
|
kw_cooccur: dict[str, Counter] = {}
|
|
|
+ entity_kw_cooccur: dict[str, Counter] = {} # entity -> Counter of co-occurring keywords
|
|
|
|
|
|
bucket_size_hours = max(1.0, hours / 6.0) # split window into ~6 buckets
|
|
|
|
|
|
@@ -807,11 +808,15 @@ async def detect_emerging_topics(limit: int = 10, timeframe: str = "24h", topic:
|
|
|
continue
|
|
|
kb = kws_in_cluster[j]
|
|
|
kw_cooccur[ka][kb] += 1
|
|
|
- # also track entity↔keyword co-occurrence
|
|
|
+ # also track entity<->keyword co-occurrence (bidirectional)
|
|
|
for ent in ents_norm:
|
|
|
if _is_generic_entity(ent):
|
|
|
continue
|
|
|
kw_cooccur[ka][ent] += 1
|
|
|
+ # and the reverse: entity -> keyword
|
|
|
+ if ent not in entity_kw_cooccur:
|
|
|
+ entity_kw_cooccur[ent] = Counter()
|
|
|
+ entity_kw_cooccur[ent][ka] += 1
|
|
|
|
|
|
# bigram phrases (recent only)
|
|
|
if is_recent:
|
|
|
@@ -866,10 +871,9 @@ async def detect_emerging_topics(limit: int = 10, timeframe: str = "24h", topic:
|
|
|
related.append(other)
|
|
|
|
|
|
related_kws = []
|
|
|
- if ent in kw_cooccur:
|
|
|
- for kw, _cnt in kw_cooccur[ent].most_common(5):
|
|
|
- if kw != ent:
|
|
|
- related_kws.append(kw)
|
|
|
+ if ent in entity_kw_cooccur:
|
|
|
+ for kw, _cnt in entity_kw_cooccur[ent].most_common(5):
|
|
|
+ related_kws.append(kw)
|
|
|
|
|
|
scored.append({
|
|
|
"topic": ent,
|