|
@@ -0,0 +1,387 @@
|
|
|
|
|
import json
from collections import Counter
from pathlib import Path
|
|
|
|
|
+
|
|
|
|
|
def _sample(name, topic, headline, summary, entities, keywords):
    """Assemble one annotated sample record.

    The returned dict has the keys ``name``, ``cluster`` (``headline``,
    ``summary``) and ``expected`` (``entities``, ``keywords``, ``topic``),
    inserted in exactly that order.
    """
    return {
        "name": name,
        "cluster": {"headline": headline, "summary": summary},
        "expected": {"entities": entities, "keywords": keywords, "topic": topic},
    }


# 30 diverse clusters selected from live data, manually annotated
ANNOTATED_SAMPLES = [
    # === REGULATION (6) ===
    _sample(
        name="sec_binance_lawsuit",
        topic="regulation",
        headline="SEC sues Binance over unregistered securities",
        summary="The Securities and Exchange Commission filed a lawsuit against Binance, the world's largest crypto exchange, alleging it operated as an unregistered securities exchange and commingled customer funds.",
        entities=["SEC", "Binance"],
        keywords=["securities law", "crypto exchange", "enforcement action"],
    ),
    _sample(
        name="iran_frozen_funds_talks",
        topic="regulation",
        headline="Khamenei aide says $24B in frozen funds blocking talks",
        summary="A senior aide to Iran's Supreme Leader said $24 billion in frozen Iranian funds are blocking progress in indirect talks with the United States.",
        entities=["Iran", "United States", "Khamenei", "Mohsen Rezaei"],
        keywords=["frozen funds", "peace talks", "Iran sanctions"],
    ),
    _sample(
        name="us_iran_sanctions",
        topic="regulation",
        headline="US issues new Iran-linked sanctions",
        summary="The US Treasury Department imposed new sanctions targeting Iranian oil and petrochemical networks.",
        entities=["United States", "Iran", "Department of the Treasury", "OFAC"],
        keywords=["sanctions", "oil network", "petrochemical"],
    ),
    _sample(
        name="house_crypto_bills",
        topic="regulation",
        headline="U.S. House tax committee weighs crypto bills, including relief for small transactions",
        summary="The House Ways and Means Committee discussed legislation providing tax relief for small crypto transactions and clarifying digital asset rules.",
        entities=["U.S. House tax committee", "Bitcoin", "Ethereum", "SEC"],
        keywords=["crypto bills", "tax relief", "small transactions"],
    ),
    _sample(
        name="wamco_sec_settlement",
        topic="regulation",
        headline="Wamco to Pay $100 Million in SEC Settlement Over Leech Trades",
        summary="Western Asset Management agreed to pay $100 million to settle SEC charges over improper trading by former portfolio manager Ken Leech.",
        entities=["Western Asset Management Co.", "Securities and Exchange Commission", "Ken Leech"],
        keywords=["SEC settlement", "trading practices", "portfolio manager"],
    ),
    _sample(
        name="us_cuba_sanctions",
        topic="regulation",
        headline="US imposes sanctions on Cuban president, Castro family members",
        summary="The United States sanctioned Cuban President Miguel Diaz-Canel and members of the Castro family over human rights abuses.",
        entities=["Cuba", "Cuban president", "Castro family", "Raul Castro", "United States"],
        keywords=["US sanctions", "Cuba tensions", "human rights"],
    ),

    # === MACRO (7) ===
    _sample(
        name="fed_rates_inflation",
        topic="macro",
        headline="Fed holds rates steady as inflation cools",
        summary="The Federal Reserve kept interest rates unchanged at 5.25-5.50%, citing progress on inflation but signaling caution on future cuts.",
        entities=["Federal Reserve"],
        keywords=["interest rates", "inflation", "monetary policy"],
    ),
    _sample(
        name="ecb_rate_cut",
        topic="macro",
        headline="ECB cuts rates as eurozone inflation falls to 2.4%",
        summary="The European Central Bank lowered its deposit rate by 25 basis points to 3.75%, marking its first cut since 2019 as inflation approaches target.",
        entities=["European Central Bank", "ECB"],
        keywords=["rate cut", "eurozone inflation", "deposit rate", "monetary easing"],
    ),
    _sample(
        name="china_stimulus",
        topic="macro",
        headline="China unveils stimulus package to boost slowing economy",
        summary="Beijing announced a comprehensive stimulus package including infrastructure spending, tax cuts, and monetary easing to counter slowing growth and property sector weakness.",
        entities=["China", "Beijing"],
        keywords=["stimulus package", "infrastructure spending", "monetary easing", "property sector"],
    ),
    _sample(
        name="oil_opep_cuts",
        topic="macro",
        headline="Oil jumps after OPEC+ extends production cuts",
        summary="Crude oil prices rose 3% after OPEC+ agreed to extend production cuts through year-end, tightening global supply amid demand concerns.",
        entities=["OPEC+"],
        keywords=["production cuts", "oil prices", "global supply", "demand concerns"],
    ),
    _sample(
        name="india_forex_reserves",
        topic="macro",
        headline="India's Forex Reserves Hit $682.32 Billion as RBI Tightens Its Economic Grip",
        summary="India's foreign exchange reserves reached a record high as the Reserve Bank of India maintains tight monetary policy.",
        entities=["India", "RBI"],
        keywords=["forex reserves", "monetary policy", "economic grip"],
    ),
    _sample(
        name="india_us_trade_pact",
        topic="macro",
        headline="India, US May Execute Interim Trade Pact by July, Minister Says",
        summary="Commerce Minister Piyush Goyal said India and the US could finalize an interim trade agreement by July, addressing tariffs and market access.",
        entities=["India", "US", "Piyush Goyal"],
        keywords=["trade deal", "tariffs", "market access"],
    ),
    _sample(
        name="jobs_report_fed_bets",
        topic="macro",
        headline="Investors boost bets for Fed rate rise after bumper US jobs report",
        summary="Strong US payrolls data led traders to increase wagers on Federal Reserve interest rate hikes.",
        entities=["US", "Federal Reserve"],
        keywords=["jobs report", "rate hike", "rate rise"],
    ),

    # === CRYPTO (6) ===
    _sample(
        name="bitcoin_etf_flows",
        topic="crypto",
        headline="Bitcoin ETFs see record inflows as BTC tops $70k",
        summary="US spot Bitcoin ETFs attracted $2.3 billion in net inflows this week as Bitcoin surged past $70,000, driven by institutional demand.",
        entities=["Bitcoin", "BTC"],
        keywords=["ETF inflows", "institutional demand", "price surge"],
    ),
    _sample(
        name="memecoins_dive",
        topic="crypto",
        headline="Memecoins dogecoin, shiba inu dive 9% as bitcoin nears $60,000",
        summary="Dogecoin and Shiba Inu led memecoin losses as Bitcoin approached the $60,000 level.",
        entities=["dogecoin", "shiba inu", "Bitcoin", "memecoins"],
        keywords=["crypto crash", "memecoins dive", "price drop"],
    ),
    _sample(
        name="bitcoin_seller_exhaustion",
        topic="crypto",
        headline="Bitcoin teases 'seller exhaustion' as BTC price downside reaches $60.3K",
        summary="Technical analysts note signs of seller exhaustion in Bitcoin as the cryptocurrency tests support near $60,300.",
        entities=["Bitcoin", "BTC"],
        keywords=["seller exhaustion", "price support", "technical analysis"],
    ),
    _sample(
        name="xrp_liquidation_selloff",
        topic="crypto",
        headline="XRP falls toward $1.10 as liquidation-driven selloff pushes token to multi-month lows",
        summary="XRP dropped sharply as leveraged positions were liquidated, pushing the token to its lowest level in months.",
        entities=["XRP", "Bitcoin", "Ethereum"],
        keywords=["liquidation", "selloff", "price crash"],
    ),
    _sample(
        name="visa_stablecoin_test",
        topic="crypto",
        headline="Visa tests private stablecoin settlement with Brale, Canton",
        summary="Visa is piloting private stablecoin settlement using Brale and Canton networks for institutional payments.",
        entities=["Visa", "Brale", "Canton"],
        keywords=["stablecoin settlement", "private network", "institutional payments"],
    ),
    _sample(
        name="prediction_markets_kalshi",
        topic="crypto",
        headline="Prediction Markets Hit $29.4 Billion in May as Kalshi Leads and Brokers Pile In",
        summary="Prediction market volume surged to record levels with Kalshi leading the growth as traditional brokers enter the space.",
        entities=["Kalshi", "Prediction Markets", "Bitcoin", "Ethereum", "SEC"],
        keywords=["prediction markets", "trading volume", "broker adoption"],
    ),

    # === AI (5) ===
    _sample(
        name="nvidia_earnings_ai",
        topic="ai",
        headline="Nvidia beats earnings on AI chip demand",
        summary="Nvidia reported quarterly revenue of $26 billion, up 262% year-over-year, driven by insatiable demand for its H100 and Blackwell AI chips.",
        entities=["Nvidia", "H100", "Blackwell"],
        keywords=["AI chips", "earnings beat", "revenue growth", "chip demand"],
    ),
    _sample(
        name="anthropic_ai_pause",
        topic="ai",
        headline="Anthropic calls for pause of global AI development",
        summary="AI safety company Anthropic urged a coordinated pause on advanced AI development to establish safety standards.",
        entities=["Anthropic", "Claude"],
        keywords=["AI development", "global pause", "AI safety"],
    ),
    _sample(
        name="microsoft_ai_products",
        topic="ai",
        headline="Has Microsoft Lost Its Mojo (Again)?",
        summary="Analysts question whether Microsoft's AI product strategy is falling behind competitors despite massive investment.",
        entities=["Microsoft", "Scott Hanselman", "Github"],
        keywords=["AI products", "catch-up mode", "competition"],
    ),
    _sample(
        name="ai_bubble_debate",
        topic="ai",
        headline="`There Is No AI Bubble,' Says BI's Rob Schiffman",
        summary="Business Insider's Rob Schiffman argues current AI investment levels are justified by real revenue growth, not speculation.",
        entities=["Robert Schiffman", "New York", "Business Insider"],
        keywords=["AI bubble", "investment thesis", "revenue growth"],
    ),
    _sample(
        name="morgan_stanley_ai_funding",
        topic="ai",
        headline="Morgan Stanley Sees AI-Related Funding Expanding to 15% of All Credit Deals",
        summary="Morgan Stanley reports AI-related financing now represents 15% of credit deals, up from near zero two years ago.",
        entities=["Morgan Stanley", "Diameter Capital Partners", "Scott Goodwin"],
        keywords=["AI funding", "credit deals", "financing growth"],
    ),

    # === OTHER (6) ===
    _sample(
        name="israel_iran_strikes",
        topic="other",
        headline="Israel strikes Iranian missile sites in Syria",
        summary="Israeli warplanes targeted Iranian missile depots near Damascus overnight, escalating regional tensions.",
        entities=["Israel", "Iran", "Syria", "Damascus"],
        keywords=["airstrikes", "missile sites", "regional escalation"],
    ),
    _sample(
        name="trump_intel_firings",
        topic="other",
        headline="Trump orders Pulte to start mass firings at intel agencies",
        summary="President Trump directed Bill Pulte to begin mass firings across US intelligence agencies.",
        entities=["Donald Trump", "Bill Pulte", "United States"],
        keywords=["mass firings", "intelligence agencies", "government restructuring"],
    ),
    _sample(
        name="boeing_737_max",
        topic="other",
        headline="Boeing to launch 737 Max production in July",
        summary="Boeing plans to restart 737 Max production in July under new CEO Kelly Ortberg.",
        entities=["Boeing", "Kelly Ortberg", "Seattle", "Everett", "737 Max"],
        keywords=["aircraft manufacturing", "production restart", "737 Max"],
    ),
    _sample(
        name="putin_trump_peer",
        topic="other",
        headline="Putin says he treats Trump as 'peer, with respect'",
        summary="Vladimir Putin described his relationship with Donald Trump as one of mutual respect between peers.",
        entities=["Vladimir Putin", "Donald Trump", "Ukraine", "St. Petersburg"],
        keywords=["Ukraine war", "diplomatic relations", "peer respect"],
    ),
    _sample(
        name="paris_bridge_history",
        topic="other",
        headline="Why is Paris's oldest bridge called the 'New Bridge'?",
        summary="The history of Paris's Pont Neuf, which despite its name is the city's oldest standing bridge.",
        entities=["Paris", "Pont Neuf", "Louis Vuitton", "Tanishk Saha"],
        keywords=["Paris bridge", "bridge history", "artistic installations"],
    ),
    _sample(
        name="ukraine_drone_attack",
        topic="other",
        headline="Ukraine under heavy drone attack as Zelensky seeks direct meeting with Putin",
        summary="Russia launched a massive drone barrage on Ukraine as President Zelensky pushes for direct talks with Putin.",
        entities=["Ukraine", "Russia", "Volodymyr Zelensky", "Vladimir Putin", "Moscow", "Kyiv"],
        keywords=["drone strikes", "conflict escalation", "peace talks"],
    ),
]
|
|
|
|
|
+
|
|
|
|
|
# Write the samples as JSON to the `data` directory that sits next to this
# script's own directory.
# NOTE(review): this runs at import time as well as when executed as a script;
# consider a `__main__` guard if the module is ever imported elsewhere.
output_path = Path(__file__).parent.parent / "data" / "annotated_samples.json"

# Create the target directory up front so a fresh checkout without `data/`
# does not fail with FileNotFoundError.
output_path.parent.mkdir(parents=True, exist_ok=True)

# ensure_ascii=False lets json.dumps emit non-ASCII characters verbatim, so
# the file encoding is pinned to UTF-8 instead of the platform default.
output_path.write_text(
    json.dumps(ANNOTATED_SAMPLES, indent=2, ensure_ascii=False),
    encoding="utf-8",
)
print(f"Wrote {len(ANNOTATED_SAMPLES)} annotated samples to {output_path}")

# Print how many samples carry each expected topic, in alphabetical order.
topics = Counter(sample["expected"]["topic"] for sample in ANNOTATED_SAMPLES)
print("\nTopic distribution:")
for topic, count in sorted(topics.items()):
    print(f" {topic}: {count}")
|