|
|
|
""" |
|
Simple test for word variety logic without dependencies. |
|
""" |
|
|
|
import random |
|
from typing import List, Dict, Any |
|
|
|
def weighted_random_selection(candidates: List[Dict[str, Any]], max_words: int) -> List[Dict[str, Any]]:
    """
    Pick ``max_words`` candidates, favouring high similarity but keeping variety.

    Candidates are ranked by their ``"similarity"`` value (highest first) and
    split into four tiers by rank: the top quarter, the next quarter, the
    following 35% and whatever is left.  ``max_words // 3`` words (capped at
    the size of the top tier) are sampled uniformly from the top tier; every
    remaining slot is filled by weighted sampling without replacement over
    all still-unpicked candidates, using weights 4/3/2/1 for tiers 1-4.  The
    result is shuffled before it is returned.

    Candidates are tracked by rank position rather than by dict equality, so
    entries that compare equal are still treated as separate candidates and
    a selection always holds exactly ``max_words`` items.

    Args:
        candidates: Word dicts; each must provide a ``"similarity"`` key
            when a selection actually has to be made.
        max_words: Maximum number of candidates to return.

    Returns:
        ``candidates`` itself (same list object, original order) when it has
        at most ``max_words`` entries; an empty list when ``max_words`` is
        not positive; otherwise a new, shuffled list of exactly
        ``max_words`` candidates.

    Raises:
        KeyError: If a selection is needed and a candidate lacks the
            ``"similarity"`` key.
    """
    if len(candidates) <= max_words:
        return candidates
    if max_words <= 0:
        # random.sample() rejects negative sizes, and nothing can be
        # selected for a non-positive limit anyway.
        return []

    ranked = sorted(candidates, key=lambda w: w["similarity"], reverse=True)
    total = len(ranked)

    # Exclusive rank positions at which each tier ends. For very short lists
    # the later bounds may run past ``total``; the comparisons in
    # tier_weight() tolerate that (the affected tiers are simply empty).
    tier1_end = max(1, total // 4)
    tier2_end = tier1_end + max(1, total // 4)
    tier3_end = tier2_end + max(1, total * 35 // 100)

    def tier_weight(position: int) -> int:
        """Return the sampling weight for the tier containing *position*."""
        if position < tier1_end:
            return 4
        if position < tier2_end:
            return 3
        if position < tier3_end:
            return 2
        return 1

    # Guaranteed share: up to a third of the result comes from the top tier.
    picked = random.sample(range(tier1_end), min(max_words // 3, tier1_end))
    already_picked = set(picked)

    # Everything not picked yet competes for the remaining slots. The pool
    # always holds more entries than there are slots left, because
    # len(candidates) > max_words at this point.
    pool = [pos for pos in range(total) if pos not in already_picked]
    weights = [tier_weight(pos) for pos in pool]

    for _ in range(max_words - len(picked)):
        # Weighted draw without replacement: choose a pool slot, then drop
        # that slot from both parallel lists.
        slot = random.choices(range(len(pool)), weights=weights, k=1)[0]
        picked.append(pool.pop(slot))
        del weights[slot]

    selected = [ranked[pos] for pos in picked]
    # Shuffle so the guaranteed top-tier picks do not always come first.
    random.shuffle(selected)
    return selected
|
|
|
def create_test_candidates():
    """Build the fixed list of science-themed candidate dicts used by the test.

    Each entry carries the upper-case word, a lower-case clue, a similarity
    that starts at 0.9 and drops by 0.02 per list position, and a constant
    source tag.
    """
    vocabulary = (
        "SCIENTIFIC", "SCIENTIST", "CHEMISTRY", "ASTRONOMY", "BIOLOGIST",
        "PHYSICIST", "RESEARCH", "ZOOLOGY", "GEOLOGY", "BIOLOGY",
        "ECOLOGY", "BOTANY", "THEORY", "EXPERIMENT", "DISCOVERY",
        "LABORATORY", "MOLECULE", "EQUATION", "HYPOTHESIS", "ANALYSIS",
        "PHYSICS", "QUANTUM", "GENETICS", "EVOLUTION", "MICROSCOPE",
    )

    # Earlier words get the higher similarity, so list order is rank order.
    return [
        {
            "word": word,
            "clue": f"{word.lower()} (scientific term)",
            "similarity": 0.9 - (rank * 0.02),
            "source": "vector_search",
        }
        for rank, word in enumerate(vocabulary)
    ]
|
|
|
def test_variety():
    """Run five selections and report how much the leading words vary.

    For each of the first five result positions, counts how many distinct
    words appeared there across the runs, prints the per-position counts and
    an overall percentage, and returns True when that percentage is above 60.
    """
    print("🧪 Testing word variety\n")

    pool = create_test_candidates()

    # Collect the word lists of five independent selections of 12 words.
    results = []
    for i in range(5):
        word_list = [entry["word"] for entry in weighted_random_selection(pool, 12)]
        results.append(word_list)
        print(f"Selection {i+1}: {word_list[:5]}...")

    # Count distinct words seen at each of the first five positions.
    distinct_counts = []
    for pos in range(5):
        seen_here = {run[pos] for run in results if pos < len(run)}
        unique_at_pos = len(seen_here)
        distinct_counts.append(unique_at_pos)
        print(f"Position {pos}: {unique_at_pos} different words across 5 selections")

    # Best case is a different word in every run at every position.
    best_case = len(distinct_counts) * len(results)
    variety_percentage = (sum(distinct_counts) / best_case) * 100

    print(f"\n📊 Variety Score: {variety_percentage:.1f}%")

    return variety_percentage > 60
|
|
|
if __name__ == "__main__":
    # Run the variety check once and report the outcome on stdout.
    print(
        "✅ Word variety test passed!"
        if test_variety()
        else "❌ Word variety test failed!"
    )