abc123 / hack /my_wordnet.py
vimalk78's picture
feat(crossword): generated crosswords with clues
486eff6
raw
history blame
2.87 kB
#!/usr/bin/env python3
import re
from nltk.corpus import wordnet as wn
from wordfreq import word_frequency
def best_synset(word, lang="en"):
    """Return the synset of ``word`` that contains the most frequent lemma.

    Every synset WordNet returns for ``word`` is scored by the highest
    ``word_frequency`` value among its lemma names (underscores are replaced
    by spaces before the lookup, and ``lang`` selects the frequency list).
    The best-scoring synset is returned; when several synsets share the top
    score, the one WordNet listed first is kept.

    Note that ``lang`` is only used for the frequency lookup; the WordNet
    query itself is made with its default language.

    Returns ``None`` when WordNet has no synset for ``word``.
    """
    candidates = wn.synsets(word)
    if not candidates:
        return None

    def top_lemma_frequency(synset):
        # Score of a synset = frequency of its most common lemma.
        return max(
            word_frequency(lemma.name().replace("_", " "), lang)
            for lemma in synset.lemmas()
        )

    # max() returns the first of equally scored items, i.e. WordNet's order.
    return max(candidates, key=top_lemma_frequency)
def simplify_definition(defn):
    """Shorten a WordNet definition into a clue-like sentence.

    The definition is processed in this order:

    1. The filler phrases "the act of" and "the process of" are removed.
    2. Everything from the first ``;`` or ``,`` onwards is dropped.
    3. Runs of whitespace are collapsed to single spaces and the ends are
       trimmed, so removing a filler phrase leaves no double space behind.
    4. The first character is upper-cased. The rest of the text is left
       untouched so proper nouns and acronyms keep their capitalisation.
    5. A trailing period is appended if the text does not already end in one.

    Args:
        defn: The raw definition text.

    Returns:
        The shortened definition as a string ending with a period.
    """
    # Drop filler lead-ins that add length but no meaning to a clue.
    defn = re.sub(r'\bthe (?:act|process) of\b', '', defn)
    # Keep only the leading clause.
    defn = defn.split(';')[0].split(',')[0]
    # Normalise spacing left behind by the removals above.
    defn = ' '.join(defn.split())
    # Upper-case only the first character; str.capitalize() would also
    # lower-case the remainder ("Pakistan" -> "pakistan").
    defn = defn[:1].upper() + defn[1:]
    if not defn.endswith('.'):
        defn += '.'
    return defn
def crossword_clue(word, lang="en"):
    """Build clue material for ``word`` from its best-matching synset.

    Args:
        word: The answer word to look up.
        lang: Language code forwarded to ``best_synset`` and used for the
            ``word_frequency`` ranking of synonyms.

    Returns:
        The string ``"No clue available for '<word>'."`` when ``best_synset``
        finds nothing. Otherwise a dict with the keys:

        * ``'defn'``   - the synset's full definition.
        * ``'lemmas'`` - list of the synset's lemma objects.
        * ``'clue'``   - the simplified definition; when the synset has a
          lemma other than ``word`` itself, the clue is prefixed with the
          word and followed by the most frequent such synonym.

    The ``'clue'`` entry was previously computed in code placed after an
    unconditional ``return`` and therefore never produced; it is now part of
    the returned dict alongside the existing keys.
    """
    syn = best_synset(word, lang)
    if not syn:
        return f"No clue available for '{word}'."

    definition = syn.definition()
    lemmas = list(syn.lemmas())
    defn = simplify_definition(definition)

    # Candidate synonyms: lemma names other than the answer itself. Both
    # sides are compared in space-separated, lower-case form so multi-word
    # answers ("ice cream" vs. lemma "ice_cream") are excluded correctly.
    target = word.replace("_", " ").lower()
    synonyms = {
        name
        for name in (lemma.name().replace("_", " ") for lemma in lemmas)
        if name.lower() != target
    }

    if synonyms:
        # Most frequent synonym; sorting first makes frequency ties
        # resolve alphabetically instead of by set iteration order.
        alt = max(sorted(synonyms), key=lambda w: word_frequency(w, lang))
        clue = f"{word}: {defn} (synonym: {alt})."
    else:
        clue = defn

    return {'defn': definition, 'lemmas': lemmas, 'clue': clue}
# -------------------------------
# Examples: dump every WordNet definition for a handful of sample words.
words = [
    "bank",
    "abode",
    "hurricane",
    "competition",
    "agriculture",
    "poultry",
    "reptile",
    "chimpanzee",
    "lion",
    "vote",
    "democracy",
    "pakistan",
    "election",
    "ethnographic",
    "laughter",
    "illness",
    "childhood",
    "scientist",
    "ethiopian",
    "humane",
    "medicate",
    "cryptography",
    "ology",
    "materialism",
    "dolmen",
    "amazing",
]

for word in words:
    syns = wn.synsets(word)
    defns = []  # only referenced by the disabled summary print below
    # Banner line: twenty '=' on each side of the word.
    print("=" * 20, word, "=" * 20)
    for syn in syns:
        # One quoted definition per line.
        print(f'"{syn.definition()}"')
        # print(f"lemmas:{[lemma for lemma in syn.lemmas()]}")
        # defns.append(syn.definition())
    # print(f"{word}: \"{defns}\"")

# Disabled usage examples for crossword_clue:
# print(crossword_clue("bank"))    # common word
# print(crossword_clue("abode"))   # rarer word
# print(crossword_clue("zephyr"))  # tricky word