File size: 2,866 Bytes
486eff6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
#!/usr/bin/env python3
import re
from nltk.corpus import wordnet as wn
from wordfreq import word_frequency
def best_synset(word, lang="en"):
    """Return the synset of ``word`` whose best lemma is the most frequent.

    Each synset from ``wn.synsets(word)`` is rated by the largest
    ``word_frequency`` value found among its lemma names (underscores are
    turned into spaces and the lookup uses ``lang``). The highest-rated
    synset is returned; on equal ratings the one appearing first in the
    list returned by ``wn.synsets`` is kept. Returns ``None`` when there is
    no synset for ``word``.
    """
    candidates = wn.synsets(word)
    if not candidates:
        return None

    def top_lemma_frequency(synset):
        # A synset is rated by its single most common lemma.
        return max(
            [word_frequency(lemma.name().replace("_", " "), lang)
             for lemma in synset.lemmas()]
        )

    # max() keeps the earliest candidate among equal ratings.
    return max(candidates, key=top_lemma_frequency)
def simplify_definition(defn):
    """Shorten a WordNet-style definition into a clue-like sentence.

    Steps, in order:
      1. remove the filler phrases "the act of" and "the process of";
      2. keep only the text before the first ';' and before the first ',';
      3. collapse runs of whitespace and trim the ends;
      4. upper-case the first character, leaving the rest untouched;
      5. append a '.' unless the text already ends with one.

    Args:
        defn: The definition text to shorten.

    Returns:
        The shortened definition. An empty input yields ".".
    """
    # Drop filler lead-ins that add length but no meaning to a clue.
    defn = re.sub(r'\bthe act of\b', '', defn)
    defn = re.sub(r'\bthe process of\b', '', defn)
    # Keep only the first clause.
    defn = defn.split(';')[0].split(',')[0]
    # Removing a phrase from the middle of the text leaves a double space;
    # split/join collapses those and trims both ends.
    defn = ' '.join(defn.split())
    # Upper-case only the first character. str.capitalize() would also
    # lower-case the remainder and mangle proper nouns such as "Ethiopia".
    defn = defn[:1].upper() + defn[1:]
    if not defn.endswith('.'):
        defn += '.'
    return defn
def crossword_clue(word, lang="en"):
    """Build crossword-clue data for ``word`` from its best WordNet synset.

    The synset is chosen with ``best_synset(word, lang)``.

    Args:
        word: The word to build a clue for.
        lang: Language code passed on to ``best_synset`` and
            ``word_frequency``.

    Returns:
        If no synset is found, the string ``"No clue available for
        '<word>'."``. Otherwise a dict with:
          * ``'defn'``   - the synset's full definition;
          * ``'lemmas'`` - the list of the synset's lemma objects;
          * ``'clue'``   - the definition shortened by
            ``simplify_definition``; when the synset has a lemma other
            than ``word``, the clue is formatted as
            ``"<word>: <short definition> (synonym: <alt>)."`` using the
            most frequent such lemma.

    Note:
        ``'clue'`` is an additional key. Previously an early ``return``
        made the clue-building code unreachable, so only ``'defn'`` and
        ``'lemmas'`` were ever produced; both are still returned unchanged.
    """
    syn = best_synset(word, lang)
    if syn is None:
        return f"No clue available for '{word}'."

    definition = syn.definition()
    lemmas = list(syn.lemmas())
    clue = simplify_definition(definition)

    # Synonym candidates: every lemma of the synset except the word itself.
    target = word.lower()
    synonyms = {
        lemma.name().replace("_", " ")
        for lemma in lemmas
        if lemma.name().lower() != target
    }
    if synonyms:
        # Most frequent synonym wins; the name breaks ties so the result
        # does not depend on set iteration order.
        alt = min(synonyms, key=lambda w: (-word_frequency(w, lang), w))
        clue = f"{word}: {clue} (synonym: {alt})."

    return {'defn': definition, 'lemmas': lemmas, 'clue': clue}
# -------------------------------
# Demo: print every WordNet definition for a set of sample words
words = [
    "bank",
    "abode",
    "hurricane",
    "competition",
    "agriculture",
    "poultry",
    "reptile",
    "chimpanzee",
    "lion",
    "vote",
    "democracy",
    "pakistan",
    "election",
    "ethnographic",
    "laughter",
    "illness",
    "childhood",
    "scientist",
    "ethiopian",
    "humane",
    "medicate",
    "cryptography",
    "ology",
    "materialism",
    "dolmen",
    "amazing",
]

rule = "=" * 20
for word in words:
    # Look the word up first, then print its banner and one quoted
    # definition per synset.
    senses = wn.synsets(word)
    print(f"{rule} {word} {rule}")
    for sense in senses:
        print(f'"{sense.definition()}"')

# Example crossword_clue calls, left disabled:
# print(crossword_clue("bank"))    # common word
# print(crossword_clue("abode"))   # rarer word
# print(crossword_clue("zephyr"))  # tricky word
|