|
|
|
|
|
import re |
|
from nltk.corpus import wordnet as wn |
|
from wordfreq import word_frequency |
|
|
|
def best_synset(word, lang="en"):
    """Pick the most likely synset using WordNet + Wordfreq scoring.

    Every synset returned by ``wn.synsets(word)`` is scored with the highest
    ``word_frequency`` found among its lemma names (underscores replaced by
    spaces), and the best-scoring synset is returned.

    Args:
        word: The word to look up in WordNet.
        lang: Language code handed to ``word_frequency``. It is not passed
            to the WordNet lookup itself.

    Returns:
        The highest-scoring synset, or ``None`` when WordNet has no synsets
        for ``word``. On equal scores the synset listed first by WordNet
        wins.
    """
    synsets = wn.synsets(word)
    if not synsets:
        return None

    def score(syn):
        # default=0.0 keeps a synset without lemmas from raising ValueError.
        return max(
            (word_frequency(lemma.name().replace("_", " "), lang)
             for lemma in syn.lemmas()),
            default=0.0,
        )

    # max() returns the first maximal element, i.e. the same winner a stable
    # descending sort would place at index 0, without sorting the whole list.
    return max(synsets, key=score)
|
|
|
def simplify_definition(defn):
    """Shorten WordNet definition into clue-like form.

    The phrases "the act of" and "the process of" are removed, only the text
    before the first ``;`` and the first ``,`` is kept, the first character
    is upper-cased and a trailing period is added when missing.

    Args:
        defn: The definition text to shorten.

    Returns:
        The shortened definition as a string ending in ``.``.
    """
    # Remove the boilerplate lead-in together with the whitespace following
    # it, so a mid-sentence match does not leave a double space behind.
    defn = re.sub(r'\bthe (?:act|process) of\b\s*', '', defn)

    # Keep only the first clause.
    defn = defn.split(';')[0].split(',')[0].strip()

    # Upper-case just the first character: str.capitalize() would also
    # lowercase the rest and mangle proper nouns such as "Pakistan".
    defn = defn[:1].upper() + defn[1:]

    if not defn.endswith('.'):
        defn += '.'
    return defn
|
|
|
def crossword_clue(word, lang="en"):
    """Look up ``word`` and return its definition, lemmas and a short clue.

    The synset is chosen with ``best_synset``. Previously the function
    returned right after collecting the lemmas, which left the clue-building
    code below that return unreachable; the clue and synonym are now exposed
    as additional keys while the original ``'defn'`` and ``'lemmas'`` entries
    are unchanged.

    Args:
        word: The word to build a clue for.
        lang: Language code forwarded to ``best_synset`` and
            ``word_frequency``.

    Returns:
        The string ``"No clue available for '<word>'."`` when no synset is
        found. Otherwise a dict with:

        * ``'defn'``: the synset's full definition.
        * ``'lemmas'``: list of the synset's lemma objects.
        * ``'clue'``: ``simplify_definition`` applied to the definition.
        * ``'synonym'``: the lemma name (underscores replaced by spaces)
          with the highest ``word_frequency`` that differs from ``word``
          case-insensitively, or ``None`` if there is none.
    """
    syn = best_synset(word, lang)
    if not syn:
        return f"No clue available for '{word}'."

    lemmas = list(syn.lemmas())
    definition = syn.definition()

    # dict.fromkeys de-duplicates while preserving lemma order, so ties in
    # frequency resolve deterministically (a set would not guarantee that).
    target = word.lower()
    synonyms = list(dict.fromkeys(
        lemma.name().replace("_", " ")
        for lemma in lemmas
        if lemma.name().lower() != target
    ))
    synonym = max(
        synonyms,
        key=lambda w: word_frequency(w, lang),
        default=None,
    )

    return {
        'defn': definition,
        'lemmas': lemmas,
        'clue': simplify_definition(definition),
        'synonym': synonym,
    }
|
|
|
|
|
|
|
# Demo vocabulary whose WordNet definitions are printed below.
words = [
    "bank",
    "abode",
    "hurricane",
    "competition",
    "agriculture",
    "poultry",
    "reptile",
    "chimpanzee",
    "lion",
    "vote",
    "democracy",
    "pakistan",
    "election",
    "ethnographic",
    "laughter",
    "illness",
    "childhood",
    "scientist",
    "ethiopian",
    "humane",
    "medicate",
    "cryptography",
    "ology",
    "materialism",
    "dolmen",
    "amazing",
]

# For each word, print a banner followed by every WordNet definition wrapped
# in double quotes, one per line.
for word in words:
    syns = wn.synsets(word)
    print(f"{'='*20} {word} {'='*20}")
    for syn in syns:
        print(f"\"{syn.definition()}\"")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|