File size: 2,866 Bytes
486eff6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/env python3

import re
from nltk.corpus import wordnet as wn
from wordfreq import word_frequency

def best_synset(word, lang="en"):
    """Return the synset of *word* whose most common lemma is most frequent.

    Every synset reported by WordNet for *word* is scored with the highest
    ``word_frequency`` value found among its lemma names (underscores are
    replaced by spaces before the lookup).  *lang* is only used for that
    frequency lookup.

    Returns ``None`` when WordNet lists no synsets for *word*; otherwise the
    top-scoring synset, preferring the earliest-listed one on a tie.
    """
    candidates = wn.synsets(word)
    if not candidates:
        return None

    def top_lemma_frequency(synset):
        # A synset is as "likely" as its single most frequent lemma.
        return max(
            word_frequency(lemma.name().replace("_", " "), lang)
            for lemma in synset.lemmas()
        )

    # max() keeps the first maximal element, so ties resolve to the synset
    # that WordNet listed first.
    return max(candidates, key=top_lemma_frequency)

def simplify_definition(defn):
    """Shorten a dictionary-style definition into a short, clue-like sentence.

    Steps:
      1. Drop the filler phrases "the act of" / "the process of"
         (matched case-insensitively).
      2. Keep only the text before the first ';' and, within that, before
         the first ','.
      3. Collapse runs of whitespace, since removing a filler phrase from
         the middle of a sentence would otherwise leave a double space.
      4. Upper-case the first character and make sure the text ends in '.'.

    Only the first character is upper-cased; the remainder keeps its
    original casing so proper nouns and acronyms survive
    (``str.capitalize`` would lower-case them).

    Args:
        defn: The raw definition text.

    Returns:
        The simplified text, or an empty string when nothing is left after
        simplification.
    """
    # Remove filler phrases that add length but no meaning to a clue.
    text = re.sub(r'\bthe (?:act|process) of\b', '', defn, flags=re.IGNORECASE)
    # Keep only the leading clause.
    text = text.split(';')[0].split(',')[0]
    # Normalise whitespace (also strips both ends).
    text = ' '.join(text.split())
    if not text:
        return ''
    text = text[0].upper() + text[1:]
    if not text.endswith('.'):
        text += '.'
    return text

def crossword_clue(word, lang="en"):
    """Build clue material for *word* from its best-scoring WordNet synset.

    Args:
        word: The answer word to look up.
        lang: Language code passed to ``best_synset`` and ``word_frequency``.

    Returns:
        The string ``"No clue available for '<word>'."`` when no synset is
        found.  Otherwise a dict with the keys:

        * ``'defn'``    - the synset's full definition text,
        * ``'lemmas'``  - the synset's lemma objects,
        * ``'clue'``    - the definition shortened by ``simplify_definition``,
        * ``'synonym'`` - the most frequent lemma name other than *word*
          (underscores shown as spaces), or ``None`` when there is none.
    """
    syn = best_synset(word, lang)
    if not syn:
        return f"No clue available for '{word}'."

    lemmas = list(syn.lemmas())
    definition = syn.definition()

    # Alternative names for the same sense, excluding the answer itself.
    # Underscores are normalised before comparing so multi-word answers are
    # excluded too; sorting the de-duplicated names makes the choice between
    # equally frequent synonyms deterministic.
    target = word.replace("_", " ").lower()
    names = {lemma.name().replace("_", " ") for lemma in lemmas}
    synonyms = sorted(name for name in names if name.lower() != target)
    synonym = max(
        synonyms,
        key=lambda name: word_frequency(name, lang),
        default=None,
    )

    return {
        'defn': definition,
        'lemmas': lemmas,
        'clue': simplify_definition(definition),
        'synonym': synonym,
    }

# -------------------------------
# Examples: print every WordNet definition for a set of sample words.
words = [
    "bank",
    "abode",
    "hurricane",
    "competition",
    "agriculture",
    "poultry",
    "reptile",
    "chimpanzee",
    "lion",
    "vote",
    "democracy",
    "pakistan",
    "election",
    "ethnographic",
    "laughter",
    "illness",
    "childhood",
    "scientist",
    "ethiopian",
    "humane",
    "medicate",
    "cryptography",
    "ology",
    "materialism",
    "dolmen",
    "amazing",
]
for word in words:
    # Banner line, then one quoted definition per synset of the word.
    print(f"{'='*20} {word} {'='*20}")
    for syn in wn.synsets(word):
        print(f"\"{syn.definition()}\"")

# To try the clue generator instead, call e.g. crossword_clue("bank")
# (common word), crossword_clue("abode") (rarer word) or
# crossword_clue("zephyr") (tricky word).