#!/usr/bin/env python3
"""Build crossword-style clues from WordNet definitions.

Synsets are looked up through NLTK's WordNet reader (``wn``) and ranked with
``wordfreq.word_frequency``. Running this file as a script prints every
WordNet definition for each entry in ``words``.
"""

import re

from nltk.corpus import wordnet as wn
from wordfreq import word_frequency

# Filler lead-ins that are stripped from a definition before it becomes a clue.
_FILLER_PHRASES = re.compile(r'\bthe (?:act|process) of\b')


def best_synset(word, lang="en"):
    """Pick the most likely synset using WordNet + Wordfreq scoring.

    Each synset returned by ``wn.synsets(word)`` is scored by the highest
    ``word_frequency`` value among its lemma names (underscores replaced by
    spaces), and the best-scoring synset wins.

    Args:
        word: The word to look up.
        lang: Language code handed to ``word_frequency``. It only affects
            the frequency lookup; the synset lookup itself does not use it.

    Returns:
        The highest-scoring synset, or ``None`` when ``wn.synsets`` finds
        nothing for ``word``.
    """
    synsets = wn.synsets(word)
    if not synsets:
        return None

    def top_lemma_frequency(syn):
        # default=0.0 keeps a synset without lemmas from raising in max().
        return max(
            (
                word_frequency(lemma.name().replace("_", " "), lang)
                for lemma in syn.lemmas()
            ),
            default=0.0,
        )

    # max() returns the first maximal element, so ties are resolved in
    # favour of the synset that wn.synsets() listed first.
    return max(synsets, key=top_lemma_frequency)


def simplify_definition(defn):
    """Shorten a WordNet definition into clue-like form.

    The phrases "the act of" and "the process of" are removed, everything
    after the first ';' or ',' is dropped, whitespace is normalised, the
    first letter is upper-cased and a closing period is ensured.

    Args:
        defn: The definition text to shorten.

    Returns:
        The shortened definition, or an empty string when nothing is left
        after trimming.
    """
    defn = _FILLER_PHRASES.sub('', defn)

    # Keep only the leading clause.
    defn = defn.split(';')[0].split(',')[0]

    # Removing a phrase can leave doubled or leading spaces behind.
    defn = ' '.join(defn.split())
    if not defn:
        # Nothing meaningful remains; a lone "." would not be a usable clue.
        return ''

    # Upper-case only the first character: str.capitalize() would also
    # lower-case the rest and damage proper nouns inside the definition.
    defn = defn[0].upper() + defn[1:]
    if not defn.endswith('.'):
        defn += '.'
    return defn


def crossword_clue(word, lang="en", *, as_clue=False):
    """Look up ``word`` and return either its raw WordNet data or a clue.

    Args:
        word: The word to build a clue for.
        lang: Language code forwarded to the ``word_frequency`` lookups.
        as_clue: Keyword-only. ``False`` (the default) keeps the historical
            return value, the raw definition and lemmas. ``True`` runs the
            clue formatting instead, which previously sat behind an
            unconditional ``return`` and could never execute.

    Returns:
        * A "No clue available" message string when no synset is found.
        * With ``as_clue=False``: ``{'defn': <definition>, 'lemmas': [...]}``.
        * With ``as_clue=True``: ``[<definition>, <clue text>]``. The clue
          text names the most frequent synonym when the synset has a lemma
          other than ``word`` itself.
    """
    syn = best_synset(word, lang)
    if syn is None:
        return f"No clue available for '{word}'."

    definition = syn.definition()
    if not as_clue:
        return {'defn': definition, 'lemmas': list(syn.lemmas())}

    defn = simplify_definition(definition)

    # dict.fromkeys de-duplicates while keeping lemma order, so the choice
    # below is deterministic even when two synonyms have equal frequency.
    synonyms = list(dict.fromkeys(
        lemma.name().replace("_", " ")
        for lemma in syn.lemmas()
        if lemma.name().lower() != word.lower()
    ))
    if synonyms:
        alt = max(synonyms, key=lambda w: word_frequency(w, lang))
        return [definition, f"{word}: {defn} (synonym: {alt})."]
    return [definition, defn]


# -------------------------------
# Examples
words = [
    "bank",
    "abode",
    "hurricane",
    "competition",
    "agriculture",
    "poultry",
    "reptile",
    "chimpanzee",
    "lion",
    "vote",
    "democracy",
    "pakistan",
    "election",
    "ethnographic",
    "laughter",
    "illness",
    "childhood",
    "scientist",
    "ethiopian",
    "humane",
    "medicate",
    "cryptography",
    "ology",
    "materialism",
    "dolmen",
    "amazing",
]


def main():
    """Print every WordNet definition for each word in ``words``."""
    banner = "=" * 20
    for word in words:
        syns = wn.synsets(word)
        print(f"{banner} {word} {banner}")
        for syn in syns:
            print(f'"{syn.definition()}"')

    # Further examples, left disabled:
    # print(crossword_clue("bank"))    # common word
    # print(crossword_clue("abode"))   # rarer word
    # print(crossword_clue("zephyr"))  # tricky word


if __name__ == "__main__":
    main()